Skip to content

Commit 378b2fd

Browse files
authored
fix: block sync hangs on initial node start (#1314)
1 parent b550fee commit 378b2fd

File tree

6 files changed

+79
-27
lines changed

6 files changed

+79
-27
lines changed

Makefile

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -162,33 +162,53 @@ iex: compile-all
162162
test-iex:
163163
MIX_ENV=test iex -S mix run -- --mode db
164164

165+
##################
166+
# NODE RUNNERS
167+
DYSCOVERY_PORT ?= 30303
168+
165169
#▶️ checkpoint-sync: @ Run an interactive terminal using checkpoint sync.
166170
checkpoint-sync: compile-all
167-
iex -S mix run -- --checkpoint-sync-url https://mainnet-checkpoint-sync.stakely.io/ --metrics
171+
iex -S mix run -- --checkpoint-sync-url https://mainnet-checkpoint-sync.stakely.io/ --metrics --discovery-port $(DYSCOVERY_PORT)
172+
173+
#▶️ checkpoint-sync.logfile: @ Run an interactive terminal using checkpoint sync with a log file.
174+
checkpoint-sync.logfile: compile-all
175+
iex -S mix run -- --checkpoint-sync-url https://mainnet-checkpoint-sync.stakely.io/ --metrics --log-file ./logs/mainnet.log
168176

169177
#▶️ checkpoint-sync.logfile: @ Run an interactive terminal using checkpoint sync with a log file.
170178
checkpoint-sync.logfile: compile-all
171179
iex -S mix run -- --checkpoint-sync-url https://mainnet-checkpoint-sync.stakely.io/ --metrics --log-file ./logs/mainnet.log
172180

173181
#▶️ sepolia: @ Run an interactive terminal using sepolia network
174182
sepolia: compile-all
175-
iex -S mix run -- --checkpoint-sync-url https://sepolia.beaconstate.info --network sepolia --metrics
183+
iex -S mix run -- --checkpoint-sync-url https://sepolia.beaconstate.info --network sepolia --metrics --discovery-port $(DYSCOVERY_PORT)
184+
185+
#▶️ sepolia.logfile: @ Run an interactive terminal using sepolia network with a log file
186+
sepolia.logfile: compile-all
187+
iex -S mix run -- --checkpoint-sync-url https://sepolia.beaconstate.info --network sepolia --metrics --log-file ./logs/sepolia.log
176188

177189
#▶️ sepolia.logfile: @ Run an interactive terminal using sepolia network with a log file
178190
sepolia.logfile: compile-all
179191
iex -S mix run -- --checkpoint-sync-url https://sepolia.beaconstate.info --network sepolia --metrics --log-file ./logs/sepolia.log
180192

181193
#▶️ holesky: @ Run an interactive terminal using holesky network
182194
holesky: compile-all
183-
iex -S mix run -- --checkpoint-sync-url https://checkpoint-sync.holesky.ethpandaops.io --network holesky
195+
iex -S mix run -- --checkpoint-sync-url https://checkpoint-sync.holesky.ethpandaops.io --network holesky --discovery-port $(DYSCOVERY_PORT)
196+
197+
#▶️ holesky.logfile: @ Run an interactive terminal using holesky network with a log file
198+
holesky.logfile: compile-all
199+
iex -S mix run -- --checkpoint-sync-url https://checkpoint-sync.holesky.ethpandaops.io --network holesky --log-file ./logs/holesky.log
184200

185201
#▶️ holesky.logfile: @ Run an interactive terminal using holesky network with a log file
186202
holesky.logfile: compile-all
187203
iex -S mix run -- --checkpoint-sync-url https://checkpoint-sync.holesky.ethpandaops.io --network holesky --log-file ./logs/holesky.log
188204

189205
#▶️ gnosis: @ Run an interactive terminal using gnosis network
190206
gnosis: compile-all
191-
iex -S mix run -- --checkpoint-sync-url https://checkpoint.gnosischain.com --network gnosis
207+
iex -S mix run -- --checkpoint-sync-url https://checkpoint.gnosischain.com --network gnosis --discovery-port $(DYSCOVERY_PORT)
208+
209+
#▶️ gnosis.logfile: @ Run an interactive terminal using gnosis network with a log file
210+
gnosis.logfile: compile-all
211+
iex -S mix run -- --checkpoint-sync-url https://checkpoint.gnosischain.com --network gnosis --log-file ./logs/gnosis.log --discovery-port $(DYSCOVERY_PORT)
192212

193213
#🔴 test: @ Run tests
194214
test: compile-all

lib/lambda_ethereum_consensus/fork_choice/fork_choice.ex

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@ defmodule LambdaEthereumConsensus.ForkChoice do
118118
@doc """
119119
Get the current chain slot based on the system time.
120120
121-
TODO: There are just 2 uses of this function outside this module:
121+
There are just 2 uses of this function outside this module:
122122
- At the beginning of the SyncBlocks.run/1 function, to get the head slot
123123
- In the Helpers.block_root_by_block_id/1 function
124124
"""

lib/lambda_ethereum_consensus/p2p/blob_downloader.ex

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,6 @@ defmodule LambdaEthereumConsensus.P2P.BlobDownloader do
3232
def request_blobs_by_range(slot, count, on_blobs, retries) do
3333
Logger.debug("Requesting blobs", slot: slot)
3434

35-
# TODO: handle no-peers asynchronously?
3635
peer_id = get_some_peer()
3736

3837
# NOTE: BeaconBlocksByRangeRequest == BlobSidecarsByRangeRequest
@@ -62,7 +61,7 @@ defmodule LambdaEthereumConsensus.P2P.BlobDownloader do
6261
P2P.Peerbook.penalize_peer(peer_id)
6362

6463
if retries > 0 do
65-
Logger.debug("Retrying request for #{count} blobs", slot: slot)
64+
Logger.debug("Retrying request for #{count} blobs: #{inspect(reason)}", slot: slot)
6665
request_blobs_by_range(slot, count, on_blobs, retries - 1)
6766
{:ok, store}
6867
else
@@ -123,8 +122,8 @@ defmodule LambdaEthereumConsensus.P2P.BlobDownloader do
123122
defp get_some_peer() do
124123
case P2P.Peerbook.get_some_peer() do
125124
nil ->
126-
Process.sleep(100)
127-
get_some_peer()
125+
# TODO: handle no-peers asynchronously
126+
raise "No peers available to request blobs from."
128127

129128
peer_id ->
130129
peer_id

lib/lambda_ethereum_consensus/p2p/block_downloader.ex

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,6 @@ defmodule LambdaEthereumConsensus.P2P.BlockDownloader do
6666
def request_blocks_by_range(slot, count, on_blocks, retries) do
6767
Logger.debug("Requesting block", slot: slot)
6868

69-
# TODO: handle no-peers asynchronously?
7069
peer_id = get_some_peer()
7170

7271
request =
@@ -173,7 +172,8 @@ defmodule LambdaEthereumConsensus.P2P.BlockDownloader do
173172
if retries > 0 do
174173
:telemetry.execute([:network, :request], %{blocks: 0}, Map.put(tags, :result, "retry"))
175174
pretty_roots = Enum.map_join(roots, ", ", &Base.encode16/1)
176-
Logger.debug("Retrying request for blocks with roots #{pretty_roots}")
175+
176+
Logger.debug("Retrying request for block roots #{pretty_roots}: #{inspect(reason)}")
177177
request_blocks_by_root(roots, on_blocks, retries - 1)
178178
{:ok, store}
179179
else
@@ -186,8 +186,8 @@ defmodule LambdaEthereumConsensus.P2P.BlockDownloader do
186186
defp get_some_peer() do
187187
case P2P.Peerbook.get_some_peer() do
188188
nil ->
189-
Process.sleep(100)
190-
get_some_peer()
189+
# TODO: handle no-peers asynchronously
190+
raise "No peers available to request blocks from."
191191

192192
peer_id ->
193193
peer_id

lib/lambda_ethereum_consensus/p2p/peerbook.ex

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,13 @@ defmodule LambdaEthereumConsensus.P2P.Peerbook do
22
@moduledoc """
33
General peer bookkeeping.
44
"""
5+
require Logger
56
alias LambdaEthereumConsensus.Libp2pPort
67
alias LambdaEthereumConsensus.Store.KvSchema
8+
alias LambdaEthereumConsensus.Utils
79

810
@initial_score 100
11+
@penalizing_score 15
912
@target_peers 128
1013
@max_prune_size 8
1114
@prune_percentage 0.05
@@ -41,24 +44,51 @@ defmodule LambdaEthereumConsensus.P2P.Peerbook do
4144
Get some peer from the peerbook.
4245
"""
4346
def get_some_peer() do
44-
# TODO: use some algorithm to pick a good peer, for now it's random
47+
# TODO: This is a very naive implementation of a peer selection algorithm,
48+
# this sorts the peers every time. The same is true for the pruning.
4549
peerbook = fetch_peerbook!()
4650

4751
if peerbook == %{} do
4852
nil
4953
else
50-
{peer_id, _score} = Enum.random(peerbook)
51-
peer_id
54+
peerbook
55+
|> Enum.sort_by(fn {_peer_id, score} -> -score end)
56+
|> Enum.take(5)
57+
|> Enum.random()
58+
|> elem(0)
5259
end
5360
end
5461

5562
def penalize_peer(peer_id) do
56-
fetch_peerbook!() |> Map.delete(peer_id) |> store_peerbook()
63+
Logger.debug("[Peerbook] Penalizing peer: #{inspect(Utils.format_shorten_binary(peer_id))}")
64+
65+
peer_score = fetch_peerbook!() |> Map.get(peer_id)
66+
67+
case peer_score do
68+
nil ->
69+
:ok
70+
71+
score when score - @penalizing_score <= 0 ->
72+
Logger.debug("[Peerbook] Removing peer: #{inspect(Utils.format_shorten_binary(peer_id))}")
73+
74+
fetch_peerbook!()
75+
|> Map.delete(peer_id)
76+
|> store_peerbook()
77+
78+
score ->
79+
fetch_peerbook!()
80+
|> Map.put(peer_id, score - @penalizing_score)
81+
|> store_peerbook()
82+
end
5783
end
5884

5985
def handle_new_peer(peer_id) do
6086
peerbook = fetch_peerbook!()
6187

88+
Logger.debug(
89+
"[Peerbook] New peer connected: #{inspect(Utils.format_shorten_binary(peer_id))}"
90+
)
91+
6292
if not Map.has_key?(peerbook, peer_id) do
6393
:telemetry.execute([:peers, :connection], %{id: peer_id}, %{result: "success"})
6494
Map.put(peerbook, peer_id, @initial_score) |> store_peerbook()
@@ -81,14 +111,10 @@ defmodule LambdaEthereumConsensus.P2P.Peerbook do
81111
defp prune() do
82112
peerbook = fetch_peerbook!()
83113
len = map_size(peerbook)
114+
prune_size = if len > 0, do: calculate_prune_size(len), else: 0
84115

85-
if len != 0 do
86-
prune_size =
87-
(len * @prune_percentage)
88-
|> round()
89-
|> min(@max_prune_size)
90-
|> min(len - @target_peers)
91-
|> max(0)
116+
if prune_size > 0 do
117+
Logger.debug("[Peerbook] Pruning #{prune_size} peers by challenge")
92118

93119
n = :rand.uniform(len)
94120

@@ -100,6 +126,14 @@ defmodule LambdaEthereumConsensus.P2P.Peerbook do
100126
end
101127
end
102128

129+
defp calculate_prune_size(len) do
130+
(len * @prune_percentage)
131+
|> round()
132+
|> min(@max_prune_size)
133+
|> min(len - @target_peers)
134+
|> max(0)
135+
end
136+
103137
defp store_peerbook(peerbook), do: put("", peerbook)
104138

105139
defp fetch_peerbook(), do: get("")

lib/libp2p_port.ex

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,6 @@ defmodule LambdaEthereumConsensus.Libp2pPort do
99

1010
use GenServer
1111

12-
@tick_time 1000
13-
1412
alias LambdaEthereumConsensus.Beacon.PendingBlocks
1513
alias LambdaEthereumConsensus.Beacon.SyncBlocks
1614
alias LambdaEthereumConsensus.ForkChoice
@@ -84,7 +82,8 @@ defmodule LambdaEthereumConsensus.Libp2pPort do
8482
discovery_addresses: [String.t()]
8583
}
8684

87-
@sync_delay_millis 10_000
85+
@tick_time 1000
86+
@sync_delay_millis 15_000
8887

8988
######################
9089
### API

0 commit comments

Comments
 (0)