From 14e26e521607c1031ed3a817bcca8ee84927d9b5 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Mon, 19 Jun 2023 04:28:26 +0000 Subject: [PATCH 01/30] p3_server_peer/p3_server_peers config --- apps/arweave/include/ar_config.hrl | 1 + apps/arweave/src/ar.erl | 3 ++ apps/arweave/src/ar_config.erl | 28 +++++++++---------- apps/arweave/test/ar_config_tests.erl | 3 +- .../test/ar_config_tests_config_fixture.json | 1 + 5 files changed, 21 insertions(+), 15 deletions(-) diff --git a/apps/arweave/include/ar_config.hrl b/apps/arweave/include/ar_config.hrl index eae1f3d00..1f57f5b05 100644 --- a/apps/arweave/include/ar_config.hrl +++ b/apps/arweave/include/ar_config.hrl @@ -191,6 +191,7 @@ = ?DEFAULT_MAX_NONCE_LIMITER_LAST_STEP_VALIDATION_THREAD_COUNT, nonce_limiter_server_trusted_peers = [], nonce_limiter_client_peers = [], + p3_server_peers = [], debug = false, repair_rocksdb = [], run_defragmentation = false, diff --git a/apps/arweave/src/ar.erl b/apps/arweave/src/ar.erl index a0e26b932..8d7d5c056 100644 --- a/apps/arweave/src/ar.erl +++ b/apps/arweave/src/ar.erl @@ -541,6 +541,9 @@ parse_cli_args(["vdf_server_trusted_peer", Peer | Rest], C) -> parse_cli_args(["vdf_client_peer", RawPeer | Rest], C = #config{ nonce_limiter_client_peers = Peers }) -> parse_cli_args(Rest, C#config{ nonce_limiter_client_peers = [RawPeer | Peers] }); +parse_cli_args(["p3_server_peer", RawPeer | Rest], + C = #config{ p3_server_peers = Peers }) -> + parse_cli_args(Rest, C#config{ p3_server_peers = [RawPeer | Peers] }); parse_cli_args(["debug" | Rest], C) -> parse_cli_args(Rest, C#config{ debug = true }); parse_cli_args(["repair_rocksdb", Path | Rest], #config{ repair_rocksdb = L } = C) -> diff --git a/apps/arweave/src/ar_config.erl b/apps/arweave/src/ar_config.erl index 5c8c45323..992abe6c8 100644 --- a/apps/arweave/src/ar_config.erl +++ b/apps/arweave/src/ar_config.erl @@ -473,18 +473,27 @@ parse_options([{<<"max_nonce_limiter_last_step_validation_thread_count">>, D} | 
 parse_options([{<<"vdf_server_trusted_peer">>, <<>>} | Rest], Config) ->
 	parse_options(Rest, Config);
 parse_options([{<<"vdf_server_trusted_peer">>, Peer} | Rest], Config) ->
-	parse_options(Rest, parse_vdf_server_trusted_peer(Peer, Config));
+	#config{ nonce_limiter_server_trusted_peers = Peers } = Config,
+	parse_options(Rest,
+		Config#config{ nonce_limiter_server_trusted_peers = Peers ++ parse_peers([Peer]) });
 
 parse_options([{<<"vdf_server_trusted_peers">>, Peers} | Rest], Config) when is_list(Peers) ->
-	parse_options(Rest, parse_vdf_server_trusted_peers(Peers, Config));
+	#config{ nonce_limiter_server_trusted_peers = ExistingPeers } = Config,
+	parse_options(Rest,
+		Config#config{ nonce_limiter_server_trusted_peers = ExistingPeers ++ parse_peers(Peers) });
 parse_options([{<<"vdf_server_trusted_peers">>, Peers} | _], _) ->
 	{error, {bad_type, vdf_server_trusted_peers, array}, Peers};
 
 parse_options([{<<"vdf_client_peers">>, Peers} | Rest], Config) when is_list(Peers) ->
-	parse_options(Rest, Config#config{ nonce_limiter_client_peers = Peers });
+	parse_options(Rest, Config#config{ nonce_limiter_client_peers = parse_peers(Peers) });
 parse_options([{<<"vdf_client_peers">>, Peers} | _], _) ->
 	{error, {bad_type, vdf_client_peers, array}, Peers};
 
+parse_options([{<<"p3_server_peers">>, Peers} | Rest], Config) when is_list(Peers) ->
+	parse_options(Rest, Config#config{ p3_server_peers = parse_peers(Peers) });
+parse_options([{<<"p3_server_peers">>, Peers} | _], _) ->
+	{error, {bad_type, p3_server_peers, array}, Peers};
+
 parse_options([{<<"debug">>, B} | Rest], Config) when is_boolean(B) ->
 	parse_options(Rest, Config#config{ debug = B });
 
@@ -631,17 +640,8 @@ parse_requests_per_minute_limit_by_ip({[]}, Parsed) ->
 parse_requests_per_minute_limit_by_ip(_, _) ->
 	error.
 
-parse_vdf_server_trusted_peers([Peer | Rest], Config) -> - Config2 = parse_vdf_server_trusted_peer(Peer, Config), - parse_vdf_server_trusted_peers(Rest, Config2); -parse_vdf_server_trusted_peers([], Config) -> - Config. - -parse_vdf_server_trusted_peer(Peer, Config) when is_binary(Peer) -> - parse_vdf_server_trusted_peer(binary_to_list(Peer), Config); -parse_vdf_server_trusted_peer(Peer, Config) -> - #config{ nonce_limiter_server_trusted_peers = Peers } = Config, - Config#config{ nonce_limiter_server_trusted_peers = Peers ++ [Peer] }. +parse_peers(Peers) -> + [ar_util:peer_to_str(Peer) || Peer <- Peers]. format_config(Config) -> Fields = record_info(fields, config), diff --git a/apps/arweave/test/ar_config_tests.erl b/apps/arweave/test/ar_config_tests.erl index 3497e42e4..437b461d4 100644 --- a/apps/arweave/test/ar_config_tests.erl +++ b/apps/arweave/test/ar_config_tests.erl @@ -110,7 +110,8 @@ parse_config() -> max_nonce_limiter_validation_thread_count = 2, max_nonce_limiter_last_step_validation_thread_count = 3, nonce_limiter_server_trusted_peers = ["127.0.0.1", "2.3.4.5", "6.7.8.9:1982"], - nonce_limiter_client_peers = [<<"2.3.6.7:1984">>, <<"4.7.3.1:1983">>, <<"3.3.3.3">>], + nonce_limiter_client_peers = ["2.3.6.7:1984", "4.7.3.1:1983", "3.3.3.3"], + p3_server_peers = ["10.1.2.3:1985", "10.4.5.6"], run_defragmentation = true, defragmentation_trigger_threshold = 1_000, defragmentation_modules = [ diff --git a/apps/arweave/test/ar_config_tests_config_fixture.json b/apps/arweave/test/ar_config_tests_config_fixture.json index 73c6a5a49..8658317fe 100644 --- a/apps/arweave/test/ar_config_tests_config_fixture.json +++ b/apps/arweave/test/ar_config_tests_config_fixture.json @@ -115,6 +115,7 @@ "vdf_server_trusted_peer": "127.0.0.1", "vdf_server_trusted_peers": ["2.3.4.5", "6.7.8.9:1982"], "vdf_client_peers": ["2.3.6.7:1984", "4.7.3.1:1983", "3.3.3.3"], + "p3_server_peers": ["10.1.2.3:1985", "10.4.5.6"], "run_defragmentation": true, 
"defragmentation_trigger_threshold": 1000, "defragment_modules": [ From c1e89afe381b9656a15310a05784077eecad1fb1 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Thu, 22 Jun 2023 08:13:09 +0000 Subject: [PATCH 02/30] log peer stats --- apps/arweave/src/ar_peers.erl | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index c2e50a0fa..1be782c31 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -162,6 +162,7 @@ stats(Peers) -> Peers). discover_peers() -> + stats(), case ets:lookup(?MODULE, peers) of [] -> ok; @@ -385,9 +386,11 @@ discover_peers([Peer | Peers]) -> discover_peers(Peers). format_stats(Peer, Perf) -> - io:format("\t~s ~.2f kB/s (~p transfers, ~B failures)~n", + KB = Perf#performance.bytes / 1024, + Seconds = (Perf#performance.time + 1) / 1000000, + io:format("\t~s ~.2f kB/s (~.2f kB, ~.2f s, ~p transfers, ~B failures)~n", [string:pad(ar_util:format_peer(Peer), 20, trailing, $ ), - (Perf#performance.bytes / 1024) / ((Perf#performance.time + 1) / 1000000), + KB / Seconds, KB, Seconds, Perf#performance.transfers, Perf#performance.failures]). 
load_peers() -> From 50218e3f94e504b6cf55abe482d1a3ef2b099c72 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Thu, 22 Jun 2023 22:28:16 +0000 Subject: [PATCH 03/30] logging around what is driving up some performance metrics --- apps/arweave/src/ar_peers.erl | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 1be782c31..59185ca03 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -285,10 +285,12 @@ handle_info({event, peer, {made_request, Peer, Release}}, State) -> {noreply, State}; handle_info({event, peer, {served_tx, Peer, TimeDelta, Size}}, State) -> + ?LOG_DEBUG([{event, update_rating}, {type, served_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), update_rating(Peer, TimeDelta, Size), {noreply, State}; handle_info({event, peer, {served_block, Peer, TimeDelta, Size}}, State) -> + ?LOG_DEBUG([{event, update_rating}, {type, served_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), update_rating(Peer, TimeDelta, Size), {noreply, State}; @@ -297,6 +299,7 @@ handle_info({event, peer, {gossiped_tx, Peer, TimeDelta, Size}}, State) -> %% Otherwise, one may exploit the endpoint to gain reputation. case check_external_peer(Peer) of ok -> + ?LOG_DEBUG([{event, update_rating}, {type, gossiped_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), update_rating(Peer, TimeDelta, Size); _ -> ok @@ -308,6 +311,7 @@ handle_info({event, peer, {gossiped_block, Peer, TimeDelta, Size}}, State) -> %% Otherwise, one may exploit the endpoint to gain reputation. 
case check_external_peer(Peer) of ok -> + ?LOG_DEBUG([{event, update_rating}, {type, gossiped_block}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), update_rating(Peer, TimeDelta, Size); _ -> ok @@ -315,6 +319,7 @@ handle_info({event, peer, {gossiped_block, Peer, TimeDelta, Size}}, State) -> {noreply, State}; handle_info({event, peer, {served_chunk, Peer, TimeDelta, Size}}, State) -> + ?LOG_DEBUG([{event, update_rating}, {type, served_chunk}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), update_rating(Peer, TimeDelta, Size), {noreply, State}; From ffc991893a34295ca17e39a7fc97b47ef3c35c70 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Sat, 24 Jun 2023 13:01:48 +0000 Subject: [PATCH 04/30] rebalance sync peers every minute --- .../src/ar_data_sync_worker_master.erl | 53 +++++++++++++++---- 1 file changed, 43 insertions(+), 10 deletions(-) diff --git a/apps/arweave/src/ar_data_sync_worker_master.erl b/apps/arweave/src/ar_data_sync_worker_master.erl index 6f6492ec7..5abe89ec0 100644 --- a/apps/arweave/src/ar_data_sync_worker_master.erl +++ b/apps/arweave/src/ar_data_sync_worker_master.erl @@ -14,6 +14,7 @@ -include_lib("arweave/include/ar_data_sync.hrl"). -include_lib("eunit/include/eunit.hrl"). +-define(REBALANCE_FREQUENCY_MS, 60*1000). -define(READ_RANGE_CHUNKS, 10). -define(MIN_MAX_ACTIVE, 8). -define(LATENCY_ALPHA, 0.1). 
@@ -73,6 +74,7 @@ ready_for_work() -> init(Workers) -> process_flag(trap_exit, true), gen_server:cast(?MODULE, process_main_queue), + ar_util:cast_after(?REBALANCE_FREQUENCY_MS, ?MODULE, rebalance_peers), {ok, #state{ workers = queue:from_list(Workers), @@ -116,6 +118,12 @@ handle_cast({task_completed, {sync_range, {Worker, Result, Peer, Duration}}}, St {PeerTasks3, State4} = process_peer_queue(PeerTasks2, State3), {noreply, set_peer_tasks(PeerTasks3, State4)}; +handle_cast(rebalance_peers, State) -> + ar_util:cast_after(?REBALANCE_FREQUENCY_MS, ?MODULE, rebalance_peers), + ?LOG_DEBUG([{event, rebalance_peers}]), + AllPeerTasks = maps:values(State#state.peer_tasks), + {noreply, rebalance_peers(AllPeerTasks, State)}; + handle_cast(Cast, State) -> ?LOG_WARNING("event: unhandled_cast, cast: ~p", [Cast]), {noreply, State}. @@ -330,15 +338,41 @@ complete_sync_range(PeerTasks, Result, Duration, State) -> LatencyTarget = trunc(calculate_ema( State#state.latency_target, IsOK, Milliseconds, LatencyTargetAlpha)), - PeerTasks2 = PeerTasks#peer_tasks{ latency_ema = LatencyEMA, success_ema = SuccessEMA }, - {PeerTasks3, State2} = cut_peer_queue( - max_peer_queue(PeerTasks2, State), - PeerTasks2, - State), - PeerTasks4 = update_active( - PeerTasks3, IsOK, Milliseconds, State2#state.worker_count, LatencyTarget), - {PeerTasks4, State2#state{ latency_target = LatencyTarget }}. + PeerTasks2 = PeerTasks#peer_tasks{ + latency_ema = LatencyEMA, + success_ema = SuccessEMA, + active_count = PeerTasks#peer_tasks.active_count - 1 + }, + {PeerTasks2, State#state{ latency_target = LatencyTarget }}. +rebalance_peers([], State) -> + State; +rebalance_peers([PeerTasks | Rest], State) -> + {PeerTasks2, State2} = rebalance_peer(PeerTasks, State), + State3 = set_peer_tasks(PeerTasks2, State2), + rebalance_peers(Rest, State3). 
+ +rebalance_peer(PeerTasks, State) -> + {PeerTasks2, State2} = cut_peer_queue( + max_peer_queue(PeerTasks, State), + PeerTasks, + State), + IsOK = true, + Milliseconds = PeerTasks2#peer_tasks.latency_ema, + WorkerCount = State2#state.worker_count, + LatencyTarget = State2#state.latency_target, + PeerTasks3 = update_active(PeerTasks2, IsOK, Milliseconds, WorkerCount, LatencyTarget), + ?LOG_DEBUG([ + {event, update_active}, + {peer, ar_util:format_peer(PeerTasks3#peer_tasks.peer)}, + {before_max, PeerTasks2#peer_tasks.max_active}, + {after_max, PeerTasks3#peer_tasks.max_active}, + {worker_count, WorkerCount}, + {active_count, PeerTasks2#peer_tasks.active_count}, + {latency_target, LatencyTarget}, + {latency_ema, Milliseconds} + ]), + {PeerTasks3, State2}. %%-------------------------------------------------------------------- %% Helpers @@ -403,7 +437,7 @@ update_active(PeerTasks, IsOK, Milliseconds, WorkerCount, LatencyTarget) -> %% batch of queued tasks and since the max_active is so high we overwhelm the peer. LatencyEMA = PeerTasks#peer_tasks.latency_ema, MaxActive = PeerTasks#peer_tasks.max_active, - ActiveCount = PeerTasks#peer_tasks.active_count - 1, + ActiveCount = PeerTasks#peer_tasks.active_count, TargetMaxActive = case { IsOK, Milliseconds < LatencyTarget, LatencyEMA < LatencyTarget} of {false, _, _} -> @@ -429,7 +463,6 @@ update_active(PeerTasks, IsOK, Milliseconds, WorkerCount, LatencyTarget) -> ), %% Can't have less than the minimum. PeerTasks#peer_tasks{ - active_count = ActiveCount, max_active = max(TaskLimitedMaxActive, ?MIN_MAX_ACTIVE) }. 
From dd1aaf00915d0f4242bf8d1aa573d89558e89751 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Mon, 26 Jun 2023 07:51:36 +0000 Subject: [PATCH 05/30] call update_rating on all web requests --- apps/arweave/src/ar_http.erl | 11 +++-- apps/arweave/src/ar_peers.erl | 75 +++++++++++++++++++++++++---------- 2 files changed, 61 insertions(+), 25 deletions(-) diff --git a/apps/arweave/src/ar_http.erl b/apps/arweave/src/ar_http.erl index 7862e28bd..af342d269 100644 --- a/apps/arweave/src/ar_http.erl +++ b/apps/arweave/src/ar_http.erl @@ -63,6 +63,8 @@ req(Args) -> req(Args, ReestablishedConnection) -> StartTime = erlang:monotonic_time(), #{ peer := Peer, path := Path, method := Method } = Args, + PathLabel = ar_http_iface_server:label_http_path(list_to_binary(Path)), + ar_peers:start_request(Peer, PathLabel, Method), Response = case catch gen_server:call(?MODULE, {get_connection, Args}, infinity) of {ok, PID} -> ar_rate_limiter:throttle(Peer, Path), @@ -89,6 +91,9 @@ req(Args, ReestablishedConnection) -> true -> ok; false -> + Status = ar_metrics:get_status_class(Response), + ElapsedNative = EndTime - StartTime, + ar_peers:end_request(Peer, PathLabel, Method, Response), %% NOTE: the erlang prometheus client looks at the metric name to determine units. %% If it sees _duration_ it assumes the observed value is in %% native units and it converts it to .To query native units, use: @@ -96,9 +101,9 @@ req(Args, ReestablishedConnection) -> %% See: https://github.com/deadtrickster/prometheus.erl/blob/6dd56bf321e99688108bb976283a80e4d82b3d30/src/prometheus_time.erl#L2-L84 prometheus_histogram:observe(ar_http_request_duration_seconds, [ method_to_list(Method), - ar_http_iface_server:label_http_path(list_to_binary(Path)), - ar_metrics:get_status_class(Response) - ], EndTime - StartTime) + PathLabel, + Status + ], ElapsedNative) end, Response. 
%%% ================================================================== diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 59185ca03..937b705db 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -10,7 +10,7 @@ -export([start_link/0, get_peers/0, get_trusted_peers/0, is_public_peer/1, get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, - resolve_and_cache_peer/2]). + resolve_and_cache_peer/2, start_request/3, end_request/4]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -148,6 +148,19 @@ get_peer_release(Peer) -> -1 end. +start_request(Peer, PathLabel, Method) -> + gen_server:cast(?MODULE, {start_request, Peer, PathLabel, Method}). + +end_request(Peer, PathLabel, Method, {ok, {{<<"200">>, _}, _, Body, Start, End}} = Response) -> + gen_server:cast(?MODULE, {end_request, + Peer, PathLabel, Method, + ar_metrics:get_status_class(Response), + End-Start, byte_size(term_to_binary(Body))}); +end_request(Peer, PathLabel, Method, Response) -> + %% TODO: error response + ok. + + %% @doc Print statistics about the current peers. stats() -> Connected = get_peers(), @@ -261,6 +274,21 @@ handle_cast(ping_peers, State) -> ping_peers(lists:sublist(Peers, 100)), {noreply, State}; +handle_cast({start_request, Peer, PathLabel, Method}, State) -> + {noreply, State}; + +handle_cast({end_request, Peer, PathLabel, _Method, Status, ElapsedMicroseconds, Size}, State) -> + ?LOG_DEBUG([ + {event, update_rating}, + {path, PathLabel}, + {status, Status}, + {peer, ar_util:format_peer(Peer)}, + {time_delta, ElapsedMicroseconds}, + {size, Size} + ]), + update_rating(Peer, ElapsedMicroseconds, Size), + {noreply, State}; + handle_cast(Cast, State) -> ?LOG_WARNING("event: unhandled_cast, cast: ~p", [Cast]), {noreply, State}. 
@@ -285,42 +313,42 @@ handle_info({event, peer, {made_request, Peer, Release}}, State) -> {noreply, State}; handle_info({event, peer, {served_tx, Peer, TimeDelta, Size}}, State) -> - ?LOG_DEBUG([{event, update_rating}, {type, served_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - update_rating(Peer, TimeDelta, Size), + % ?LOG_DEBUG([{event, update_rating}, {type, served_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), + % update_rating(Peer, TimeDelta, Size), {noreply, State}; handle_info({event, peer, {served_block, Peer, TimeDelta, Size}}, State) -> - ?LOG_DEBUG([{event, update_rating}, {type, served_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - update_rating(Peer, TimeDelta, Size), + % ?LOG_DEBUG([{event, update_rating}, {type, served_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), + % update_rating(Peer, TimeDelta, Size), {noreply, State}; handle_info({event, peer, {gossiped_tx, Peer, TimeDelta, Size}}, State) -> %% Only the first peer who sent the given transaction is rated. %% Otherwise, one may exploit the endpoint to gain reputation. - case check_external_peer(Peer) of - ok -> - ?LOG_DEBUG([{event, update_rating}, {type, gossiped_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - update_rating(Peer, TimeDelta, Size); - _ -> - ok - end, + % case check_external_peer(Peer) of + % ok -> + % ?LOG_DEBUG([{event, update_rating}, {type, gossiped_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), + % update_rating(Peer, TimeDelta, Size); + % _ -> + % ok + % end, {noreply, State}; handle_info({event, peer, {gossiped_block, Peer, TimeDelta, Size}}, State) -> %% Only the first peer who sent the given block is rated. %% Otherwise, one may exploit the endpoint to gain reputation. 
- case check_external_peer(Peer) of - ok -> - ?LOG_DEBUG([{event, update_rating}, {type, gossiped_block}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - update_rating(Peer, TimeDelta, Size); - _ -> - ok - end, + % case check_external_peer(Peer) of + % ok -> + % ?LOG_DEBUG([{event, update_rating}, {type, gossiped_block}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), + % update_rating(Peer, TimeDelta, Size); + % _ -> + % ok + % end, {noreply, State}; handle_info({event, peer, {served_chunk, Peer, TimeDelta, Size}}, State) -> - ?LOG_DEBUG([{event, update_rating}, {type, served_chunk}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - update_rating(Peer, TimeDelta, Size), + % ?LOG_DEBUG([{event, update_rating}, {type, served_chunk}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), + % update_rating(Peer, TimeDelta, Size), {noreply, State}; handle_info({event, peer, {bad_response, {Peer, _Type, _Reason}}}, State) -> @@ -480,7 +508,10 @@ may_be_rotate_peer_ports(Peer) -> end. get_ip_port({A, B, C, D, Port}) -> - {{A, B, C, D}, Port}. + {{A, B, C, D}, Port}; + +get_ip_port({Domain, Port}) -> + {Domain, Port}. construct_peer({A, B, C, D}, Port) -> {A, B, C, D, Port}. 
From 5e6eeeeb569d9f39a3472c65e263b1bcf89b87fc Mon Sep 17 00:00:00 2001 From: James Piechota Date: Mon, 26 Jun 2023 13:40:11 +0000 Subject: [PATCH 06/30] better handling of the blacklist domain, also try to reduce ar_peers messages --- apps/arweave/src/ar_block_pre_validator.erl | 4 ++-- apps/arweave/src/ar_data_sync.erl | 4 ++-- apps/arweave/src/ar_header_sync.erl | 4 ++-- apps/arweave/src/ar_http_iface_client.erl | 2 +- apps/arweave/src/ar_http_iface_middleware.erl | 4 ++-- apps/arweave/src/ar_peers.erl | 19 ++++++++++--------- apps/arweave/src/ar_randomx_state.erl | 2 +- apps/arweave/src/ar_tx_blacklist.erl | 3 ++- 8 files changed, 22 insertions(+), 20 deletions(-) diff --git a/apps/arweave/src/ar_block_pre_validator.erl b/apps/arweave/src/ar_block_pre_validator.erl index ce534398f..f330729ae 100644 --- a/apps/arweave/src/ar_block_pre_validator.erl +++ b/apps/arweave/src/ar_block_pre_validator.erl @@ -733,7 +733,7 @@ pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp, ReadBodyTime, BodySize) -> accept_block(B, Peer, ReadBodyTime, BodySize, Timestamp, Gossip) -> ar_ignore_registry:add(B#block.indep_hash), ar_events:send(block, {new, B, #{ source => {peer, Peer}, gossip => Gossip }}), - ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), + % ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), record_block_pre_validation_time(Timestamp), ?LOG_INFO([{event, accepted_block}, {height, B#block.height}, {indep_hash, ar_util:encode(B#block.indep_hash)}]). 
@@ -777,7 +777,7 @@ pre_validate_pow(B, BDS, PrevB, Peer, Timestamp, ReadBodyTime, BodySize) -> B2 = B#block{ txs = include_transactions(B#block.txs) }, ar_events:send(block, {new, B2, #{ source => {peer, Peer}, recall_byte => RecallByte }}), - ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), + % ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), record_block_pre_validation_time(Timestamp), prometheus_counter:inc(block2_received_transactions, count_received_transactions(B#block.txs)), diff --git a/apps/arweave/src/ar_data_sync.erl b/apps/arweave/src/ar_data_sync.erl index 96aed7423..ff63d7961 100644 --- a/apps/arweave/src/ar_data_sync.erl +++ b/apps/arweave/src/ar_data_sync.erl @@ -944,7 +944,7 @@ handle_cast({store_fetched_chunk, Peer, Time, TransferSize, Byte, Proof} = Cast, ar_util:cast_after(1000, self(), Cast), {noreply, State}; false -> - ar_events:send(peer, {served_chunk, Peer, Time, TransferSize}), + % ar_events:send(peer, {served_chunk, Peer, Time, TransferSize}), ar_packing_server:request_unpack(AbsoluteOffset, ChunkArgs), ?LOG_DEBUG([{event, requested_fetched_chunk_unpacking}, {data_path_hash, ar_util:encode(crypto:hash(sha256, @@ -964,7 +964,7 @@ handle_cast({store_fetched_chunk, Peer, Time, TransferSize, Byte, Proof} = Cast, decrement_chunk_cache_size(), process_invalid_fetched_chunk(Peer, Byte, State); {true, DataRoot, TXStartOffset, ChunkEndOffset, TXSize, ChunkSize, ChunkID} -> - ar_events:send(peer, {served_chunk, Peer, Time, TransferSize}), + % ar_events:send(peer, {served_chunk, Peer, Time, TransferSize}), AbsoluteTXStartOffset = BlockStartOffset + TXStartOffset, AbsoluteEndOffset = AbsoluteTXStartOffset + ChunkEndOffset, ChunkArgs = {unpacked, Chunk, AbsoluteEndOffset, TXRoot, ChunkSize}, diff --git a/apps/arweave/src/ar_header_sync.erl b/apps/arweave/src/ar_header_sync.erl index 62119d821..ad947e0a6 100644 --- a/apps/arweave/src/ar_header_sync.erl +++ b/apps/arweave/src/ar_header_sync.erl @@ -511,10 
+511,10 @@ download_block(Peers, H, H2, TXRoot) -> end, case BH of H when Height >= Fork_2_0 -> - ar_events:send(peer, {served_block, Peer, Time, Size}), + % ar_events:send(peer, {served_block, Peer, Time, Size}), download_txs(Peers, B, TXRoot); H2 when Height < Fork_2_0 -> - ar_events:send(peer, {served_block, Peer, Time, Size}), + % ar_events:send(peer, {served_block, Peer, Time, Size}), download_txs(Peers, B, TXRoot); _ -> ?LOG_WARNING([ diff --git a/apps/arweave/src/ar_http_iface_client.erl b/apps/arweave/src/ar_http_iface_client.erl index edf74898a..83ef0a49b 100644 --- a/apps/arweave/src/ar_http_iface_client.erl +++ b/apps/arweave/src/ar_http_iface_client.erl @@ -840,7 +840,7 @@ get_tx_from_remote_peer(Peer, TXID) -> ar_events:send(peer, {bad_response, {Peer, tx, invalid}}), {error, invalid_tx}; true -> - ar_events:send(peer, {served_tx, Peer, Time, Size}), + % ar_events:send(peer, {served_tx, Peer, Time, Size}), TX end; Error -> diff --git a/apps/arweave/src/ar_http_iface_middleware.erl b/apps/arweave/src/ar_http_iface_middleware.erl index 92dd6962f..f5808a353 100644 --- a/apps/arweave/src/ar_http_iface_middleware.erl +++ b/apps/arweave/src/ar_http_iface_middleware.erl @@ -1830,8 +1830,8 @@ handle_post_tx_accepted(Req, TX, Peer) -> %% of excessive transaction volumes. {A, B, C, D, _} = Peer, ar_blacklist_middleware:decrement_ip_addr({A, B, C, D}, Req), - ar_events:send(peer, {gossiped_tx, Peer, erlang:get(read_body_time), - erlang:get(body_size)}), + % ar_events:send(peer, {gossiped_tx, Peer, erlang:get(read_body_time), + % erlang:get(body_size)}), ar_events:send(tx, {new, TX, Peer}), TXID = TX#tx.id, ar_ignore_registry:remove_temporary(TXID), diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 937b705db..a7435299d 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -148,16 +148,20 @@ get_peer_release(Peer) -> -1 end. 
-start_request(Peer, PathLabel, Method) -> - gen_server:cast(?MODULE, {start_request, Peer, PathLabel, Method}). +start_request(Peer, PathLabel, get) -> + gen_server:cast(?MODULE, {start_request, Peer, PathLabel, get}); +start_request(Peer, PathLabel, _) -> + ok. -end_request(Peer, PathLabel, Method, {ok, {{<<"200">>, _}, _, Body, Start, End}} = Response) -> +end_request(Peer, PathLabel, get, {ok, {{<<"200">>, _}, _, Body, Start, End}} = Response) -> gen_server:cast(?MODULE, {end_request, - Peer, PathLabel, Method, + Peer, PathLabel, get, ar_metrics:get_status_class(Response), End-Start, byte_size(term_to_binary(Body))}); -end_request(Peer, PathLabel, Method, Response) -> +end_request(Peer, PathLabel, get, Response) -> %% TODO: error response + ok; +end_request(Peer, PathLabel, _, Response) -> ok. @@ -508,10 +512,7 @@ may_be_rotate_peer_ports(Peer) -> end. get_ip_port({A, B, C, D, Port}) -> - {{A, B, C, D}, Port}; - -get_ip_port({Domain, Port}) -> - {Domain, Port}. + {{A, B, C, D}, Port}. construct_peer({A, B, C, D}, Port) -> {A, B, C, D, Port}. 
diff --git a/apps/arweave/src/ar_randomx_state.erl b/apps/arweave/src/ar_randomx_state.erl index 89dca9096..96cdd13c2 100644 --- a/apps/arweave/src/ar_randomx_state.erl +++ b/apps/arweave/src/ar_randomx_state.erl @@ -331,7 +331,7 @@ get_block2(BH, Peers, RetryCount) -> {Peer, B, Time, Size} -> case ar_block:indep_hash(B) of BH -> - ar_events:send(peer, {served_block, Peer, Time, Size}), + % ar_events:send(peer, {served_block, Peer, Time, Size}), {ok, B}; InvalidBH -> ?LOG_WARNING([ diff --git a/apps/arweave/src/ar_tx_blacklist.erl b/apps/arweave/src/ar_tx_blacklist.erl index c1e3976b3..98586d73e 100644 --- a/apps/arweave/src/ar_tx_blacklist.erl +++ b/apps/arweave/src/ar_tx_blacklist.erl @@ -517,10 +517,11 @@ load_from_url(URL) -> #{ host := Host, path := Path, scheme := Scheme } = M = uri_string:parse(URL), Query = case maps:get(query, M, not_found) of not_found -> <<>>; Q -> [<<"?">>, Q] end, Port = maps:get(port, M, case Scheme of "http" -> 80; "https" -> 443 end), + Peer = ar_util:parse_peer(Host ++ ":" ++ integer_to_list(Port)), Reply = ar_http:req(#{ method => get, - peer => {Host, Port}, + peer => Peer, path => binary_to_list(iolist_to_binary([Path, Query])), is_peer_request => false, timeout => 20000, From 0bce164adb38f4274b2487f0538212db32c17d00 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Tue, 27 Jun 2023 00:28:19 +0000 Subject: [PATCH 07/30] Don't track host-based peers (e.g. the blacklist URL) --- apps/arweave/src/ar_peers.erl | 8 +++++++- apps/arweave/src/ar_tx_blacklist.erl | 3 +-- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index a7435299d..47dc02aa0 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -148,11 +148,17 @@ get_peer_release(Peer) -> -1 end. +start_request({_Host, _Port}, _, _) -> + %% Only track requests for IP-based peers as the rest of the stack assumes an IP-based peer. 
+ ok; start_request(Peer, PathLabel, get) -> gen_server:cast(?MODULE, {start_request, Peer, PathLabel, get}); -start_request(Peer, PathLabel, _) -> +start_request(_Peer, _PathLabel, _) -> ok. +end_request({_Host, _Port}, _, _, _) -> + %% Only track requests for IP-based peers as the rest of the stack assumes an IP-based peer. + ok; end_request(Peer, PathLabel, get, {ok, {{<<"200">>, _}, _, Body, Start, End}} = Response) -> gen_server:cast(?MODULE, {end_request, Peer, PathLabel, get, diff --git a/apps/arweave/src/ar_tx_blacklist.erl b/apps/arweave/src/ar_tx_blacklist.erl index 98586d73e..c1e3976b3 100644 --- a/apps/arweave/src/ar_tx_blacklist.erl +++ b/apps/arweave/src/ar_tx_blacklist.erl @@ -517,11 +517,10 @@ load_from_url(URL) -> #{ host := Host, path := Path, scheme := Scheme } = M = uri_string:parse(URL), Query = case maps:get(query, M, not_found) of not_found -> <<>>; Q -> [<<"?">>, Q] end, Port = maps:get(port, M, case Scheme of "http" -> 80; "https" -> 443 end), - Peer = ar_util:parse_peer(Host ++ ":" ++ integer_to_list(Port)), Reply = ar_http:req(#{ method => get, - peer => Peer, + peer => {Host, Port}, path => binary_to_list(iolist_to_binary([Path, Query])), is_peer_request => false, timeout => 20000, From 4e8b05749ba193376c4755b3682b16fd2b1d6098 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Wed, 28 Jun 2023 01:21:09 +0000 Subject: [PATCH 08/30] Simplify handling of ReadTime and ReadBodySize --- apps/arweave/src/ar_block_pre_validator.erl | 252 ++++++++---------- apps/arweave/src/ar_data_sync.erl | 2 +- apps/arweave/src/ar_data_sync_worker.erl | 5 +- apps/arweave/src/ar_events_sup.erl | 3 +- apps/arweave/src/ar_http_iface_client.erl | 9 +- apps/arweave/src/ar_http_iface_middleware.erl | 15 +- apps/arweave/src/ar_http_iface_server.erl | 10 +- apps/arweave/src/ar_p3_config.erl | 3 +- apps/arweave/src/ar_peers.erl | 69 ++--- apps/arweave/src/ar_poller.erl | 14 +- 10 files changed, 187 insertions(+), 195 deletions(-) diff --git 
a/apps/arweave/src/ar_block_pre_validator.erl b/apps/arweave/src/ar_block_pre_validator.erl index f330729ae..f1ac1ac6a 100644 --- a/apps/arweave/src/ar_block_pre_validator.erl +++ b/apps/arweave/src/ar_block_pre_validator.erl @@ -2,7 +2,7 @@ -behaviour(gen_server). --export([start_link/2, pre_validate/5]). +-export([start_link/2, pre_validate/3]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -39,13 +39,14 @@ start_link(Name, Workers) -> %% is called. Afterwards, the block is put in a limited-size priority queue. %% Bigger-height blocks from better-rated peers have higher priority. Additionally, %% the processing is throttled by IP and solution hash. -pre_validate(B, Peer, Timestamp, ReadBodyTime, BodySize) -> +%% Returns: ok, invalid, skipped +pre_validate(B, Peer, Timestamp) -> #block{ indep_hash = H } = B, case ar_ignore_registry:member(H) of true -> - ok; + skipped; false -> - pre_validate_is_peer_banned(B, Peer, Timestamp, ReadBodyTime, BodySize) + pre_validate_is_peer_banned(B, Peer, Timestamp) end. 
%%%=================================================================== @@ -71,9 +72,10 @@ handle_cast(pre_validate, #state{ pqueue = Q, size = Size, ip_timestamps = IPTim ar_util:cast_after(50, ?MODULE, pre_validate), {noreply, State}; false -> - {{_, {B, PrevB, SolutionResigned, Peer, Timestamp, ReadBodyTime, BodySize}}, + {{_, {B, PrevB, SolutionResigned, Peer, Timestamp}}, Q2} = gb_sets:take_largest(Q), - Size2 = Size - BodySize, + BlockSize = byte_size(term_to_binary(B)), + Size2 = Size - BlockSize, case ar_ignore_registry:permanent_member(B#block.indep_hash) of true -> gen_server:cast(?MODULE, pre_validate), @@ -102,8 +104,7 @@ handle_cast(pre_validate, #state{ pqueue = Q, size = Size, ip_timestamps = IPTim {previous_block, ar_util:encode(PrevB#block.indep_hash)}]), pre_validate_nonce_limiter_seed_data(B, PrevB, - SolutionResigned, Peer, Timestamp, - ReadBodyTime, BodySize), + SolutionResigned, Peer, Timestamp), {IPTimestamps2, HashTimestamps2}; false -> {IPTimestamps2, HashTimestamps} @@ -115,13 +116,13 @@ handle_cast(pre_validate, #state{ pqueue = Q, size = Size, ip_timestamps = IPTim end end; -handle_cast({enqueue, {B, PrevB, SolutionResigned, Peer, Timestamp, ReadBodyTime, BodySize}}, +handle_cast({enqueue, {B, PrevB, SolutionResigned, Peer, Timestamp}}, State) -> #state{ pqueue = Q, size = Size } = State, Priority = priority(B, Peer), - Size2 = Size + BodySize, - Q2 = gb_sets:add_element({Priority, {B, PrevB, SolutionResigned, Peer, Timestamp, - ReadBodyTime, BodySize}}, Q), + BlockSize = byte_size(term_to_binary(B)), + Size2 = Size + BlockSize, + Q2 = gb_sets:add_element({Priority, {B, PrevB, SolutionResigned, Peer, Timestamp}}, Q), {Q3, Size3} = case Size2 > ?MAX_PRE_VALIDATION_QUEUE_SIZE of true -> @@ -177,15 +178,15 @@ terminate(_Reason, _State) -> %%% Private functions. 
%%%=================================================================== -pre_validate_is_peer_banned(B, Peer, Timestamp, ReadBodyTime, BodySize) -> +pre_validate_is_peer_banned(B, Peer, Timestamp) -> case ar_blacklist_middleware:is_peer_banned(Peer) of not_banned -> - pre_validate_previous_block(B, Peer, Timestamp, ReadBodyTime, BodySize); + pre_validate_previous_block(B, Peer, Timestamp); banned -> - ok + skipped end. -pre_validate_previous_block(B, Peer, Timestamp, ReadBodyTime, BodySize) -> +pre_validate_previous_block(B, Peer, Timestamp) -> PrevH = B#block.previous_block, case ar_node:get_block_shadow_from_cache(PrevH) of not_found -> @@ -193,73 +194,67 @@ pre_validate_previous_block(B, Peer, Timestamp, ReadBodyTime, BodySize) -> %% successive blocks are distributed at the same time. Do not %% ban the peer as the block might be valid. If the network adopts %% this block, ar_poller will catch up. - ok; + skipped; #block{ height = PrevHeight } = PrevB -> case B#block.height == PrevHeight + 1 of false -> - ok; + invalid; true -> case B#block.height >= ar_fork:height_2_6() of true -> PrevCDiff = B#block.previous_cumulative_diff, case PrevB#block.cumulative_diff == PrevCDiff of true -> - pre_validate_indep_hash(B, PrevB, Peer, Timestamp, - ReadBodyTime, BodySize); + pre_validate_indep_hash(B, PrevB, Peer, Timestamp); false -> - ok + invalid end; false -> - pre_validate_may_be_fetch_chunk(B, PrevB, Peer, Timestamp, - ReadBodyTime, BodySize) + pre_validate_may_be_fetch_chunk(B, PrevB, Peer, Timestamp) end end end. 
-pre_validate_indep_hash(#block{ indep_hash = H } = B, PrevB, Peer, Timestamp, ReadBodyTime, - BodySize) -> +pre_validate_indep_hash(#block{ indep_hash = H } = B, PrevB, Peer, Timestamp) -> case catch compute_hash(B, PrevB#block.cumulative_diff) of {ok, {BDS, H}} -> ar_ignore_registry:add_temporary(H, 5000), - pre_validate_timestamp(B, BDS, PrevB, Peer, Timestamp, ReadBodyTime, BodySize); + pre_validate_timestamp(B, BDS, PrevB, Peer, Timestamp); {ok, H} -> case ar_ignore_registry:permanent_member(H) of true -> - ok; + skipped; false -> ar_ignore_registry:add_temporary(H, 5000), - pre_validate_timestamp(B, none, PrevB, Peer, Timestamp, ReadBodyTime, - BodySize) + pre_validate_timestamp(B, none, PrevB, Peer, Timestamp) end; {error, invalid_signature} -> post_block_reject_warn(B, check_signature, Peer), ar_events:send(block, {rejected, invalid_signature, B#block.indep_hash, Peer}), - ok; + invalid; {ok, _DifferentH} -> post_block_reject_warn(B, check_indep_hash, Peer), ar_events:send(block, {rejected, invalid_hash, B#block.indep_hash, Peer}), - ok + invalid end. -pre_validate_timestamp(B, BDS, PrevB, Peer, Timestamp, ReadBodyTime, BodySize) -> +pre_validate_timestamp(B, BDS, PrevB, Peer, Timestamp) -> #block{ indep_hash = H } = B, case ar_block:verify_timestamp(B, PrevB) of true -> - pre_validate_existing_solution_hash(B, BDS, PrevB, Peer, Timestamp, ReadBodyTime, - BodySize); + pre_validate_existing_solution_hash(B, BDS, PrevB, Peer, Timestamp); false -> post_block_reject_warn(B, check_timestamp, Peer, [{block_time, B#block.timestamp}, {current_time, os:system_time(seconds)}]), ar_events:send(block, {rejected, invalid_timestamp, H, Peer}), ar_ignore_registry:remove_temporary(B#block.indep_hash), - ok + invalid end. 
-pre_validate_existing_solution_hash(B, BDS, PrevB, Peer, Timestamp, ReadBodyTime, BodySize) -> +pre_validate_existing_solution_hash(B, BDS, PrevB, Peer, Timestamp) -> case B#block.height >= ar_fork:height_2_6() of false -> - pre_validate_last_retarget(B, BDS, PrevB, false, Peer, Timestamp, ReadBodyTime, - BodySize); + pre_validate_last_retarget(B, BDS, PrevB, false, Peer, Timestamp); true -> SolutionH = B#block.hash, #block{ hash = SolutionH, nonce = Nonce, reward_addr = RewardAddr, @@ -316,15 +311,15 @@ pre_validate_existing_solution_hash(B, BDS, PrevB, Peer, Timestamp, ReadBodyTime case ValidatedCachedSolutionDiff of not_found -> pre_validate_nonce_limiter_global_step_number(B, BDS, PrevB, false, Peer, - Timestamp, ReadBodyTime, BodySize); + Timestamp); invalid -> post_block_reject_warn(B, check_resigned_solution_hash, Peer), ar_events:send(block, {rejected, invalid_resigned_solution_hash, B#block.indep_hash, Peer}), - ok; + invalid; {valid, B3} -> pre_validate_nonce_limiter_global_step_number(B3, BDS, PrevB, true, Peer, - Timestamp, ReadBodyTime, BodySize) + Timestamp) end end. @@ -358,8 +353,7 @@ get_last_step_prev_output(B) -> PrevOutput end. 
-pre_validate_nonce_limiter_global_step_number(B, BDS, PrevB, SolutionResigned, Peer, Timestamp, - ReadBodyTime, BodySize) -> +pre_validate_nonce_limiter_global_step_number(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> BlockInfo = B#block.nonce_limiter_info, StepNumber = BlockInfo#nonce_limiter_info.global_step_number, PrevBlockInfo = PrevB#block.nonce_limiter_info, @@ -389,49 +383,43 @@ pre_validate_nonce_limiter_global_step_number(B, BDS, PrevB, SolutionResigned, P ar_events:send(block, {rejected, invalid_nonce_limiter_global_step_number, H, Peer}), ar_ignore_registry:remove_temporary(B#block.indep_hash), - ok; + invalid; true -> prometheus_gauge:set(block_vdf_advance, StepNumber - CurrentStepNumber), pre_validate_previous_solution_hash(B, BDS, PrevB, SolutionResigned, Peer, - Timestamp, ReadBodyTime, BodySize) + Timestamp) end. -pre_validate_previous_solution_hash(B, BDS, PrevB, SolutionResigned, Peer, Timestamp, - ReadBodyTime, BodySize) -> +pre_validate_previous_solution_hash(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> case B#block.previous_solution_hash == PrevB#block.hash of false -> post_block_reject_warn(B, check_previous_solution_hash, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_previous_solution_hash, B#block.indep_hash, Peer}), - ok; + invalid; true -> - pre_validate_last_retarget(B, BDS, PrevB, SolutionResigned, Peer, Timestamp, - ReadBodyTime, BodySize) + pre_validate_last_retarget(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) end. 
-pre_validate_last_retarget(B, BDS, PrevB, SolutionResigned, Peer, Timestamp, ReadBodyTime, - BodySize) -> +pre_validate_last_retarget(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> case B#block.height >= ar_fork:height_2_6() of false -> - pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp, - ReadBodyTime, BodySize); + pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp); true -> case ar_block:verify_last_retarget(B, PrevB) of true -> - pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp, - ReadBodyTime, BodySize); + pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp); false -> post_block_reject_warn(B, check_last_retarget, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_last_retarget, B#block.indep_hash, Peer}), - ok + invalid end end. -pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp, ReadBodyTime, - BodySize) -> +pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> DiffValid = case B#block.height >= ar_fork:height_2_6() of true -> @@ -442,16 +430,15 @@ pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp, ReadBo case DiffValid of true -> pre_validate_cumulative_difficulty(B, BDS, PrevB, SolutionResigned, Peer, - Timestamp, ReadBodyTime, BodySize); + Timestamp); _ -> post_block_reject_warn(B, check_difficulty, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_difficulty, B#block.indep_hash, Peer}), - ok + invalid end. 
-pre_validate_cumulative_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp, - ReadBodyTime, BodySize) -> +pre_validate_cumulative_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> case B#block.height >= ar_fork:height_2_6() of true -> case ar_block:verify_cumulative_diff(B, PrevB) of @@ -460,22 +447,22 @@ pre_validate_cumulative_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timest ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_cumulative_difficulty, B#block.indep_hash, Peer}), - ok; + invalid; true -> case SolutionResigned of true -> gen_server:cast(?MODULE, {enqueue, {B, PrevB, true, Peer, - Timestamp, ReadBodyTime, BodySize}}); + Timestamp}}), + ok; false -> - pre_validate_quick_pow(B, PrevB, false, Peer, Timestamp, - ReadBodyTime, BodySize) + pre_validate_quick_pow(B, PrevB, false, Peer, Timestamp) end end; false -> - pre_validate_pow(B, BDS, PrevB, Peer, Timestamp, ReadBodyTime, BodySize) + pre_validate_pow(B, BDS, PrevB, Peer, Timestamp) end. -pre_validate_quick_pow(B, PrevB, SolutionResigned, Peer, Timestamp, ReadBodyTime, BodySize) -> +pre_validate_quick_pow(B, PrevB, SolutionResigned, Peer, Timestamp) -> #block{ hash_preimage = HashPreimage, diff = Diff, nonce_limiter_info = NonceLimiterInfo, partition_number = PartitionNumber, reward_addr = RewardAddr } = B, PrevNonceLimiterInfo = get_prev_nonce_limiter_info(PrevB), @@ -494,7 +481,7 @@ pre_validate_quick_pow(B, PrevB, SolutionResigned, Peer, Timestamp, ReadBodyTime not_found -> %% The new blocks should have been applied in the meantime since we %% looked for the previous block in the block cache. 
- ok; + skipped; _ -> case binary:decode_unsigned(SolutionHash, big) > Diff of false -> @@ -502,10 +489,11 @@ pre_validate_quick_pow(B, PrevB, SolutionResigned, Peer, Timestamp, ReadBodyTime ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_hash_preimage, B#block.indep_hash, Peer}), - ok; + invalid; true -> gen_server:cast(?MODULE, {enqueue, {B, PrevB, SolutionResigned, Peer, - Timestamp, ReadBodyTime, BodySize}}) + Timestamp}}), + ok end end. @@ -527,8 +515,7 @@ get_prev_nonce_limiter_info(#block{ indep_hash = PrevH, height = PrevHeight } = PrevB#block.nonce_limiter_info end. -pre_validate_nonce_limiter_seed_data(B, PrevB, SolutionResigned, Peer, Timestamp, ReadBodyTime, - BodySize) -> +pre_validate_nonce_limiter_seed_data(B, PrevB, SolutionResigned, Peer, Timestamp) -> Info = B#block.nonce_limiter_info, #nonce_limiter_info{ global_step_number = StepNumber, seed = Seed, next_seed = NextSeed, partition_upper_bound = PartitionUpperBound, @@ -538,7 +525,7 @@ pre_validate_nonce_limiter_seed_data(B, PrevB, SolutionResigned, Peer, Timestamp not_found -> %% The new blocks should have been applied in the meantime since we %% looked for the previous block in the block cache. - ok; + skipped; PrevNonceLimiterInfo -> ExpectedSeedData = ar_nonce_limiter:get_seed_data(StepNumber, PrevNonceLimiterInfo, PrevB#block.indep_hash, PrevB#block.weave_size), @@ -546,18 +533,17 @@ pre_validate_nonce_limiter_seed_data(B, PrevB, SolutionResigned, Peer, Timestamp NextPartitionUpperBound} of true -> pre_validate_partition_number(B, PrevB, PartitionUpperBound, - SolutionResigned, Peer, Timestamp, ReadBodyTime, BodySize); + SolutionResigned, Peer, Timestamp); false -> post_block_reject_warn(B, check_nonce_limiter_seed_data, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_nonce_limiter_seed_data, B#block.indep_hash, Peer}), - ok + invalid end end. 
-pre_validate_partition_number(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, Timestamp, - ReadBodyTime, BodySize) -> +pre_validate_partition_number(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, Timestamp) -> Max = max(0, PartitionUpperBound div ?PARTITION_SIZE - 1), case B#block.partition_number > Max of true -> @@ -565,34 +551,32 @@ pre_validate_partition_number(B, PrevB, PartitionUpperBound, SolutionResigned, P ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_partition_number, B#block.indep_hash, Peer}), - ok; + invalid; false -> - pre_validate_nonce(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, - Timestamp, ReadBodyTime, BodySize) + pre_validate_nonce(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, Timestamp) end. -pre_validate_nonce(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, Timestamp, - ReadBodyTime, BodySize) -> +pre_validate_nonce(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, Timestamp) -> Max = max(0, (?RECALL_RANGE_SIZE) div ?DATA_CHUNK_SIZE - 1), case B#block.nonce > Max of true -> post_block_reject_warn(B, check_nonce, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_nonce, B#block.indep_hash, Peer}), - ok; + invalid; false -> case SolutionResigned of true -> - accept_block(B, Peer, ReadBodyTime, BodySize, Timestamp, false); + accept_block(B, Peer, Timestamp, false); false -> pre_validate_may_be_fetch_first_chunk(B, PrevB, PartitionUpperBound, Peer, - Timestamp, ReadBodyTime, BodySize) + Timestamp) end end. 
pre_validate_may_be_fetch_first_chunk(#block{ recall_byte = RecallByte, - poa = #poa{ chunk = <<>> } } = B, PrevB, PartitionUpperBound, Peer, Timestamp, - ReadBodyTime, BodySize) when RecallByte /= undefined -> + poa = #poa{ chunk = <<>> } } = B, PrevB, PartitionUpperBound, Peer, Timestamp) + when RecallByte /= undefined -> case ar_data_sync:get_chunk(RecallByte + 1, #{ pack => true, packing => {spora_2_6, B#block.reward_addr}, bucket_based_offset => true }) of {ok, #{ chunk := Chunk, data_path := DataPath, tx_path := TXPath }} -> @@ -600,39 +584,34 @@ pre_validate_may_be_fetch_first_chunk(#block{ recall_byte = RecallByte, B2 = B#block{ poa = #poa{ chunk = Chunk, data_path = DataPath, tx_path = TXPath } }, pre_validate_may_be_fetch_second_chunk(B2, PrevB, PartitionUpperBound, - Peer, Timestamp, ReadBodyTime, BodySize); + Peer, Timestamp); _ -> ar_events:send(block, {rejected, failed_to_fetch_first_chunk, B#block.indep_hash, Peer}), - ok + invalid end; -pre_validate_may_be_fetch_first_chunk(B, PrevB, PartitionUpperBound, Peer, Timestamp, - ReadBodyTime, BodySize) -> - pre_validate_may_be_fetch_second_chunk(B, PrevB, PartitionUpperBound, Peer, Timestamp, - ReadBodyTime, BodySize). +pre_validate_may_be_fetch_first_chunk(B, PrevB, PartitionUpperBound, Peer, Timestamp) -> + pre_validate_may_be_fetch_second_chunk(B, PrevB, PartitionUpperBound, Peer, Timestamp). 
pre_validate_may_be_fetch_second_chunk(#block{ recall_byte2 = RecallByte2, - poa2 = #poa{ chunk = <<>> } } = B, PrevB, PartitionUpperBound, Peer, Timestamp, - ReadBodyTime, BodySize) when RecallByte2 /= undefined -> + poa2 = #poa{ chunk = <<>> } } = B, PrevB, PartitionUpperBound, Peer, Timestamp) + when RecallByte2 /= undefined -> case ar_data_sync:get_chunk(RecallByte2 + 1, #{ pack => true, packing => {spora_2_6, B#block.reward_addr}, bucket_based_offset => true }) of {ok, #{ chunk := Chunk, data_path := DataPath, tx_path := TXPath }} -> prometheus_counter:inc(block2_fetched_chunks), B2 = B#block{ poa2 = #poa{ chunk = Chunk, data_path = DataPath, tx_path = TXPath } }, - pre_validate_pow_2_6(B2, PrevB, PartitionUpperBound, Peer, Timestamp, - ReadBodyTime, BodySize); + pre_validate_pow_2_6(B2, PrevB, PartitionUpperBound, Peer, Timestamp); _ -> ar_events:send(block, {rejected, failed_to_fetch_second_chunk, B#block.indep_hash, Peer}), - ok + invalid end; -pre_validate_may_be_fetch_second_chunk(B, PrevB, PartitionUpperBound, Peer, Timestamp, - ReadBodyTime, BodySize) -> - pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, Timestamp, ReadBodyTime, - BodySize). +pre_validate_may_be_fetch_second_chunk(B, PrevB, PartitionUpperBound, Peer, Timestamp) -> + pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, Timestamp). 
-pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, Timestamp, ReadBodyTime, BodySize) -> +pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, Timestamp) -> NonceLimiterInfo = B#block.nonce_limiter_info, NonceLimiterOutput = NonceLimiterInfo#nonce_limiter_info.output, PrevNonceLimiterInfo = get_prev_nonce_limiter_info(PrevB), @@ -645,26 +624,23 @@ pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, Timestamp, ReadBodyTim andalso Preimage1 == B#block.hash_preimage andalso B#block.recall_byte2 == undefined of true -> - pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp, - ReadBodyTime, BodySize); + pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp); false -> Chunk2 = (B#block.poa2)#poa.chunk, {H2, Preimage2} = ar_block:compute_h2(H1, Chunk2, H0), case H2 == B#block.hash andalso binary:decode_unsigned(H2, big) > B#block.diff andalso Preimage2 == B#block.hash_preimage of true -> - pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp, - ReadBodyTime, BodySize); + pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp); false -> post_block_reject_warn(B, check_pow, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_pow, B#block.indep_hash, Peer}), - ok + invalid end end. 
-pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp, ReadBodyTime, - BodySize) -> +pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp) -> {RecallRange1Start, RecallRange2Start} = ar_block:get_recall_range(H0, B#block.partition_number, PartitionUpperBound), RecallByte1 = RecallRange1Start + B#block.nonce * ?DATA_CHUNK_SIZE, @@ -675,17 +651,17 @@ pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp, ReadBod andalso RecallByte1 == B#block.recall_byte of error -> ?LOG_ERROR([{event, failed_to_validate_proof_of_access}, - {block, ar_util:encode(B#block.indep_hash)}]); + {block, ar_util:encode(B#block.indep_hash)}]), + invalid; false -> post_block_reject_warn(B, check_poa, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_poa, B#block.indep_hash, Peer}), - ok; + invalid; true -> case B#block.hash == H1 of true -> - pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp, ReadBodyTime, - BodySize); + pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp); false -> RecallByte2 = RecallRange2Start + B#block.nonce * ?DATA_CHUNK_SIZE, {BlockStart2, BlockEnd2, TXRoot2} = ar_block_index:get_block_bounds( @@ -697,65 +673,65 @@ pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp, ReadBod andalso RecallByte2 == B#block.recall_byte2 of error -> ?LOG_ERROR([{event, failed_to_validate_proof_of_access}, - {block, ar_util:encode(B#block.indep_hash)}]); + {block, ar_util:encode(B#block.indep_hash)}]), + invalid; false -> post_block_reject_warn(B, check_poa2, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_poa2, B#block.indep_hash, Peer}), - ok; + invalid; true -> - pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp, ReadBodyTime, - BodySize) + pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp) end end end. 
-pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp, ReadBodyTime, BodySize) -> +pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp) -> PrevOutput = get_last_step_prev_output(B), case ar_nonce_limiter:validate_last_step_checkpoints(B, PrevB, PrevOutput) of {false, cache_mismatch} -> ar_ignore_registry:add(B#block.indep_hash), post_block_reject_warn(B, check_nonce_limiter, Peer), ar_events:send(block, {rejected, invalid_nonce_limiter, B#block.indep_hash, Peer}), - ok; + invalid; false -> post_block_reject_warn(B, check_nonce_limiter, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_nonce_limiter, B#block.indep_hash, Peer}), - ok; + invalid; {true, cache_match} -> - accept_block(B, Peer, ReadBodyTime, BodySize, Timestamp, true); + accept_block(B, Peer, Timestamp, true); true -> - accept_block(B, Peer, ReadBodyTime, BodySize, Timestamp, false) + accept_block(B, Peer, Timestamp, false) end. -accept_block(B, Peer, ReadBodyTime, BodySize, Timestamp, Gossip) -> +accept_block(B, Peer, Timestamp, Gossip) -> ar_ignore_registry:add(B#block.indep_hash), ar_events:send(block, {new, B, #{ source => {peer, Peer}, gossip => Gossip }}), - % ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), + % ar_peers:gossiped_block(Peer), record_block_pre_validation_time(Timestamp), ?LOG_INFO([{event, accepted_block}, {height, B#block.height}, - {indep_hash, ar_util:encode(B#block.indep_hash)}]). + {indep_hash, ar_util:encode(B#block.indep_hash)}]), + ok. 
pre_validate_may_be_fetch_chunk(#block{ recall_byte = RecallByte, - poa = #poa{ chunk = <<>> } } = B, PrevB, Peer, Timestamp, ReadBodyTime, - BodySize) when RecallByte /= undefined -> + poa = #poa{ chunk = <<>> } } = B, PrevB, Peer, Timestamp) when RecallByte /= undefined -> Options = #{ pack => false, packing => spora_2_5, bucket_based_offset => true }, case ar_data_sync:get_chunk(RecallByte + 1, Options) of {ok, #{ chunk := Chunk, data_path := DataPath, tx_path := TXPath }} -> prometheus_counter:inc(block2_fetched_chunks), B2 = B#block{ poa = #poa{ chunk = Chunk, tx_path = TXPath, data_path = DataPath } }, - pre_validate_indep_hash(B2, PrevB, Peer, Timestamp, ReadBodyTime, BodySize); + pre_validate_indep_hash(B2, PrevB, Peer, Timestamp); _ -> ar_events:send(block, {rejected, failed_to_fetch_chunk, B#block.indep_hash, Peer}), - ok + invalid end; -pre_validate_may_be_fetch_chunk(B, PrevB, Peer, Timestamp, ReadBodyTime, BodySize) -> - pre_validate_indep_hash(B, PrevB, Peer, Timestamp, ReadBodyTime, BodySize). +pre_validate_may_be_fetch_chunk(B, PrevB, Peer, Timestamp) -> + pre_validate_indep_hash(B, PrevB, Peer, Timestamp). -pre_validate_pow(B, BDS, PrevB, Peer, Timestamp, ReadBodyTime, BodySize) -> +pre_validate_pow(B, BDS, PrevB, Peer, Timestamp) -> #block{ indep_hash = PrevH } = PrevB, MaybeValid = case ar_node:get_recent_partition_upper_bound_by_prev_h(PrevH) of @@ -768,7 +744,7 @@ pre_validate_pow(B, BDS, PrevB, Peer, Timestamp, ReadBodyTime, BodySize) -> not_found -> %% The new blocks should have been applied in the meantime since we %% looked for the previous block in the block cache. 
- ok; + skipped; {true, RecallByte} -> H = B#block.indep_hash, %% Include all transactions found in the mempool in place of the @@ -777,7 +753,7 @@ pre_validate_pow(B, BDS, PrevB, Peer, Timestamp, ReadBodyTime, BodySize) -> B2 = B#block{ txs = include_transactions(B#block.txs) }, ar_events:send(block, {new, B2, #{ source => {peer, Peer}, recall_byte => RecallByte }}), - % ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), + % ar_peers:gossiped_block(Peer), record_block_pre_validation_time(Timestamp), prometheus_counter:inc(block2_received_transactions, count_received_transactions(B#block.txs)), @@ -787,7 +763,7 @@ pre_validate_pow(B, BDS, PrevB, Peer, Timestamp, ReadBodyTime, BodySize) -> post_block_reject_warn(B, check_pow, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_pow, B#block.indep_hash, Peer}), - ok + invalid end. compute_hash(B, PrevCDiff) -> diff --git a/apps/arweave/src/ar_data_sync.erl b/apps/arweave/src/ar_data_sync.erl index ff63d7961..b019be584 100644 --- a/apps/arweave/src/ar_data_sync.erl +++ b/apps/arweave/src/ar_data_sync.erl @@ -913,7 +913,7 @@ handle_cast(sync_intervals, State) -> sync_intervals_queue_intervals = I2 }} end; -handle_cast({store_fetched_chunk, Peer, Time, TransferSize, Byte, Proof} = Cast, State) -> +handle_cast({store_fetched_chunk, Peer, Byte, Proof} = Cast, State) -> #sync_data_state{ packing_map = PackingMap } = State, #{ data_path := DataPath, tx_path := TXPath, chunk := Chunk, packing := Packing } = Proof, SeekByte = get_chunk_seek_offset(Byte + 1) - 1, diff --git a/apps/arweave/src/ar_data_sync_worker.erl b/apps/arweave/src/ar_data_sync_worker.erl index 5b88a11ee..9a8f8bf89 100644 --- a/apps/arweave/src/ar_data_sync_worker.erl +++ b/apps/arweave/src/ar_data_sync_worker.erl @@ -220,7 +220,7 @@ sync_range({Start, End, Peer, TargetStoreID, RetryCount} = Args) -> ok; false -> case ar_http_iface_client:get_chunk_binary(Peer, Start2, any) of - {ok, 
#{ chunk := Chunk } = Proof, Time, TransferSize} -> + {ok, #{ chunk := Chunk } = Proof} -> %% In case we fetched a packed small chunk, %% we may potentially skip some chunks by %% continuing with Start2 + byte_size(Chunk) - the skip @@ -228,8 +228,7 @@ sync_range({Start, End, Peer, TargetStoreID, RetryCount} = Args) -> Start3 = ar_data_sync:get_chunk_padded_offset( Start2 + byte_size(Chunk)) + 1, gen_server:cast(list_to_atom("ar_data_sync_" ++ TargetStoreID), - {store_fetched_chunk, Peer, Time, TransferSize, Start2 - 1, - Proof}), + {store_fetched_chunk, Peer, Start2 - 1, Proof}), ar_data_sync:increment_chunk_cache_size(), sync_range({Start3, End, Peer, TargetStoreID, RetryCount}); {error, timeout} -> diff --git a/apps/arweave/src/ar_events_sup.erl b/apps/arweave/src/ar_events_sup.erl index 634029f66..d08907795 100644 --- a/apps/arweave/src/ar_events_sup.erl +++ b/apps/arweave/src/ar_events_sup.erl @@ -40,8 +40,7 @@ init([]) -> ?CHILD(ar_events, block, worker), %% Events: unpacked, packed. ?CHILD(ar_events, chunk, worker), - %% Events: made_request, bad_response, served_tx, served_block, served_chunk, - %% gossiped_tx, gossiped_block, banned + %% Events: made_request, bad_response, served_tx, served_block, served_chunk, banned ?CHILD(ar_events, peer, worker), %% Events: initializing, initialized, validated_pre_fork_2_6_block, new_tip, %% checkpoint_block, search_space_upper_bound. 
diff --git a/apps/arweave/src/ar_http_iface_client.erl b/apps/arweave/src/ar_http_iface_client.erl index 83ef0a49b..970e6eeff 100644 --- a/apps/arweave/src/ar_http_iface_client.erl +++ b/apps/arweave/src/ar_http_iface_client.erl @@ -10,7 +10,7 @@ get_wallet_list_chunk/2, get_wallet_list_chunk/3, get_wallet_list/2, add_peer/1, get_info/1, get_info/2, get_peers/1, get_time/2, get_height/1, get_block_index/3, get_sync_record/1, get_sync_record/3, - get_chunk_json/3, get_chunk_binary/3, get_mempool/1, get_sync_buckets/1, + get_chunk_binary/3, get_mempool/1, get_sync_buckets/1, get_recent_hash_list/1, get_recent_hash_list_diff/2, get_reward_history/3, push_nonce_limiter_update/2, get_vdf_update/1, get_vdf_session/1, get_previous_vdf_session/1]). @@ -302,9 +302,6 @@ get_sync_record(Peer, Start, Limit) -> headers => Headers }), Start, Limit). -get_chunk_json(Peer, Offset, RequestedPacking) -> - get_chunk(Peer, Offset, RequestedPacking, json). - get_chunk_binary(Peer, Offset, RequestedPacking) -> get_chunk(Peer, Offset, RequestedPacking, binary). @@ -562,7 +559,7 @@ handle_sync_record_response({ok, {{<<"200">>, _}, _, Body, _, _}}, Start, Limit) handle_sync_record_response(Reply, _, _) -> {error, Reply}. 
-handle_chunk_response(Encoding, {ok, {{<<"200">>, _}, _, Body, Start, End}}) -> +handle_chunk_response(Encoding, {ok, {{<<"200">>, _}, _, Body, _, _}}) -> DecodeFun = case Encoding of json -> @@ -591,7 +588,7 @@ handle_chunk_response(Encoding, {ok, {{<<"200">>, _}, _, Body, Start, End}}) -> Chunk when byte_size(Chunk) > ?DATA_CHUNK_SIZE -> {error, chunk_bigger_than_256kib}; _ -> - {ok, Proof, End - Start, byte_size(term_to_binary(Proof))} + {ok, Proof} end end; handle_chunk_response(_Encoding, {error, _} = Response) -> diff --git a/apps/arweave/src/ar_http_iface_middleware.erl b/apps/arweave/src/ar_http_iface_middleware.erl index f5808a353..b44917794 100644 --- a/apps/arweave/src/ar_http_iface_middleware.erl +++ b/apps/arweave/src/ar_http_iface_middleware.erl @@ -1830,8 +1830,7 @@ handle_post_tx_accepted(Req, TX, Peer) -> %% of excessive transaction volumes. {A, B, C, D, _} = Peer, ar_blacklist_middleware:decrement_ip_addr({A, B, C, D}, Req), - % ar_events:send(peer, {gossiped_tx, Peer, erlang:get(read_body_time), - % erlang:get(body_size)}), + ar_peers:gossiped_tx(Peer), ar_events:send(tx, {new, TX, Peer}), TXID = TX#tx.id, ar_ignore_registry:remove_temporary(TXID), @@ -2318,9 +2317,6 @@ post_block(read_body, Peer, {Req, Pid, Encoding}, ReceiveTimestamp) -> {error, _} -> {400, #{}, <<"Invalid block.">>, Req2}; {ok, BShadow} -> - ReadBodyTime = timer:now_diff(erlang:timestamp(), ReceiveTimestamp), - erlang:put(read_body_time, ReadBodyTime), - erlang:put(body_size, byte_size(term_to_binary(BShadow))), post_block(check_transactions_are_present, {BShadow, Peer}, Req2, ReceiveTimestamp) end; @@ -2343,7 +2339,7 @@ post_block(check_transactions_are_present, {BShadow, Peer}, Req, ReceiveTimestam _ -> % POST /block; do not reject for backwards-compatibility post_block(enqueue_block, {BShadow, Peer}, Req, ReceiveTimestamp) end; -post_block(enqueue_block, {B, Peer}, Req, Timestamp) -> +post_block(enqueue_block, {B, Peer}, Req, ReceiveTimestamp) -> B2 = case B#block.height 
>= ar_fork:height_2_6() of true -> @@ -2362,8 +2358,11 @@ post_block(enqueue_block, {B, Peer}, Req, Timestamp) -> end end, ?LOG_INFO([{event, received_block}, {block, ar_util:encode(B#block.indep_hash)}]), - ar_block_pre_validator:pre_validate(B2, Peer, Timestamp, erlang:get(read_body_time), - erlang:get(body_size)), + %% ReadBodyTime in microseconds, measure elapsed time before validation since the validation + %% operation can take some time. + ReadBodyTime = timer:now_diff(erlang:timestamp(), ReceiveTimestamp), + ValidationStatus = ar_block_pre_validator:pre_validate(B2, Peer, ReceiveTimestamp), + ar_peers:gossiped_block(Peer, B2, ValidationStatus, ReadBodyTime), {200, #{}, <<"OK">>, Req}. encode_txids([]) -> diff --git a/apps/arweave/src/ar_http_iface_server.erl b/apps/arweave/src/ar_http_iface_server.erl index cdb6cc2a1..e8430775a 100644 --- a/apps/arweave/src/ar_http_iface_server.erl +++ b/apps/arweave/src/ar_http_iface_server.erl @@ -5,7 +5,7 @@ -module(ar_http_iface_server). -export([start/0, stop/0]). --export([split_path/1, label_http_path/1]). +-export([split_path/1, label_http_path/1, label_req/1]). -include_lib("arweave/include/ar.hrl"). -include_lib("arweave/include/ar_config.hrl"). @@ -45,6 +45,10 @@ label_http_path(Path) when is_list(Path) -> label_http_path(Path) -> label_http_path(split_path(Path)). +label_req(Req) -> + SplitPath = ar_http_iface_server:split_path(cowboy_req:path(Req)), + ar_http_iface_server:label_http_path(SplitPath). + %%%=================================================================== %%% Private functions. 
%%%=================================================================== @@ -309,10 +313,14 @@ name_route([<<"hash_list">>, _From, _To]) -> name_route([<<"hash_list2">>, _From, _To]) -> "/hash_list2/{from}/{to}"; +name_route([<<"block">>]) -> + "/block"; name_route([<<"block">>, <<"hash">>, _IndepHash]) -> "/block/hash/{indep_hash}"; name_route([<<"block">>, <<"height">>, _Height]) -> "/block/height/{height}"; +name_route([<<"block2">>]) -> + "/block2"; name_route([<<"block2">>, <<"hash">>, _IndepHash]) -> "/block2/hash/{indep_hash}"; name_route([<<"block2">>, <<"height">>, _Height]) -> diff --git a/apps/arweave/src/ar_p3_config.erl b/apps/arweave/src/ar_p3_config.erl index 1ab5388b5..7ae3034fc 100644 --- a/apps/arweave/src/ar_p3_config.erl +++ b/apps/arweave/src/ar_p3_config.erl @@ -80,8 +80,7 @@ get_payments_value(P3Config, Asset, Field) when end. get_service_config(P3Config, Req) -> - SplitPath = ar_http_iface_server:split_path(cowboy_req:path(Req)), - Path = ar_http_iface_server:label_http_path(SplitPath), + Path = ar_http_iface_server:label_req(Req), case Path of undefined -> undefined; diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 47dc02aa0..93fbdbb89 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -10,7 +10,7 @@ -export([start_link/0, get_peers/0, get_trusted_peers/0, is_public_peer/1, get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, - resolve_and_cache_peer/2, start_request/3, end_request/4]). + resolve_and_cache_peer/2, start_request/3, end_request/4, gossiped_block/4, gossiped_tx/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -170,6 +170,20 @@ end_request(Peer, PathLabel, get, Response) -> end_request(Peer, PathLabel, _, Response) -> ok. 
+gossiped_block(Peer, B, ok, ReadBodyTime) -> + gen_server:cast(?MODULE, { + gossiped_data, block, Peer, ReadBodyTime, byte_size(term_to_binary(B)) + }); +gossiped_block(_Peer, _B, _ValidationStatus, _ReadBodyTime) -> + %% Ignore skipped or invalid blocks for now (consistent with old behavior, but may need to + %% be revisited) + ok. + + +gossiped_tx(Peer) -> + gen_server:cast(?MODULE, { + gossiped_data, tx, Peer, erlang:get(read_body_time), erlang:get(body_size) + }). %% @doc Print statistics about the current peers. stats() -> @@ -290,6 +304,7 @@ handle_cast({start_request, Peer, PathLabel, Method}, State) -> handle_cast({end_request, Peer, PathLabel, _Method, Status, ElapsedMicroseconds, Size}, State) -> ?LOG_DEBUG([ {event, update_rating}, + {update_type, request}, {path, PathLabel}, {status, Status}, {peer, ar_util:format_peer(Peer)}, @@ -299,6 +314,23 @@ handle_cast({end_request, Peer, PathLabel, _Method, Status, ElapsedMicroseconds, update_rating(Peer, ElapsedMicroseconds, Size), {noreply, State}; +handle_cast({gossiped_data, DataType, Peer, ElapsedMicroseconds, Size}, State) -> + case check_external_peer(Peer) of + ok -> + ?LOG_DEBUG([ + {event, update_rating}, + {update_type, DataType}, + {peer, ar_util:format_peer(Peer)}, + {time_delta, ElapsedMicroseconds}, + {size, Size} + ]), + update_rating(Peer, ElapsedMicroseconds, Size); + _ -> + ok + end, + + {noreply, State}; + handle_cast(Cast, State) -> ?LOG_WARNING("event: unhandled_cast, cast: ~p", [Cast]), {noreply, State}. @@ -332,35 +364,6 @@ handle_info({event, peer, {served_block, Peer, TimeDelta, Size}}, State) -> % update_rating(Peer, TimeDelta, Size), {noreply, State}; -handle_info({event, peer, {gossiped_tx, Peer, TimeDelta, Size}}, State) -> - %% Only the first peer who sent the given transaction is rated. - %% Otherwise, one may exploit the endpoint to gain reputation. 
- % case check_external_peer(Peer) of - % ok -> - % ?LOG_DEBUG([{event, update_rating}, {type, gossiped_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - % update_rating(Peer, TimeDelta, Size); - % _ -> - % ok - % end, - {noreply, State}; - -handle_info({event, peer, {gossiped_block, Peer, TimeDelta, Size}}, State) -> - %% Only the first peer who sent the given block is rated. - %% Otherwise, one may exploit the endpoint to gain reputation. - % case check_external_peer(Peer) of - % ok -> - % ?LOG_DEBUG([{event, update_rating}, {type, gossiped_block}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - % update_rating(Peer, TimeDelta, Size); - % _ -> - % ok - % end, - {noreply, State}; - -handle_info({event, peer, {served_chunk, Peer, TimeDelta, Size}}, State) -> - % ?LOG_DEBUG([{event, update_rating}, {type, served_chunk}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - % update_rating(Peer, TimeDelta, Size), - {noreply, State}; - handle_info({event, peer, {bad_response, {Peer, _Type, _Reason}}}, State) -> issue_warning(Peer), {noreply, State}; @@ -430,10 +433,10 @@ discover_peers([Peer | Peers]) -> format_stats(Peer, Perf) -> KB = Perf#performance.bytes / 1024, - Seconds = (Perf#performance.time + 1) / 1000000, + Seconds = (Perf#performance.time + 1) / 1000, io:format("\t~s ~.2f kB/s (~.2f kB, ~.2f s, ~p transfers, ~B failures)~n", - [string:pad(ar_util:format_peer(Peer), 20, trailing, $ ), - KB / Seconds, KB, Seconds, + [string:pad(ar_util:format_peer(Peer), 21, trailing, $ ), + float(Perf#performance.rating), KB, Seconds, Perf#performance.transfers, Perf#performance.failures]). 
load_peers() -> diff --git a/apps/arweave/src/ar_poller.erl b/apps/arweave/src/ar_poller.erl index 95fb7cd07..c4079b659 100644 --- a/apps/arweave/src/ar_poller.erl +++ b/apps/arweave/src/ar_poller.erl @@ -142,7 +142,19 @@ handle_info({event, block, {discovered, Peer, B, Time, Size}}, State) -> true -> ok end, - ar_block_pre_validator:pre_validate(B, Peer, erlang:timestamp(), Time, Size), + %% How we rank peers changed in June 2023 + %% + %% Previous Behavior: + %% - throughput metrics (block size and time to download) were recorded in pre_validate + %% for valid blocks only + %% Current Behavior: + %% - throughput metrics are recorded for all outbound web requests to peers (including the + %% GET /block/hash request that triggers the block/discovered event) + %% + %% The new behavior is slightly different, but I believe it still results in a valid ranking. + %% Future work may change the behavior further (e.g. regarding when penalties are recorded + %% for errors or invalid blocks) + ar_block_pre_validator:pre_validate(B, Peer, erlang:timestamp()), {noreply, State}; handle_info({event, block, _}, State) -> {noreply, State}; From d3e28ebd4695caef681769a667b4822202b7f1c5 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Wed, 28 Jun 2023 06:31:05 +0000 Subject: [PATCH 09/30] remove erlang:put/get read_body_time and body_size --- apps/arweave/src/ar_http_iface_middleware.erl | 41 +++++++++---------- apps/arweave/src/ar_peers.erl | 6 +-- 2 files changed, 22 insertions(+), 25 deletions(-) diff --git a/apps/arweave/src/ar_http_iface_middleware.erl b/apps/arweave/src/ar_http_iface_middleware.erl index b44917794..aa76afa9a 100644 --- a/apps/arweave/src/ar_http_iface_middleware.erl +++ b/apps/arweave/src/ar_http_iface_middleware.erl @@ -544,8 +544,10 @@ handle(<<"POST">>, [<<"unsigned_tx">>], Req, Pid) -> {false, _} -> not_joined(Req); {true, pass} -> + Timestamp = erlang:timestamp(), case read_complete_body(Req, Pid) of {ok, Body, Req2} -> + ReadBodyTime = 
timer:now_diff(erlang:timestamp(), Timestamp), {UnsignedTXProps} = ar_serialize:dejsonify(Body), WalletAccessCode = proplists:get_value(<<"wallet_access_code">>, UnsignedTXProps), @@ -581,7 +583,7 @@ handle(<<"POST">>, [<<"unsigned_tx">>], Req, Pid) -> Peer = ar_http_util:arweave_peer(Req), Reply = ar_serialize:jsonify({[{<<"id">>, ar_util:encode(SignedTX#tx.id)}]}), - case handle_post_tx(Req2, Peer, SignedTX) of + case handle_post_tx(Req2, Peer, SignedTX, ReadBodyTime) of ok -> {200, #{}, Reply, Req2}; {error_response, {Status, Headers, ErrBody}} -> @@ -1789,9 +1791,9 @@ handle_post_tx({Req, Pid, Encoding}) -> {413, #{}, <<"Payload too large">>, Req2}; {error, timeout} -> {503, #{}, <<>>, Req}; - {ok, TX, Req2} -> + {ok, TX, Req2, ReadBodyTime} -> Peer = ar_http_util:arweave_peer(Req), - case handle_post_tx(Req2, Peer, TX) of + case handle_post_tx(Req2, Peer, TX, ReadBodyTime) of ok -> {200, #{}, <<"OK">>, Req2}; {error_response, {Status, Headers, Body}} -> @@ -1802,7 +1804,7 @@ handle_post_tx({Req, Pid, Encoding}) -> end end. -handle_post_tx(Req, Peer, TX) -> +handle_post_tx(Req, Peer, TX, ReadBodyTime) -> case ar_tx_validator:validate(TX) of {invalid, tx_verification_failed} -> handle_post_tx_verification_response(); @@ -1821,16 +1823,16 @@ handle_post_tx(Req, Peer, TX) -> {valid, TX2} -> ar_data_sync:add_data_root_to_disk_pool(TX2#tx.data_root, TX2#tx.data_size, TX#tx.id), - handle_post_tx_accepted(Req, TX, Peer) + handle_post_tx_accepted(Req, TX, Peer, ReadBodyTime) end. -handle_post_tx_accepted(Req, TX, Peer) -> +handle_post_tx_accepted(Req, TX, Peer, ReadBodyTime) -> %% Exclude successful requests with valid transactions from the %% IP-based throttling, to avoid connectivity issues at the times %% of excessive transaction volumes. 
{A, B, C, D, _} = Peer, ar_blacklist_middleware:decrement_ip_addr({A, B, C, D}, Req), - ar_peers:gossiped_tx(Peer), + ar_peers:gossiped_tx(Peer, TX, ReadBodyTime), ar_events:send(tx, {new, TX, Peer}), TXID = TX#tx.id, ar_ignore_registry:remove_temporary(TXID), @@ -2708,18 +2710,19 @@ post_tx_parse_id(read_body, {TXID, Req, Pid, Encoding}) -> Timestamp = erlang:timestamp(), case read_complete_body(Req, Pid) of {ok, Body, Req2} -> + ReadBodyTime = timer:now_diff(erlang:timestamp(), Timestamp), case Encoding of json -> - post_tx_parse_id(parse_json, {TXID, Req2, Body, Timestamp}); + post_tx_parse_id(parse_json, {TXID, Req2, Body, ReadBodyTime}); binary -> - post_tx_parse_id(parse_binary, {TXID, Req2, Body, Timestamp}) + post_tx_parse_id(parse_binary, {TXID, Req2, Body, ReadBodyTime}) end; {error, body_size_too_large} -> {error, body_size_too_large, Req}; {error, timeout} -> {error, timeout} end; -post_tx_parse_id(parse_json, {TXID, Req, Body, Timestamp}) -> +post_tx_parse_id(parse_json, {TXID, Req, Body, ReadBodyTime}) -> case catch ar_serialize:json_struct_to_tx(Body) of {'EXIT', _} -> case TXID of @@ -2747,12 +2750,9 @@ post_tx_parse_id(parse_json, {TXID, Req, Body, Timestamp}) -> end, {error, invalid_json, Req}; TX -> - Time = timer:now_diff(erlang:timestamp(), Timestamp), - erlang:put(read_body_time, Time), - erlang:put(body_size, byte_size(term_to_binary(TX))), - post_tx_parse_id(verify_id_match, {TXID, Req, TX}) + post_tx_parse_id(verify_id_match, {TXID, Req, TX, ReadBodyTime}) end; -post_tx_parse_id(parse_binary, {TXID, Req, Body, Timestamp}) -> +post_tx_parse_id(parse_binary, {TXID, Req, Body, ReadBodyTime}) -> case catch ar_serialize:binary_to_tx(Body) of {'EXIT', _} -> case TXID of @@ -2771,16 +2771,13 @@ post_tx_parse_id(parse_binary, {TXID, Req, Body, Timestamp}) -> end, {error, invalid_json, Req}; {ok, TX} -> - Time = timer:now_diff(erlang:timestamp(), Timestamp), - erlang:put(read_body_time, Time), - erlang:put(body_size, byte_size(term_to_binary(TX))), 
- post_tx_parse_id(verify_id_match, {TXID, Req, TX}) + post_tx_parse_id(verify_id_match, {TXID, Req, TX, ReadBodyTime}) end; -post_tx_parse_id(verify_id_match, {MaybeTXID, Req, TX}) -> +post_tx_parse_id(verify_id_match, {MaybeTXID, Req, TX, ReadBodyTime}) -> TXID = TX#tx.id, case MaybeTXID of TXID -> - {ok, TX, Req}; + {ok, TX, Req, ReadBodyTime}; MaybeNotSet -> case MaybeNotSet of not_set -> @@ -2797,7 +2794,7 @@ post_tx_parse_id(verify_id_match, {MaybeTXID, Req, TX}) -> {error, tx_already_processed, TXID, Req}; false -> ar_ignore_registry:add_temporary(TXID, 5000), - {ok, TX, Req} + {ok, TX, Req, ReadBodyTime} end end end. diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 93fbdbb89..5aad3e342 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -10,7 +10,7 @@ -export([start_link/0, get_peers/0, get_trusted_peers/0, is_public_peer/1, get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, - resolve_and_cache_peer/2, start_request/3, end_request/4, gossiped_block/4, gossiped_tx/1]). + resolve_and_cache_peer/2, start_request/3, end_request/4, gossiped_block/4, gossiped_tx/3]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -180,9 +180,9 @@ gossiped_block(_Peer, _B, _ValidationStatus, _ReadBodyTime) -> ok. -gossiped_tx(Peer) -> +gossiped_tx(Peer, TX, ReadBodyTime) -> gen_server:cast(?MODULE, { - gossiped_data, tx, Peer, erlang:get(read_body_time), erlang:get(body_size) + gossiped_data, tx, Peer, ReadBodyTime, byte_size(term_to_binary(TX)) }). %% @doc Print statistics about the current peers. 
From a7f66fb4bf500208e5314e94cc4a4cad86913d77 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Thu, 29 Jun 2023 07:00:56 +0000 Subject: [PATCH 10/30] EMA for performance metrics --- apps/arweave/src/ar_peers.erl | 49 +++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 17 deletions(-) diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 5aad3e342..6c2323248 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -36,6 +36,11 @@ %% The number of failed requests in a row we tolerate before dropping the peer. -define(TOLERATE_FAILURE_COUNT, 20). +-define(MINIMUM_SUCCESS, 0.5). +-define(LATENCY_ALPHA, 0.1). +-define(SIZE_ALPHA, 0.1). +-define(SUCCESS_ALPHA, 0.01). +-define(STARTING_LATENCY_EMA, 1000). %% initial value to avoid over-weighting the first response %% We only do scoring of this many TCP ports per IP address. When there are not enough slots, %% we remove the peer from the first slot. @@ -44,9 +49,9 @@ -record(performance, { bytes = 0, - time = 0, + latency = ?STARTING_LATENCY_EMA, transfers = 0, - failures = 0, + success = 1.0, rating = 0, release = -1 }). @@ -433,11 +438,11 @@ discover_peers([Peer | Peers]) -> format_stats(Peer, Perf) -> KB = Perf#performance.bytes / 1024, - Seconds = (Perf#performance.time + 1) / 1000, - io:format("\t~s ~.2f kB/s (~.2f kB, ~.2f s, ~p transfers, ~B failures)~n", + Seconds = Perf#performance.latency / 1000, + io:format("\t~s ~.2f kB/s (~.2f kB, ~.2f s, ~.2f success, ~p transfers)~n", [string:pad(ar_util:format_peer(Peer), 21, trailing, $ ), float(Perf#performance.rating), KB, Seconds, - Perf#performance.transfers, Perf#performance.failures]). + Perf#performance.success, Perf#performance.transfers]). 
load_peers() -> case ar_storage:read_term(peers) of @@ -477,14 +482,14 @@ load_peer({Peer, Performance}) -> <> -> may_be_rotate_peer_ports(Peer), case Performance of - {performance, Bytes, Time, Transfers, Failures, Rating} -> + {performance, Bytes, Latency, Transfers, _Failures, Rating} -> %% For compatibility with a few nodes already storing the records %% without the release field. ets:insert(?MODULE, {{peer, Peer}, #performance{ bytes = Bytes, - time = Time, transfers = Transfers, failures = Failures, - rating = Rating, release = -1 }}); + latency = Latency, transfers = Transfers, + success = 1.0, rating = Rating, release = -1 }}); _ -> - ets:insert(?MODULE, {{peer, Peer}, Performance}) + ets:insert(?MODULE, {{peer, Peer}, Performance#performance{ success = 1.0 }}) end, ok; Network -> @@ -622,16 +627,22 @@ check_external_peer(Peer) -> update_rating(Peer, TimeDelta, Size) -> Performance = get_or_init_performance(Peer), Total = get_total_rating(), - #performance{ bytes = Bytes, time = Time, + #performance{ bytes = Bytes, latency = Latency, success = Success, rating = Rating, transfers = N } = Performance, - Bytes2 = Bytes + Size, - Time2 = Time + TimeDelta / 1000, - Performance2 = Performance#performance{ bytes = Bytes2, time = Time2, - rating = Rating2 = Bytes2 / (Time2 + 1), failures = 0, transfers = N + 1 }, + Bytes2 = calculate_ema(Bytes, Size, ?SIZE_ALPHA), + Latency2 = calculate_ema(Latency, TimeDelta / 1000, ?LATENCY_ALPHA), + Success2 = calculate_ema(Success, 1, ?SUCCESS_ALPHA), + Rating2 = Bytes2 / Latency2, + Performance2 = Performance#performance{ + bytes = Bytes2, latency = Latency2, success = Success2, + rating = Rating2, transfers = N + 1 }, Total2 = Total - Rating + Rating2, may_be_rotate_peer_ports(Peer), ets:insert(?MODULE, [{{peer, Peer}, Performance2}, {rating_total, Total2}]). +calculate_ema(OldEMA, Value, Alpha) -> + Alpha * Value + (1 - Alpha) * OldEMA. 
+ get_or_init_performance(Peer) -> case ets:lookup(?MODULE, {peer, Peer}) of [] -> @@ -651,6 +662,10 @@ get_total_rating() -> remove_peer(RemovedPeer) -> Total = get_total_rating(), Performance = get_or_init_performance(RemovedPeer), + ?LOG_DEBUG([ + {event, remove_peer}, + {peer, ar_util:format_peer(RemovedPeer)}, + {performance, format_stats(RemovedPeer, Performance)}]), ets:insert(?MODULE, {rating_total, Total - Performance#performance.rating}), ets:delete(?MODULE, {peer, RemovedPeer}), remove_peer_port(RemovedPeer). @@ -708,12 +723,12 @@ store_peers() -> issue_warning(Peer) -> Performance = get_or_init_performance(Peer), - Failures = Performance#performance.failures, - case Failures + 1 > ?TOLERATE_FAILURE_COUNT of + Success = calculate_ema(Performance#performance.success, 0, ?SUCCESS_ALPHA), + case Success < ?MINIMUM_SUCCESS of true -> remove_peer(Peer); false -> - Performance2 = Performance#performance{ failures = Failures + 1 }, + Performance2 = Performance#performance{ success = Success }, may_be_rotate_peer_ports(Peer), ets:insert(?MODULE, {{peer, Peer}, Performance2}) end. 
From 852971476ce49aec84823eae778acc0aaddaeed7 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Thu, 29 Jun 2023 07:13:07 +0000 Subject: [PATCH 11/30] format as ms --- apps/arweave/src/ar_peers.erl | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 6c2323248..ffd8ce860 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -313,7 +313,7 @@ handle_cast({end_request, Peer, PathLabel, _Method, Status, ElapsedMicroseconds, {path, PathLabel}, {status, Status}, {peer, ar_util:format_peer(Peer)}, - {time_delta, ElapsedMicroseconds}, + {latency_ms, ElapsedMicroseconds / 1000}, {size, Size} ]), update_rating(Peer, ElapsedMicroseconds, Size), @@ -326,7 +326,7 @@ handle_cast({gossiped_data, DataType, Peer, ElapsedMicroseconds, Size}, State) - {event, update_rating}, {update_type, DataType}, {peer, ar_util:format_peer(Peer)}, - {time_delta, ElapsedMicroseconds}, + {latency_ms, ElapsedMicroseconds / 1000}, {size, Size} ]), update_rating(Peer, ElapsedMicroseconds, Size); @@ -438,10 +438,9 @@ discover_peers([Peer | Peers]) -> format_stats(Peer, Perf) -> KB = Perf#performance.bytes / 1024, - Seconds = Perf#performance.latency / 1000, - io:format("\t~s ~.2f kB/s (~.2f kB, ~.2f s, ~.2f success, ~p transfers)~n", + io:format("\t~s ~.2f kB/s (~.2f kB, ~B latency, ~.2f success, ~p transfers)~n", [string:pad(ar_util:format_peer(Peer), 21, trailing, $ ), - float(Perf#performance.rating), KB, Seconds, + float(Perf#performance.rating), KB, trunc(Perf#performance.latency), Perf#performance.success, Perf#performance.transfers]). load_peers() -> @@ -624,13 +623,13 @@ check_external_peer(Peer) -> ok end. 
-update_rating(Peer, TimeDelta, Size) -> +update_rating(Peer, LatencyMicroseconds, Size) -> Performance = get_or_init_performance(Peer), Total = get_total_rating(), #performance{ bytes = Bytes, latency = Latency, success = Success, rating = Rating, transfers = N } = Performance, Bytes2 = calculate_ema(Bytes, Size, ?SIZE_ALPHA), - Latency2 = calculate_ema(Latency, TimeDelta / 1000, ?LATENCY_ALPHA), + Latency2 = calculate_ema(Latency, LatencyMicroseconds / 1000, ?LATENCY_ALPHA), Success2 = calculate_ema(Success, 1, ?SUCCESS_ALPHA), Rating2 = Bytes2 / Latency2, Performance2 = Performance#performance{ From 68e676037fe9a9c7126e9b0f390a4108ce9950a2 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Fri, 30 Jun 2023 12:48:51 +0000 Subject: [PATCH 12/30] don't let gossiped data skew the throughput metrics --- apps/arweave/src/ar_http_iface_middleware.erl | 41 ++++++++----------- apps/arweave/src/ar_http_req.erl | 2 + apps/arweave/src/ar_peers.erl | 28 +++++++------ 3 files changed, 34 insertions(+), 37 deletions(-) diff --git a/apps/arweave/src/ar_http_iface_middleware.erl b/apps/arweave/src/ar_http_iface_middleware.erl index aa76afa9a..ddcd0ed61 100644 --- a/apps/arweave/src/ar_http_iface_middleware.erl +++ b/apps/arweave/src/ar_http_iface_middleware.erl @@ -544,10 +544,8 @@ handle(<<"POST">>, [<<"unsigned_tx">>], Req, Pid) -> {false, _} -> not_joined(Req); {true, pass} -> - Timestamp = erlang:timestamp(), case read_complete_body(Req, Pid) of {ok, Body, Req2} -> - ReadBodyTime = timer:now_diff(erlang:timestamp(), Timestamp), {UnsignedTXProps} = ar_serialize:dejsonify(Body), WalletAccessCode = proplists:get_value(<<"wallet_access_code">>, UnsignedTXProps), @@ -583,7 +581,7 @@ handle(<<"POST">>, [<<"unsigned_tx">>], Req, Pid) -> Peer = ar_http_util:arweave_peer(Req), Reply = ar_serialize:jsonify({[{<<"id">>, ar_util:encode(SignedTX#tx.id)}]}), - case handle_post_tx(Req2, Peer, SignedTX, ReadBodyTime) of + case handle_post_tx(Req2, Peer, SignedTX) of ok -> {200, #{}, Reply, 
Req2}; {error_response, {Status, Headers, ErrBody}} -> @@ -1791,9 +1789,9 @@ handle_post_tx({Req, Pid, Encoding}) -> {413, #{}, <<"Payload too large">>, Req2}; {error, timeout} -> {503, #{}, <<>>, Req}; - {ok, TX, Req2, ReadBodyTime} -> + {ok, TX, Req2} -> Peer = ar_http_util:arweave_peer(Req), - case handle_post_tx(Req2, Peer, TX, ReadBodyTime) of + case handle_post_tx(Req2, Peer, TX) of ok -> {200, #{}, <<"OK">>, Req2}; {error_response, {Status, Headers, Body}} -> @@ -1804,7 +1802,7 @@ handle_post_tx({Req, Pid, Encoding}) -> end end. -handle_post_tx(Req, Peer, TX, ReadBodyTime) -> +handle_post_tx(Req, Peer, TX) -> case ar_tx_validator:validate(TX) of {invalid, tx_verification_failed} -> handle_post_tx_verification_response(); @@ -1823,16 +1821,16 @@ handle_post_tx(Req, Peer, TX, ReadBodyTime) -> {valid, TX2} -> ar_data_sync:add_data_root_to_disk_pool(TX2#tx.data_root, TX2#tx.data_size, TX#tx.id), - handle_post_tx_accepted(Req, TX, Peer, ReadBodyTime) + handle_post_tx_accepted(Req, TX, Peer) end. -handle_post_tx_accepted(Req, TX, Peer, ReadBodyTime) -> +handle_post_tx_accepted(Req, TX, Peer) -> %% Exclude successful requests with valid transactions from the %% IP-based throttling, to avoid connectivity issues at the times %% of excessive transaction volumes. {A, B, C, D, _} = Peer, ar_blacklist_middleware:decrement_ip_addr({A, B, C, D}, Req), - ar_peers:gossiped_tx(Peer, TX, ReadBodyTime), + ar_peers:gossiped_tx(Peer), ar_events:send(tx, {new, TX, Peer}), TXID = TX#tx.id, ar_ignore_registry:remove_temporary(TXID), @@ -2360,11 +2358,8 @@ post_block(enqueue_block, {B, Peer}, Req, ReceiveTimestamp) -> end end, ?LOG_INFO([{event, received_block}, {block, ar_util:encode(B#block.indep_hash)}]), - %% ReadBodyTime in microseconds, measure elapsed time before validation since the validation - %% operation can take some time. 
- ReadBodyTime = timer:now_diff(erlang:timestamp(), ReceiveTimestamp), ValidationStatus = ar_block_pre_validator:pre_validate(B2, Peer, ReceiveTimestamp), - ar_peers:gossiped_block(Peer, B2, ValidationStatus, ReadBodyTime), + ar_peers:gossiped_block(Peer, ValidationStatus), {200, #{}, <<"OK">>, Req}. encode_txids([]) -> @@ -2707,22 +2702,20 @@ post_tx_parse_id(check_ignore_list, {TXID, Req, Pid, Encoding}) -> post_tx_parse_id(read_body, {TXID, Req, Pid, Encoding}) end; post_tx_parse_id(read_body, {TXID, Req, Pid, Encoding}) -> - Timestamp = erlang:timestamp(), case read_complete_body(Req, Pid) of {ok, Body, Req2} -> - ReadBodyTime = timer:now_diff(erlang:timestamp(), Timestamp), case Encoding of json -> - post_tx_parse_id(parse_json, {TXID, Req2, Body, ReadBodyTime}); + post_tx_parse_id(parse_json, {TXID, Req2, Body}); binary -> - post_tx_parse_id(parse_binary, {TXID, Req2, Body, ReadBodyTime}) + post_tx_parse_id(parse_binary, {TXID, Req2, Body}) end; {error, body_size_too_large} -> {error, body_size_too_large, Req}; {error, timeout} -> {error, timeout} end; -post_tx_parse_id(parse_json, {TXID, Req, Body, ReadBodyTime}) -> +post_tx_parse_id(parse_json, {TXID, Req, Body}) -> case catch ar_serialize:json_struct_to_tx(Body) of {'EXIT', _} -> case TXID of @@ -2750,9 +2743,9 @@ post_tx_parse_id(parse_json, {TXID, Req, Body, ReadBodyTime}) -> end, {error, invalid_json, Req}; TX -> - post_tx_parse_id(verify_id_match, {TXID, Req, TX, ReadBodyTime}) + post_tx_parse_id(verify_id_match, {TXID, Req, TX}) end; -post_tx_parse_id(parse_binary, {TXID, Req, Body, ReadBodyTime}) -> +post_tx_parse_id(parse_binary, {TXID, Req, Body}) -> case catch ar_serialize:binary_to_tx(Body) of {'EXIT', _} -> case TXID of @@ -2771,13 +2764,13 @@ post_tx_parse_id(parse_binary, {TXID, Req, Body, ReadBodyTime}) -> end, {error, invalid_json, Req}; {ok, TX} -> - post_tx_parse_id(verify_id_match, {TXID, Req, TX, ReadBodyTime}) + post_tx_parse_id(verify_id_match, {TXID, Req, TX}) end; 
-post_tx_parse_id(verify_id_match, {MaybeTXID, Req, TX, ReadBodyTime}) -> +post_tx_parse_id(verify_id_match, {MaybeTXID, Req, TX}) -> TXID = TX#tx.id, case MaybeTXID of TXID -> - {ok, TX, Req, ReadBodyTime}; + {ok, TX, Req}; MaybeNotSet -> case MaybeNotSet of not_set -> @@ -2794,7 +2787,7 @@ post_tx_parse_id(verify_id_match, {MaybeTXID, Req, TX, ReadBodyTime}) -> {error, tx_already_processed, TXID, Req}; false -> ar_ignore_registry:add_temporary(TXID, 5000), - {ok, TX, Req, ReadBodyTime} + {ok, TX, Req} end end end. diff --git a/apps/arweave/src/ar_http_req.erl b/apps/arweave/src/ar_http_req.erl index ffe3a03c6..158ad871e 100644 --- a/apps/arweave/src/ar_http_req.erl +++ b/apps/arweave/src/ar_http_req.erl @@ -10,6 +10,7 @@ body(Req, SizeLimit) -> not_set -> read_complete_body(Req, #{ acc => [], counter => 0, size_limit => SizeLimit }); Body -> + ?LOG_DEBUG([{event, cached_completed_body}, {size, byte_size(Body)}, {path, ar_http_iface_server:label_req(Req)}]), {ok, Body, Req} end. @@ -42,6 +43,7 @@ read_complete_body(more, Data, Req) -> read_complete_body(Req, Data); read_complete_body(ok, #{ acc := Acc }, Req) -> Body = iolist_to_binary(Acc), + ?LOG_DEBUG([{event, read_completed_body}, {size, byte_size(Body)}, {path, ar_http_iface_server:label_req(Req)}]), {ok, Body, with_body_req_field(Req, Body)}. with_body_req_field(Req, Body) -> diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index ffd8ce860..494d6f13d 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -10,7 +10,7 @@ -export([start_link/0, get_peers/0, get_trusted_peers/0, is_public_peer/1, get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, - resolve_and_cache_peer/2, start_request/3, end_request/4, gossiped_block/4, gossiped_tx/3]). + resolve_and_cache_peer/2, start_request/3, end_request/4, gossiped_block/2, gossiped_tx/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). 
@@ -175,19 +175,18 @@ end_request(Peer, PathLabel, get, Response) ->
 end_request(Peer, PathLabel, _, Response) ->
 ok.
 
-gossiped_block(Peer, B, ok, ReadBodyTime) ->
+gossiped_block(Peer, ok) ->
 gen_server:cast(?MODULE, {
- gossiped_data, block, Peer, ReadBodyTime, byte_size(term_to_binary(B))
+ gossiped_data, Peer
 });
-gossiped_block(_Peer, _B, _ValidationStatus, _ReadBodyTime) ->
+gossiped_block(_Peer, _ValidationStatus) ->
 %% Ignore skipped or invalid blocks for now (consistent with old behavior, but may need to
 %% be revisited)
 ok.
-
-gossiped_tx(Peer, TX, ReadBodyTime) ->
+gossiped_tx(Peer) ->
 gen_server:cast(?MODULE, {
- gossiped_data, tx, Peer, ReadBodyTime, byte_size(term_to_binary(TX))
+ gossiped_data, Peer
 }).
 
 %% @doc Print statistics about the current peers.
@@ -319,17 +318,15 @@ handle_cast({end_request, Peer, PathLabel, _Method, Status, ElapsedMicroseconds,
 update_rating(Peer, ElapsedMicroseconds, Size),
 {noreply, State};
 
-handle_cast({gossiped_data, DataType, Peer, ElapsedMicroseconds, Size}, State) ->
+handle_cast({gossiped_data, Peer}, State) ->
 case check_external_peer(Peer) of
 ok ->
 ?LOG_DEBUG([
 {event, update_rating},
- {update_type, DataType},
- {peer, ar_util:format_peer(Peer)},
- {latency_ms, ElapsedMicroseconds / 1000},
- {size, Size}
+ {update_type, gossiped_data},
+ {peer, ar_util:format_peer(Peer)}
 ]),
- update_rating(Peer, ElapsedMicroseconds, Size);
+ update_rating(Peer);
 _ ->
 ok
 end,
@@ -623,6 +620,11 @@ check_external_peer(Peer) ->
 ok
 end.
 
+update_rating(Peer) ->
+ Performance = get_or_init_performance(Peer),
+ %% Pass in the current latency and bytes values in order to hold them constant.
+ %% Only the success average should be updated.
+ update_rating(Peer, Performance#performance.latency, Performance#performance.bytes).
update_rating(Peer, LatencyMicroseconds, Size) -> Performance = get_or_init_performance(Peer), Total = get_total_rating(), From cf17ed7126c7a6aaad59f07a94e8b7c4fea1bfe1 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Fri, 30 Jun 2023 12:57:20 +0000 Subject: [PATCH 13/30] print the peer stats after ranking --- apps/arweave/src/ar_peers.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 494d6f13d..bf252de2f 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -203,7 +203,6 @@ stats(Peers) -> Peers). discover_peers() -> - stats(), case ets:lookup(?MODULE, peers) of [] -> ok; @@ -295,6 +294,7 @@ handle_cast(rank_peers, State) -> prometheus_gauge:set(arweave_peer_count, length(Peers)), ets:insert(?MODULE, {peers, lists:sublist(rank_peers(Peers), ?MAX_PEERS)}), ar_util:cast_after(?RANK_PEERS_FREQUENCY_MS, ?MODULE, rank_peers), + stats(), {noreply, State}; handle_cast(ping_peers, State) -> From a2711aff6d4cee4d89abfa44405d25fc730bac20 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Sat, 1 Jul 2023 06:58:32 +0000 Subject: [PATCH 14/30] penalized bad fetched data --- apps/arweave/src/ar_data_sync.erl | 1 + apps/arweave/src/ar_http_iface_client.erl | 6 ++- apps/arweave/src/ar_peers.erl | 66 +++++++++++++++++------ 3 files changed, 54 insertions(+), 19 deletions(-) diff --git a/apps/arweave/src/ar_data_sync.erl b/apps/arweave/src/ar_data_sync.erl index b019be584..6a3975186 100644 --- a/apps/arweave/src/ar_data_sync.erl +++ b/apps/arweave/src/ar_data_sync.erl @@ -2586,6 +2586,7 @@ process_invalid_fetched_chunk(Peer, Byte, State) -> #sync_data_state{ weave_size = WeaveSize } = State, ?LOG_WARNING([{event, got_invalid_proof_from_peer}, {peer, ar_util:format_peer(Peer)}, {byte, Byte}, {weave_size, WeaveSize}]), + ar_peers:rate_fetched_data(Peer, invalid), %% Not necessarily a malicious peer, it might happen %% if the chunk is recent and from a different 
fork. {noreply, State}. diff --git a/apps/arweave/src/ar_http_iface_client.erl b/apps/arweave/src/ar_http_iface_client.erl index 970e6eeff..ed93a2c13 100644 --- a/apps/arweave/src/ar_http_iface_client.erl +++ b/apps/arweave/src/ar_http_iface_client.erl @@ -576,7 +576,7 @@ handle_chunk_response(Encoding, {ok, {{<<"200">>, _}, _, Body, _, _}}) -> end end end, - case catch DecodeFun(Body) of + Result = case catch DecodeFun(Body) of {'EXIT', Reason} -> {error, Reason}; {error, Reason} -> @@ -590,7 +590,9 @@ handle_chunk_response(Encoding, {ok, {{<<"200">>, _}, _, Body, _, _}}) -> _ -> {ok, Proof} end - end; + end, + ar_peers:rate_fetched_data(Result), + Result; handle_chunk_response(_Encoding, {error, _} = Response) -> Response; handle_chunk_response(_Encoding, Response) -> diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index bf252de2f..82fbbdbf0 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -10,7 +10,8 @@ -export([start_link/0, get_peers/0, get_trusted_peers/0, is_public_peer/1, get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, - resolve_and_cache_peer/2, start_request/3, end_request/4, gossiped_block/2, gossiped_tx/1]). + resolve_and_cache_peer/2, + start_request/3, end_request/4, rate_fetched_data/2, gossiped_block/2, gossiped_tx/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -37,8 +38,7 @@ %% The number of failed requests in a row we tolerate before dropping the peer. -define(TOLERATE_FAILURE_COUNT, 20). -define(MINIMUM_SUCCESS, 0.5). --define(LATENCY_ALPHA, 0.1). --define(SIZE_ALPHA, 0.1). +-define(THROUGHPUT_ALPHA, 0.1). -define(SUCCESS_ALPHA, 0.01). -define(STARTING_LATENCY_EMA, 1000). %% initial value to avoid over-weighting the first response @@ -164,17 +164,27 @@ start_request(_Peer, _PathLabel, _) -> end_request({_Host, _Port}, _, _, _) -> %% Only track requests for IP-based peers as the rest of the stack assumes an IP-based peer. 
ok; -end_request(Peer, PathLabel, get, {ok, {{<<"200">>, _}, _, Body, Start, End}} = Response) -> +end_request(Peer, PathLabel, get, {_, _, _, Body, Start, End} = Response) -> gen_server:cast(?MODULE, {end_request, Peer, PathLabel, get, ar_metrics:get_status_class(Response), End-Start, byte_size(term_to_binary(Body))}); -end_request(Peer, PathLabel, get, Response) -> - %% TODO: error response - ok; -end_request(Peer, PathLabel, _, Response) -> +end_request(_Peer, _PathLabel, _, _Response) -> ok. +rate_fetched_data(_Peer, {ok, _}) -> + %% The fetched data is valid so the rating was already captured as part of + %% the start/end request pair. Nothing more to do. + ok; +rate_fetched_data(Peer, {error, _}) -> + %% The fetched data is invalid, so we need to reverse the rating that was applied + %% in end_request, and then apply a penalty + gen_server:cast(?MODULE, {invalid_fetched_data, Peer}); +rate_fetched_data(Peer, invalid) -> + %% The fetched data is invalid, so we need to reverse the rating that was applied + %% in end_request, and then apply a penalty + gen_server:cast(?MODULE, {invalid_fetched_data, Peer}). 
+ gossiped_block(Peer, ok) -> gen_server:cast(?MODULE, { gossiped_data, Peer @@ -305,7 +315,7 @@ handle_cast(ping_peers, State) -> handle_cast({start_request, Peer, PathLabel, Method}, State) -> {noreply, State}; -handle_cast({end_request, Peer, PathLabel, _Method, Status, ElapsedMicroseconds, Size}, State) -> +handle_cast({end_request, Peer, PathLabel, get, Status, ElapsedMicroseconds, Size}, State) -> ?LOG_DEBUG([ {event, update_rating}, {update_type, request}, @@ -315,9 +325,31 @@ handle_cast({end_request, Peer, PathLabel, _Method, Status, ElapsedMicroseconds, {latency_ms, ElapsedMicroseconds / 1000}, {size, Size} ]), - update_rating(Peer, ElapsedMicroseconds, Size), + case Status of + "success" -> + update_rating(Peer, ElapsedMicroseconds, Size, true); + _ -> + update_rating(Peer, false) + end, {noreply, State}; +handle_cast({invalid_fetched_data, Peer}, State) -> + ?LOG_DEBUG([ + {event, update_rating}, + {update_type, invalid_fetched_data}, + {peer, ar_util:format_peer(Peer)} + ]), + %% log 2 failures - first is to reverse the success that was previously recorded by end_request + %% (since end_request only considers whether or not the HTTP request was successful and does not + %% consider the validity of the data it may be overly permissive), and the second is to + %% penalize the peer for serving invalid data. + %% Note: this is an approximation as due to the nature of the EMA this won't exactly reverse + %% the prior success. + update_rating(Peer, false), + update_rating(Peer, false), + {noreply, State}; + + handle_cast({gossiped_data, Peer}, State) -> case check_external_peer(Peer) of ok -> @@ -326,7 +358,7 @@ handle_cast({gossiped_data, Peer}, State) -> {update_type, gossiped_data}, {peer, ar_util:format_peer(Peer)} ]), - update_rating(Peer); + update_rating(Peer, true); _ -> ok end, @@ -620,19 +652,19 @@ check_external_peer(Peer) -> ok end. 
-update_rating(Peer) -> +update_rating(Peer, IsSuccess) -> Performance = get_or_init_performance(Peer), %% Pass in the current latecny and bytes values in order to hold them constant. %% Only the success average should be updated. - update_rating(Peer, Performance#performance.latency, Performance#performance.bytes). -update_rating(Peer, LatencyMicroseconds, Size) -> + update_rating(Peer, Performance#performance.latency, Performance#performance.bytes, IsSuccess). +update_rating(Peer, LatencyMicroseconds, Size, IsSuccess) -> Performance = get_or_init_performance(Peer), Total = get_total_rating(), #performance{ bytes = Bytes, latency = Latency, success = Success, rating = Rating, transfers = N } = Performance, - Bytes2 = calculate_ema(Bytes, Size, ?SIZE_ALPHA), - Latency2 = calculate_ema(Latency, LatencyMicroseconds / 1000, ?LATENCY_ALPHA), - Success2 = calculate_ema(Success, 1, ?SUCCESS_ALPHA), + Bytes2 = calculate_ema(Bytes, Size, ?THROUGHPUT_ALPHA), + Latency2 = calculate_ema(Latency, LatencyMicroseconds / 1000, ?THROUGHPUT_ALPHA), + Success2 = calculate_ema(Success, ar_util:bool_to_int(IsSuccess), ?SUCCESS_ALPHA), Rating2 = Bytes2 / Latency2, Performance2 = Performance#performance{ bytes = Bytes2, latency = Latency2, success = Success2, From 21630143092eeb9a5bde0b33a7ca4468adeb753e Mon Sep 17 00:00:00 2001 From: James Piechota Date: Sat, 1 Jul 2023 07:05:20 +0000 Subject: [PATCH 15/30] fixup --- apps/arweave/src/ar_peers.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 82fbbdbf0..ceb1293cc 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -164,7 +164,7 @@ start_request(_Peer, _PathLabel, _) -> end_request({_Host, _Port}, _, _, _) -> %% Only track requests for IP-based peers as the rest of the stack assumes an IP-based peer. 
ok; -end_request(Peer, PathLabel, get, {_, _, _, Body, Start, End} = Response) -> +end_request(Peer, PathLabel, get, {_, {_, _, Body, Start, End}} = Response) -> gen_server:cast(?MODULE, {end_request, Peer, PathLabel, get, ar_metrics:get_status_class(Response), From 999079264b186e4e836e6388bf3a40d542ce3fac Mon Sep 17 00:00:00 2001 From: James Piechota Date: Sun, 2 Jul 2023 12:23:58 +0900 Subject: [PATCH 16/30] track concurrent requests in ar_peers rating concurrent requests increase the peer throughput which increases its rating --- apps/arweave/src/ar_peers.erl | 66 +++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 26 deletions(-) diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index ceb1293cc..fe32d95d1 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -53,7 +53,8 @@ transfers = 0, success = 1.0, rating = 0, - release = -1 + release = -1, + active_requests = 0 }). -record(state, {}). @@ -269,10 +270,9 @@ handle_cast({add_peer, Peer, Release}, State) -> [{_, #performance{ release = Release }}] -> ok; [{_, Performance}] -> - ets:insert(?MODULE, {{peer, Peer}, - Performance#performance{ release = Release }}); + set_performance(Peer, Performance#performance{ release = Release }); [] -> - ets:insert(?MODULE, {{peer, Peer}, #performance{ release = Release }}) + set_performance(Peer, #performance{ release = Release }) end, {noreply, State}; @@ -312,25 +312,32 @@ handle_cast(ping_peers, State) -> ping_peers(lists:sublist(Peers, 100)), {noreply, State}; -handle_cast({start_request, Peer, PathLabel, Method}, State) -> +handle_cast({start_request, Peer, _PathLabel, _Method}, State) -> + Performance = get_or_init_performance(Peer), + ActiveRequests = Performance#performance.active_requests, + set_performance(Peer, Performance#performance{ active_requests = ActiveRequests+1 }), {noreply, State}; handle_cast({end_request, Peer, PathLabel, get, Status, ElapsedMicroseconds, Size}, State) -> + case 
Status of + "success" -> + update_rating(Peer, ElapsedMicroseconds, Size, true); + _ -> + update_rating(Peer, false) + end, + Performance = get_or_init_performance(Peer), + ActiveRequests = Performance#performance.active_requests, + set_performance(Peer, Performance#performance{ active_requests = ActiveRequests-1 }), ?LOG_DEBUG([ {event, update_rating}, {update_type, request}, {path, PathLabel}, {status, Status}, {peer, ar_util:format_peer(Peer)}, + {active_requests, ActiveRequests}, {latency_ms, ElapsedMicroseconds / 1000}, {size, Size} ]), - case Status of - "success" -> - update_rating(Peer, ElapsedMicroseconds, Size, true); - _ -> - update_rating(Peer, false) - end, {noreply, State}; handle_cast({invalid_fetched_data, Peer}, State) -> @@ -375,13 +382,11 @@ handle_info({event, peer, {made_request, Peer, Release}}, State) -> [{_, #performance{ release = Release }}] -> ok; [{_, Performance}] -> - ets:insert(?MODULE, {{peer, Peer}, - Performance#performance{ release = Release }}); + set_performance(Peer, Performance#performance{ release = Release }); [] -> case check_external_peer(Peer) of ok -> - ets:insert(?MODULE, {{peer, Peer}, - #performance{ release = Release }}); + set_performance(Peer, #performance{ release = Release }); _ -> ok end @@ -434,7 +439,6 @@ terminate(_Reason, _State) -> %%%=================================================================== %%% Private functions. 
%%%=================================================================== - get_peer_peers(Peer) -> case ar_http_iface_client:get_peers(Peer) of unavailable -> []; @@ -467,10 +471,10 @@ discover_peers([Peer | Peers]) -> format_stats(Peer, Perf) -> KB = Perf#performance.bytes / 1024, - io:format("\t~s ~.2f kB/s (~.2f kB, ~B latency, ~.2f success, ~p transfers)~n", + io:format("\t~s ~.2f kB/s (~.2f kB, ~B latency, ~.2f success, ~p transfers, ~B requests)~n", [string:pad(ar_util:format_peer(Peer), 21, trailing, $ ), float(Perf#performance.rating), KB, trunc(Perf#performance.latency), - Perf#performance.success, Perf#performance.transfers]). + Perf#performance.success, Perf#performance.transfers, Perf#performance.active_requests]). load_peers() -> case ar_storage:read_term(peers) of @@ -513,11 +517,13 @@ load_peer({Peer, Performance}) -> {performance, Bytes, Latency, Transfers, _Failures, Rating} -> %% For compatibility with a few nodes already storing the records %% without the release field. 
- ets:insert(?MODULE, {{peer, Peer}, #performance{ bytes = Bytes, - latency = Latency, transfers = Transfers, - success = 1.0, rating = Rating, release = -1 }}); + set_performance(Peer, #performance{ + bytes = Bytes, latency = Latency, transfers = Transfers, + rating = Rating, release = -1 }); _ -> - ets:insert(?MODULE, {{peer, Peer}, Performance#performance{ success = 1.0 }}) + %% Always reset success and active_requests when loading a peer from disk + set_performance(Peer, + Performance#performance{ success = 1.0, active_requests = 0 }) end, ok; Network -> @@ -661,17 +667,17 @@ update_rating(Peer, LatencyMicroseconds, Size, IsSuccess) -> Performance = get_or_init_performance(Peer), Total = get_total_rating(), #performance{ bytes = Bytes, latency = Latency, success = Success, - rating = Rating, transfers = N } = Performance, + rating = Rating, transfers = N, active_requests = ActiveRequests } = Performance, Bytes2 = calculate_ema(Bytes, Size, ?THROUGHPUT_ALPHA), Latency2 = calculate_ema(Latency, LatencyMicroseconds / 1000, ?THROUGHPUT_ALPHA), Success2 = calculate_ema(Success, ar_util:bool_to_int(IsSuccess), ?SUCCESS_ALPHA), - Rating2 = Bytes2 / Latency2, + Rating2 = (Bytes2 / Latency2) * Success2 * ActiveRequests, Performance2 = Performance#performance{ bytes = Bytes2, latency = Latency2, success = Success2, rating = Rating2, transfers = N + 1 }, Total2 = Total - Rating + Rating2, may_be_rotate_peer_ports(Peer), - ets:insert(?MODULE, [{{peer, Peer}, Performance2}, {rating_total, Total2}]). + set_performance(Peer, Performance2, Total2). calculate_ema(OldEMA, Value, Alpha) -> Alpha * Value + (1 - Alpha) * OldEMA. @@ -684,6 +690,14 @@ get_or_init_performance(Peer) -> Performance end. +set_performance(Peer, Performance, TotalRating) -> + ets:insert(?MODULE, [ + {{peer, Peer}, Performance}, + {rating_total, TotalRating}]). + +set_performance(Peer, Performance) -> + ets:insert(?MODULE, [{{peer, Peer}, Performance}]). 
+ get_total_rating() -> case ets:lookup(?MODULE, rating_total) of [] -> @@ -763,7 +777,7 @@ issue_warning(Peer) -> false -> Performance2 = Performance#performance{ success = Success }, may_be_rotate_peer_ports(Peer), - ets:insert(?MODULE, {{peer, Peer}, Performance2}) + set_performance(Peer, Performance2) end. %%%=================================================================== From 93ab2f12e68a55822f5d1e9edf9036de2ba3e131 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Sun, 2 Jul 2023 18:03:45 -0400 Subject: [PATCH 17/30] WIP --- apps/arweave/include/ar_peers.hrl | 32 ++++++ apps/arweave/src/ar_data_sync_worker.erl | 9 +- .../src/ar_data_sync_worker_master.erl | 108 +++++++----------- apps/arweave/src/ar_peers.erl | 53 +++++---- 4 files changed, 104 insertions(+), 98 deletions(-) create mode 100644 apps/arweave/include/ar_peers.hrl diff --git a/apps/arweave/include/ar_peers.hrl b/apps/arweave/include/ar_peers.hrl new file mode 100644 index 000000000..13f5e0253 --- /dev/null +++ b/apps/arweave/include/ar_peers.hrl @@ -0,0 +1,32 @@ +-ifndef(AR_PEERS_HRL). +-define(AR_PEERS_HRL, true). + +-include_lib("ar.hrl"). + +-define(STARTING_LATENCY_EMA, 1000). %% initial value to avoid over-weighting the first response + +-record(overall_performance, { + bytes = 0, + latency = ?STARTING_LATENCY_EMA, + transfers = 0, + success = 1.0, + rating = 0 +}). + +-record(sync_performance, { + bytes = 0, + latency = ?STARTING_LATENCY_EMA, + transfers = 0, + success = 1.0, + max_active = 0, + rating = 0 +}). + +-record(performance, { + version = 3, + release = -1, + overall = #overall_performance{}, + sync = #sync_performance{} +}). + +-endif. 
\ No newline at end of file diff --git a/apps/arweave/src/ar_data_sync_worker.erl b/apps/arweave/src/ar_data_sync_worker.erl index 9a8f8bf89..a8d8d3454 100644 --- a/apps/arweave/src/ar_data_sync_worker.erl +++ b/apps/arweave/src/ar_data_sync_worker.erl @@ -55,15 +55,12 @@ handle_cast({read_range, Args}, State) -> handle_cast({sync_range, Args}, State) -> {_Start, _End, Peer, _TargetStoreID, _RetryCount} = Args, - StartTime = erlang:monotonic_time(), - SyncResult = sync_range(Args), - EndTime = erlang:monotonic_time(), - case SyncResult of + case sync_range(Args) of recast -> ok; - _ -> + SyncResult -> gen_server:cast(ar_data_sync_worker_master, - {task_completed, {sync_range, {State#state.name, SyncResult, Peer, EndTime-StartTime}}}) + {task_completed, {sync_range, {State#state.name, SyncResult, Peer}}}) end, {noreply, State}; diff --git a/apps/arweave/src/ar_data_sync_worker_master.erl b/apps/arweave/src/ar_data_sync_worker_master.erl index 5abe89ec0..dda2dbc8d 100644 --- a/apps/arweave/src/ar_data_sync_worker_master.erl +++ b/apps/arweave/src/ar_data_sync_worker_master.erl @@ -12,6 +12,7 @@ -include_lib("arweave/include/ar_consensus.hrl"). -include_lib("arweave/include/ar_config.hrl"). -include_lib("arweave/include/ar_data_sync.hrl"). +-include_lib("arweave/include/ar_peers.hrl"). -include_lib("eunit/include/eunit.hrl"). -define(REBALANCE_FREQUENCY_MS, 60*1000). @@ -19,17 +20,15 @@ -define(MIN_MAX_ACTIVE, 8). -define(LATENCY_ALPHA, 0.1). -define(SUCCESS_ALPHA, 0.1). --define(STARTING_LATENCY_EMA, 1000). %% initial value to avoid over-weighting the first response --define(STARTING_LATENCY_TARGET, 2000). %% initial value to avoid over-weighting the first response -record(peer_tasks, { peer = undefined, task_queue = queue:new(), task_queue_len = 0, active_count = 0, - max_active = ?MIN_MAX_ACTIVE, - latency_ema = ?STARTING_LATENCY_EMA, - success_ema = 1.0 + max_active = ?MIN_MAX_ACTIVE + % latency_ema = ?STARTING_LATENCY_EMA, + % success_ema = 1.0 }). 
-record(state, { @@ -40,7 +39,7 @@ workers = queue:new(), worker_count = 0, worker_loads = #{}, - latency_target = ?STARTING_LATENCY_TARGET, + throughput_target = 0, peer_tasks = #{} }). @@ -111,18 +110,21 @@ handle_cast({task_completed, {read_range, {Worker, _, _}}}, State) -> State2 = update_scheduled_task_count(Worker, read_range, "localhost", -1, State), {noreply, State2}; -handle_cast({task_completed, {sync_range, {Worker, Result, Peer, Duration}}}, State) -> +handle_cast({task_completed, {sync_range, {Worker, _Result, Peer}}}, State) -> State2 = update_scheduled_task_count(Worker, sync_range, ar_util:format_peer(Peer), -1, State), PeerTasks = get_peer_tasks(Peer, State2), - {PeerTasks2, State3} = complete_sync_range(PeerTasks, Result, Duration, State2), + {PeerTasks2, State3} = complete_sync_range(PeerTasks, State2), {PeerTasks3, State4} = process_peer_queue(PeerTasks2, State3), {noreply, set_peer_tasks(PeerTasks3, State4)}; handle_cast(rebalance_peers, State) -> ar_util:cast_after(?REBALANCE_FREQUENCY_MS, ?MODULE, rebalance_peers), ?LOG_DEBUG([{event, rebalance_peers}]), - AllPeerTasks = maps:values(State#state.peer_tasks), - {noreply, rebalance_peers(AllPeerTasks, State)}; + Peers = maps:keys(State#state.peer_tasks), + AllPeerTasks =[ maps:get(Peer, State#state.peer_tasks) || Peer <- Peers], + AllPeerPerformances = ar_peers:get_peer_performances(Peers), + ThroughputTarget = lists:sum([ Performance#performance.rating || Performance <- AllPeerPerformances ]) / length(Peers), + {noreply, rebalance_peers(AllPeerTasks, AllPeerPerformances, ThroughputTarget, State)}; handle_cast(Cast, State) -> ?LOG_WARNING("event: unhandled_cast, cast: ~p", [Cast]), @@ -208,20 +210,20 @@ max_tasks() -> Config#config.sync_jobs * 50. %% @doc The maximum number of tasks we can have queued for a given peer. 
-max_peer_queue(_PeerTasks, #state{ scheduled_task_count = 0 } = _State) -> +max_peer_queue(_PeerTasks, _Performance, #state{ scheduled_task_count = 0 } = _State) -> undefined; -max_peer_queue(_PeerTasks, #state{ latency_target = 0 } = _State) -> +max_peer_queue(_PeerTasks, _Peformance, #state{ latency_target = 0 } = _State) -> undefined; -max_peer_queue(_PeerTasks, #state{ latency_target = 0.0 } = _State) -> +max_peer_queue(_PeerTasks, _Performance, #state{ latency_target = 0.0 } = _State) -> undefined; -max_peer_queue(#peer_tasks{ latency_ema = 0 } = _PeerTasks, _State) -> +max_peer_queue(_PeerTasks, #performance{ latency = 0 } = _Performance, _State) -> undefined; -max_peer_queue(#peer_tasks{ latency_ema = 0.0 } = _PeerTasks, _State) -> +max_peer_queue(_PeerTasks, #performance{ latency = 0.0 } = _Performance, _State) -> undefined; -max_peer_queue(PeerTasks, State) -> +max_peer_queue(PeerTasks, Performance, State) -> CurActive = PeerTasks#peer_tasks.active_count, - LatencyEMA = PeerTasks#peer_tasks.latency_ema, - SuccessEMA = PeerTasks#peer_tasks.success_ema, + LatencyEMA = Performance#performance.latency, + SuccessEMA = Performance#performance.success, LatencyTarget = State#state.latency_target, ScheduledTasks = State#state.scheduled_task_count, %% estimate of our current total throughput @@ -254,8 +256,6 @@ cut_peer_queue(MaxQueue, PeerTasks, State) -> {peer, ar_util:format_peer(Peer)}, {active_count, PeerTasks#peer_tasks.active_count}, {scheduled_tasks, State#state.scheduled_task_count}, - {success_ema, PeerTasks#peer_tasks.success_ema}, - {latency_ema, PeerTasks#peer_tasks.latency_ema}, {latency_target, State#state.latency_target}, {max_queue, MaxQueue}, {tasks_to_cut, TasksToCut}]), {TaskQueue2, _} = queue:split(MaxQueue, TaskQueue), @@ -324,44 +324,30 @@ schedule_task(Task, Args, State) -> %% Stage 3: record a completed task and update related values (i.e. 
%% EMA, max_active, peer queue length) %%-------------------------------------------------------------------- -complete_sync_range(PeerTasks, Result, Duration, State) -> - Milliseconds = erlang:convert_time_unit(Duration, native, millisecond) / 1.0, - - IsOK = (Result == ok andalso Milliseconds > 10), - LatencyEMA = trunc(calculate_ema( - PeerTasks#peer_tasks.latency_ema, IsOK, Milliseconds, ?LATENCY_ALPHA)), - SuccessEMA = calculate_ema( - PeerTasks#peer_tasks.success_ema, true, ar_util:bool_to_int(IsOK) / 1.0, - ?SUCCESS_ALPHA), - %% Target Latency is the EMA of all peers' latencies - LatencyTargetAlpha = 2.0 / (State#state.worker_count + 1), %% heuristic - update as needed. - LatencyTarget = trunc(calculate_ema( - State#state.latency_target, IsOK, Milliseconds, LatencyTargetAlpha)), - +complete_sync_range(PeerTasks, State) -> PeerTasks2 = PeerTasks#peer_tasks{ - latency_ema = LatencyEMA, - success_ema = SuccessEMA, active_count = PeerTasks#peer_tasks.active_count - 1 }, - {PeerTasks2, State#state{ latency_target = LatencyTarget }}. + {PeerTasks2, State}. -rebalance_peers([], State) -> +rebalance_peers([], [], _, State) -> State; -rebalance_peers([PeerTasks | Rest], State) -> - {PeerTasks2, State2} = rebalance_peer(PeerTasks, State), +rebalance_peers( + [PeerTasks | AllPeerTasks], + [Performance | AllPeerPerformances], + ThroughputTarget, + State) -> + {PeerTasks2, State2} = rebalance_peer(PeerTasks, Performance, ThroughputTarget, State), State3 = set_peer_tasks(PeerTasks2, State2), - rebalance_peers(Rest, State3). + rebalance_peers(AllPeerTasks, AllPeerPerformances, ThroughputTarget, State3). 
-rebalance_peer(PeerTasks, State) -> +rebalance_peer(PeerTasks, Performance, ThroughputTarget, State) -> {PeerTasks2, State2} = cut_peer_queue( - max_peer_queue(PeerTasks, State), + max_peer_queue(PeerTasks, Performance, State), PeerTasks, State), - IsOK = true, - Milliseconds = PeerTasks2#peer_tasks.latency_ema, WorkerCount = State2#state.worker_count, - LatencyTarget = State2#state.latency_target, - PeerTasks3 = update_active(PeerTasks2, IsOK, Milliseconds, WorkerCount, LatencyTarget), + PeerTasks3 = update_active(PeerTasks2, Performance, WorkerCount, ThroughputTarget), ?LOG_DEBUG([ {event, update_active}, {peer, ar_util:format_peer(PeerTasks3#peer_tasks.peer)}, @@ -369,8 +355,8 @@ rebalance_peer(PeerTasks, State) -> {after_max, PeerTasks3#peer_tasks.max_active}, {worker_count, WorkerCount}, {active_count, PeerTasks2#peer_tasks.active_count}, - {latency_target, LatencyTarget}, - {latency_ema, Milliseconds} + {throughput_target, ThroughputTarget}, + {latency_ema, Performance#performance.latency} ]), {PeerTasks3, State2}. @@ -424,9 +410,9 @@ format_peer(Task, Args) -> ar_util:format_peer(element(3, Args)) end. -update_active(PeerTasks, IsOK, Milliseconds, WorkerCount, LatencyTarget) -> +update_active(PeerTasks, Performance, WorkerCount, ThroughputTarget) -> %% Determine target max_active: - %% 1. Increase max_active when the EMA is less than the threshold + %% 1. Increase max_active when the EthrMA is less than the threshold %% 2. Decrease max_active if the most recent request was slower than the threshold - this %% allows us to respond more quickly to a sudden drop in performance %% @@ -435,23 +421,15 @@ update_active(PeerTasks, IsOK, Milliseconds, WorkerCount, LatencyTarget) -> %% This prevents situations where we have a low number of active tasks and no queue which %% causes each request to complete fast and hikes up the max_active. Then we get a new %% batch of queued tasks and since the max_active is so high we overwhelm the peer. 
- LatencyEMA = PeerTasks#peer_tasks.latency_ema, MaxActive = PeerTasks#peer_tasks.max_active, ActiveCount = PeerTasks#peer_tasks.active_count, - TargetMaxActive = case { - IsOK, Milliseconds < LatencyTarget, LatencyEMA < LatencyTarget} of - {false, _, _} -> - %% Always reduce if there was an error - MaxActive-1; - {true, false, _} -> - %% Milliseconds > threshold, decrease max_active + TargetMaxActive = case Performance#performance.rating < ThroughputTarget of + false -> + %% throughput > target, decrease max_active MaxActive-1; - {true, true, true} -> - %% Milliseconds < threshold and EMA < threshold, increase max_active. - MaxActive+1; - _ -> - %% Milliseconds < threshold and EMA > threshold, do nothing. - MaxActive + true -> + %% througput < target, increase max_active. + MaxActive+1 end, %% Can't have more active tasks than workers. diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index fe32d95d1..a8a687052 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -5,12 +5,13 @@ -include_lib("arweave/include/ar.hrl"). -include_lib("arweave/include/ar_config.hrl"). +-include_lib("arweave/include/ar_peers.hrl"). -include_lib("eunit/include/eunit.hrl"). --export([start_link/0, get_peers/0, get_trusted_peers/0, is_public_peer/1, +-export([start_link/0, get_peers/0, get_peer_performances/1, get_trusted_peers/0, is_public_peer/1, get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, - resolve_and_cache_peer/2, + resolve_and_cache_peer/2, start_request/3, end_request/4, rate_fetched_data/2, gossiped_block/2, gossiped_tx/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -40,23 +41,12 @@ -define(MINIMUM_SUCCESS, 0.5). -define(THROUGHPUT_ALPHA, 0.1). -define(SUCCESS_ALPHA, 0.01). --define(STARTING_LATENCY_EMA, 1000). %% initial value to avoid over-weighting the first response %% We only do scoring of this many TCP ports per IP address. 
When there are not enough slots, %% we remove the peer from the first slot. -define(DEFAULT_PEER_PORT_MAP, {empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, empty_slot}). --record(performance, { - bytes = 0, - latency = ?STARTING_LATENCY_EMA, - transfers = 0, - success = 1.0, - rating = 0, - release = -1, - active_requests = 0 -}). - -record(state, {}). %%%=================================================================== @@ -77,6 +67,9 @@ get_peers() -> Peers end. +get_peer_performances(Peers) -> + [ get_or_init_performance(Peer) || Peer <- Peers]. + -if(?NETWORK_NAME == "arweave.N.1"). get_trusted_peers() -> {ok, Config} = application:get_env(arweave, config), @@ -445,6 +438,14 @@ get_peer_peers(Peer) -> Peers -> Peers end. +get_or_init_performance(Peer) -> + case ets:lookup(?MODULE, {peer, Peer}) of + [] -> + #performance{}; + [{_, Performance}] -> + Performance + end. + discover_peers([]) -> ok; discover_peers([Peer | Peers]) -> @@ -517,13 +518,19 @@ load_peer({Peer, Performance}) -> {performance, Bytes, Latency, Transfers, _Failures, Rating} -> %% For compatibility with a few nodes already storing the records %% without the release field. 
- set_performance(Peer, #performance{ - bytes = Bytes, latency = Latency, transfers = Transfers, - rating = Rating, release = -1 }); - _ -> + % set_performance(Peer, #performance{ + % bytes = Bytes, latency = Latency, transfers = Transfers, + % rating = Rating, release = -1 }); + % XXX TODO + ok; + {performance, Bytes, Latency, Transfers, _Failures, Rating, Release} -> %% Always reset success and active_requests when loading a peer from disk - set_performance(Peer, - Performance#performance{ success = 1.0, active_requests = 0 }) + % set_performance(Peer, + % Performance#performance{ success = 1.0, active_requests = 0 }); + % XX TODO + ok; + {performance, 3, Release, Overall, Sync} -> + set_performance(Peer, Performance) end, ok; Network -> @@ -682,14 +689,6 @@ update_rating(Peer, LatencyMicroseconds, Size, IsSuccess) -> calculate_ema(OldEMA, Value, Alpha) -> Alpha * Value + (1 - Alpha) * OldEMA. -get_or_init_performance(Peer) -> - case ets:lookup(?MODULE, {peer, Peer}) of - [] -> - #performance{}; - [{_, Performance}] -> - Performance - end. 
- set_performance(Peer, Performance, TotalRating) -> ets:insert(?MODULE, [ {{peer, Peer}, Performance}, From aaae82ee14a1c6f6f8d6b0a7da47a5faf9800e3c Mon Sep 17 00:00:00 2001 From: James Piechota Date: Tue, 4 Jul 2023 20:33:55 +0000 Subject: [PATCH 18/30] WIP --- apps/arweave/include/ar_peers.hrl | 17 +--- apps/arweave/src/ar_data_sync.erl | 8 +- apps/arweave/src/ar_events_sup.erl | 2 +- apps/arweave/src/ar_header_sync.erl | 4 +- apps/arweave/src/ar_http.erl | 3 +- apps/arweave/src/ar_http_iface_client.erl | 2 +- apps/arweave/src/ar_http_iface_middleware.erl | 2 +- apps/arweave/src/ar_peers.erl | 92 +++++++++---------- apps/arweave/src/ar_randomx_state.erl | 2 +- apps/arweave/test/ar_test_node.erl | 2 +- 10 files changed, 62 insertions(+), 72 deletions(-) diff --git a/apps/arweave/include/ar_peers.hrl b/apps/arweave/include/ar_peers.hrl index 13f5e0253..378361a6e 100644 --- a/apps/arweave/include/ar_peers.hrl +++ b/apps/arweave/include/ar_peers.hrl @@ -5,7 +5,7 @@ -define(STARTING_LATENCY_EMA, 1000). %% initial value to avoid over-weighting the first response --record(overall_performance, { +-record(metrics, { bytes = 0, latency = ?STARTING_LATENCY_EMA, transfers = 0, @@ -13,20 +13,13 @@ rating = 0 }). --record(sync_performance, { - bytes = 0, - latency = ?STARTING_LATENCY_EMA, - transfers = 0, - success = 1.0, - max_active = 0, - rating = 0 -}). - -record(performance, { version = 3, release = -1, - overall = #overall_performance{}, - sync = #sync_performance{} + metrics = #{ + overall => #metrics{}, + data_sync => #metrics{} + } }). -endif. 
\ No newline at end of file diff --git a/apps/arweave/src/ar_data_sync.erl b/apps/arweave/src/ar_data_sync.erl index 6a3975186..1cfef7fdc 100644 --- a/apps/arweave/src/ar_data_sync.erl +++ b/apps/arweave/src/ar_data_sync.erl @@ -944,7 +944,7 @@ handle_cast({store_fetched_chunk, Peer, Byte, Proof} = Cast, State) -> ar_util:cast_after(1000, self(), Cast), {noreply, State}; false -> - % ar_events:send(peer, {served_chunk, Peer, Time, TransferSize}), + % ar_events:send(peer, {fetched_chunk, Peer, Time, TransferSize}), ar_packing_server:request_unpack(AbsoluteOffset, ChunkArgs), ?LOG_DEBUG([{event, requested_fetched_chunk_unpacking}, {data_path_hash, ar_util:encode(crypto:hash(sha256, @@ -964,7 +964,7 @@ handle_cast({store_fetched_chunk, Peer, Byte, Proof} = Cast, State) -> decrement_chunk_cache_size(), process_invalid_fetched_chunk(Peer, Byte, State); {true, DataRoot, TXStartOffset, ChunkEndOffset, TXSize, ChunkSize, ChunkID} -> - % ar_events:send(peer, {served_chunk, Peer, Time, TransferSize}), + % ar_events:send(peer, {fetched_chunk, Peer, Time, TransferSize}), AbsoluteTXStartOffset = BlockStartOffset + TXStartOffset, AbsoluteEndOffset = AbsoluteTXStartOffset + ChunkEndOffset, ChunkArgs = {unpacked, Chunk, AbsoluteEndOffset, TXRoot, ChunkSize}, @@ -1369,7 +1369,7 @@ get_chunk(Offset, SeekOffset, Pack, Packing, StoredPacking, StoreID) -> {error, Reason}; {ok, {Chunk, DataPath}, AbsoluteOffset, TXRoot, ChunkSize, TXPath} -> ChunkID = - case validate_served_chunk({AbsoluteOffset, DataPath, TXPath, TXRoot, + case validate_fetched_chunk({AbsoluteOffset, DataPath, TXPath, TXRoot, ChunkSize, StoreID}) of {true, ID} -> ID; @@ -1498,7 +1498,7 @@ invalidate_bad_data_record({Start, End, ChunksIndex, StoreID, Case}) -> end end. 
-validate_served_chunk(Args) -> +validate_fetched_chunk(Args) -> {Offset, DataPath, TXPath, TXRoot, ChunkSize, StoreID} = Args, [{_, T}] = ets:lookup(ar_data_sync_state, disk_pool_threshold), case Offset > T orelse not ar_node:is_joined() of diff --git a/apps/arweave/src/ar_events_sup.erl b/apps/arweave/src/ar_events_sup.erl index d08907795..f5022b031 100644 --- a/apps/arweave/src/ar_events_sup.erl +++ b/apps/arweave/src/ar_events_sup.erl @@ -40,7 +40,7 @@ init([]) -> ?CHILD(ar_events, block, worker), %% Events: unpacked, packed. ?CHILD(ar_events, chunk, worker), - %% Events: made_request, bad_response, served_tx, served_block, served_chunk, banned + %% Events: made_request, bad_response, fetched_tx, fetched_block, fetched_chunk, banned ?CHILD(ar_events, peer, worker), %% Events: initializing, initialized, validated_pre_fork_2_6_block, new_tip, %% checkpoint_block, search_space_upper_bound. diff --git a/apps/arweave/src/ar_header_sync.erl b/apps/arweave/src/ar_header_sync.erl index ad947e0a6..fb44c2d71 100644 --- a/apps/arweave/src/ar_header_sync.erl +++ b/apps/arweave/src/ar_header_sync.erl @@ -511,10 +511,10 @@ download_block(Peers, H, H2, TXRoot) -> end, case BH of H when Height >= Fork_2_0 -> - % ar_events:send(peer, {served_block, Peer, Time, Size}), + % ar_events:send(peer, {fetched_block, Peer, Time, Size}), download_txs(Peers, B, TXRoot); H2 when Height < Fork_2_0 -> - % ar_events:send(peer, {served_block, Peer, Time, Size}), + % ar_events:send(peer, {fetched_block, Peer, Time, Size}), download_txs(Peers, B, TXRoot); _ -> ?LOG_WARNING([ diff --git a/apps/arweave/src/ar_http.erl b/apps/arweave/src/ar_http.erl index af342d269..d01a09d00 100644 --- a/apps/arweave/src/ar_http.erl +++ b/apps/arweave/src/ar_http.erl @@ -64,7 +64,6 @@ req(Args, ReestablishedConnection) -> StartTime = erlang:monotonic_time(), #{ peer := Peer, path := Path, method := Method } = Args, PathLabel = ar_http_iface_server:label_http_path(list_to_binary(Path)), - 
ar_peers:start_request(Peer, PathLabel, Method), Response = case catch gen_server:call(?MODULE, {get_connection, Args}, infinity) of {ok, PID} -> ar_rate_limiter:throttle(Peer, Path), @@ -93,7 +92,7 @@ req(Args, ReestablishedConnection) -> false -> Status = ar_metrics:get_status_class(Response), ElapsedNative = EndTime - StartTime, - ar_peers:end_request(Peer, PathLabel, Method, Response), + ar_peers:rate_response(Peer, PathLabel, Method, Response), %% NOTE: the erlang prometheus client looks at the metric name to determine units. %% If it sees _duration_ it assumes the observed value is in %% native units and it converts it to .To query native units, use: diff --git a/apps/arweave/src/ar_http_iface_client.erl b/apps/arweave/src/ar_http_iface_client.erl index ed93a2c13..7afed44ed 100644 --- a/apps/arweave/src/ar_http_iface_client.erl +++ b/apps/arweave/src/ar_http_iface_client.erl @@ -839,7 +839,7 @@ get_tx_from_remote_peer(Peer, TXID) -> ar_events:send(peer, {bad_response, {Peer, tx, invalid}}), {error, invalid_tx}; true -> - % ar_events:send(peer, {served_tx, Peer, Time, Size}), + % ar_events:send(peer, {fetched_tx, Peer, Time, Size}), TX end; Error -> diff --git a/apps/arweave/src/ar_http_iface_middleware.erl b/apps/arweave/src/ar_http_iface_middleware.erl index ddcd0ed61..2c938e741 100644 --- a/apps/arweave/src/ar_http_iface_middleware.erl +++ b/apps/arweave/src/ar_http_iface_middleware.erl @@ -1915,7 +1915,7 @@ handle_get_chunk(OffsetBinary, Req, Encoding) -> {Packing, ok}; {{true, _}, _StoreID} -> {ok, Config} = application:get_env(arweave, config), - case lists:member(pack_served_chunks, Config#config.enable) of + case lists:member(pack_fetched_chunks, Config#config.enable) of false -> {none, {reply, {404, #{}, <<>>, Req}}}; true -> diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index a8a687052..c61a805c0 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -12,7 +12,7 @@ -export([start_link/0, 
get_peers/0, get_peer_performances/1, get_trusted_peers/0, is_public_peer/1, get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, resolve_and_cache_peer/2, - start_request/3, end_request/4, rate_fetched_data/2, gossiped_block/2, gossiped_tx/1]). + rate_response/4, rate_fetched_data/2, gossiped_block/2, gossiped_tx/1]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -42,6 +42,10 @@ -define(THROUGHPUT_ALPHA, 0.1). -define(SUCCESS_ALPHA, 0.01). +-define(RATE_SUCCESS, 1). +-define(RATE_ERROR, 0). +-define(RATE_PENALTY, -1). + %% We only do scoring of this many TCP ports per IP address. When there are not enough slots, %% we remove the peer from the first slot. -define(DEFAULT_PEER_PORT_MAP, {empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, @@ -147,23 +151,14 @@ get_peer_release(Peer) -> -1 end. -start_request({_Host, _Port}, _, _) -> +rate_response({_Host, _Port}, _, _, _) -> %% Only track requests for IP-based peers as the rest of the stack assumes an IP-based peer. ok; -start_request(Peer, PathLabel, get) -> - gen_server:cast(?MODULE, {start_request, Peer, PathLabel, get}); -start_request(_Peer, _PathLabel, _) -> - ok. - -end_request({_Host, _Port}, _, _, _) -> - %% Only track requests for IP-based peers as the rest of the stack assumes an IP-based peer. - ok; -end_request(Peer, PathLabel, get, {_, {_, _, Body, Start, End}} = Response) -> - gen_server:cast(?MODULE, {end_request, +rate_response(Peer, PathLabel, get, Response) -> + gen_server:cast(?MODULE, {rate_response, Peer, PathLabel, get, - ar_metrics:get_status_class(Response), - End-Start, byte_size(term_to_binary(Body))}); -end_request(_Peer, _PathLabel, _, _Response) -> + ar_metrics:get_status_class(Response)}); +rate_response(_Peer, _PathLabel, _Method, _Response) -> ok. 
rate_fetched_data(_Peer, {ok, _}) -> @@ -305,31 +300,23 @@ handle_cast(ping_peers, State) -> ping_peers(lists:sublist(Peers, 100)), {noreply, State}; -handle_cast({start_request, Peer, _PathLabel, _Method}, State) -> - Performance = get_or_init_performance(Peer), - ActiveRequests = Performance#performance.active_requests, - set_performance(Peer, Performance#performance{ active_requests = ActiveRequests+1 }), - {noreply, State}; - -handle_cast({end_request, Peer, PathLabel, get, Status, ElapsedMicroseconds, Size}, State) -> +handle_cast({rate_response, Peer, PathLabel, get, Status}, State) -> case Status of "success" -> - update_rating(Peer, ElapsedMicroseconds, Size, true); + update_rating(Peer, ?RATE_SUCCESS); + "redirection" -> + ok; %% don't update rating + "client-error" -> + ok; %% don't update rating _ -> - update_rating(Peer, false) + update_rating(Peer, ?RATE_ERROR) end, - Performance = get_or_init_performance(Peer), - ActiveRequests = Performance#performance.active_requests, - set_performance(Peer, Performance#performance{ active_requests = ActiveRequests-1 }), ?LOG_DEBUG([ {event, update_rating}, - {update_type, request}, + {update_type, response}, {path, PathLabel}, {status, Status}, - {peer, ar_util:format_peer(Peer)}, - {active_requests, ActiveRequests}, - {latency_ms, ElapsedMicroseconds / 1000}, - {size, Size} + {peer, ar_util:format_peer(Peer)} ]), {noreply, State}; @@ -386,13 +373,13 @@ handle_info({event, peer, {made_request, Peer, Release}}, State) -> end, {noreply, State}; -handle_info({event, peer, {served_tx, Peer, TimeDelta, Size}}, State) -> - % ?LOG_DEBUG([{event, update_rating}, {type, served_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), +handle_info({event, peer, {fetched_tx, Peer, TimeDelta, Size}}, State) -> + % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), % update_rating(Peer, TimeDelta, Size), {noreply, State}; 
-handle_info({event, peer, {served_block, Peer, TimeDelta, Size}}, State) -> - % ?LOG_DEBUG([{event, update_rating}, {type, served_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), +handle_info({event, peer, {fetched_block, Peer, TimeDelta, Size}}, State) -> + % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), % update_rating(Peer, TimeDelta, Size), {noreply, State}; @@ -518,18 +505,29 @@ load_peer({Peer, Performance}) -> {performance, Bytes, Latency, Transfers, _Failures, Rating} -> %% For compatibility with a few nodes already storing the records %% without the release field. - % set_performance(Peer, #performance{ - % bytes = Bytes, latency = Latency, transfers = Transfers, - % rating = Rating, release = -1 }); - % XXX TODO - ok; + Overall = #metrics{ + bytes = Bytes, latency = Latency, transfers = Transfers, + rating = Rating }, + Performance2 = #performance{ + metrics = #{ + overall => Overall, + data_sync => #metrics{} + } + }, + set_performance(Peer, Performance2); {performance, Bytes, Latency, Transfers, _Failures, Rating, Release} -> - %% Always reset success and active_requests when loading a peer from disk - % set_performance(Peer, - % Performance#performance{ success = 1.0, active_requests = 0 }); - % XX TODO - ok; - {performance, 3, Release, Overall, Sync} -> + Overall = #metrics{ + bytes = Bytes, latency = Latency, transfers = Transfers, + rating = Rating }, + Performance2 = #performance{ + release = Release, + metrics = #{ + overall => Overall, + data_sync => #metrics{} + } + }, + set_performance(Peer, Performance2); + _ -> set_performance(Peer, Performance) end, ok; diff --git a/apps/arweave/src/ar_randomx_state.erl b/apps/arweave/src/ar_randomx_state.erl index 96cdd13c2..cf9c12e3a 100644 --- a/apps/arweave/src/ar_randomx_state.erl +++ b/apps/arweave/src/ar_randomx_state.erl @@ -331,7 +331,7 @@ get_block2(BH, Peers, RetryCount) -> {Peer, 
B, Time, Size} -> case ar_block:indep_hash(B) of BH -> - % ar_events:send(peer, {served_block, Peer, Time, Size}), + % ar_events:send(peer, {fetched_block, Peer, Time, Size}), {ok, B}; InvalidBH -> ?LOG_WARNING([ diff --git a/apps/arweave/test/ar_test_node.erl b/apps/arweave/test/ar_test_node.erl index 315d4beb0..819f490f0 100644 --- a/apps/arweave/test/ar_test_node.erl +++ b/apps/arweave/test/ar_test_node.erl @@ -84,7 +84,7 @@ start(B0, RewardAddr, Config, StorageModules) -> header_sync_jobs = 2, enable = [search_in_rocksdb_when_mining, serve_tx_data_without_limits, double_check_nonce_limiter, legacy_storage_repacking, serve_wallet_lists, - pack_served_chunks | Config#config.enable], + pack_fetched_chunks | Config#config.enable], mining_server_chunk_cache_size_limit = 4, debug = true }), From 1bb198934f98b20b280aaf03ad7d7e5e7e988ff3 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Wed, 5 Jul 2023 18:28:11 +0000 Subject: [PATCH 19/30] WIP moving to {Peer, Metric, Performance} format --- apps/arweave/include/ar_peers.hrl | 15 +- apps/arweave/src/ar_block_pre_validator.erl | 4 +- apps/arweave/src/ar_http_iface_middleware.erl | 4 +- apps/arweave/src/ar_peers.erl | 1381 +++++++++-------- apps/arweave/test/ar_data_sync_tests.erl | 90 +- 5 files changed, 780 insertions(+), 714 deletions(-) diff --git a/apps/arweave/include/ar_peers.hrl b/apps/arweave/include/ar_peers.hrl index 378361a6e..c583769a6 100644 --- a/apps/arweave/include/ar_peers.hrl +++ b/apps/arweave/include/ar_peers.hrl @@ -5,7 +5,11 @@ -define(STARTING_LATENCY_EMA, 1000). %% initial value to avoid over-weighting the first response --record(metrics, { +-define(AVAILABLE_METRICS, [overall, data_sync]). %% the performance metrics currently tracked + +-record(performance, { + version = 3, + release = -1, bytes = 0, latency = ?STARTING_LATENCY_EMA, transfers = 0, @@ -13,13 +17,4 @@ rating = 0 }). 
--record(performance, { - version = 3, - release = -1, - metrics = #{ - overall => #metrics{}, - data_sync => #metrics{} - } -}). - -endif. \ No newline at end of file diff --git a/apps/arweave/src/ar_block_pre_validator.erl b/apps/arweave/src/ar_block_pre_validator.erl index f1ac1ac6a..49be0a6af 100644 --- a/apps/arweave/src/ar_block_pre_validator.erl +++ b/apps/arweave/src/ar_block_pre_validator.erl @@ -709,7 +709,7 @@ pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp) -> accept_block(B, Peer, Timestamp, Gossip) -> ar_ignore_registry:add(B#block.indep_hash), ar_events:send(block, {new, B, #{ source => {peer, Peer}, gossip => Gossip }}), - % ar_peers:gossiped_block(Peer), + % ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), record_block_pre_validation_time(Timestamp), ?LOG_INFO([{event, accepted_block}, {height, B#block.height}, {indep_hash, ar_util:encode(B#block.indep_hash)}]), @@ -753,7 +753,7 @@ pre_validate_pow(B, BDS, PrevB, Peer, Timestamp) -> B2 = B#block{ txs = include_transactions(B#block.txs) }, ar_events:send(block, {new, B2, #{ source => {peer, Peer}, recall_byte => RecallByte }}), - % ar_peers:gossiped_block(Peer), + % ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), record_block_pre_validation_time(Timestamp), prometheus_counter:inc(block2_received_transactions, count_received_transactions(B#block.txs)), diff --git a/apps/arweave/src/ar_http_iface_middleware.erl b/apps/arweave/src/ar_http_iface_middleware.erl index 2c938e741..90483ad4c 100644 --- a/apps/arweave/src/ar_http_iface_middleware.erl +++ b/apps/arweave/src/ar_http_iface_middleware.erl @@ -1830,7 +1830,7 @@ handle_post_tx_accepted(Req, TX, Peer) -> %% of excessive transaction volumes. 
{A, B, C, D, _} = Peer, ar_blacklist_middleware:decrement_ip_addr({A, B, C, D}, Req), - ar_peers:gossiped_tx(Peer), + ar_peers:gossiped_data(Peer, Tx), ar_events:send(tx, {new, TX, Peer}), TXID = TX#tx.id, ar_ignore_registry:remove_temporary(TXID), @@ -2359,7 +2359,7 @@ post_block(enqueue_block, {B, Peer}, Req, ReceiveTimestamp) -> end, ?LOG_INFO([{event, received_block}, {block, ar_util:encode(B#block.indep_hash)}]), ValidationStatus = ar_block_pre_validator:pre_validate(B2, Peer, ReceiveTimestamp), - ar_peers:gossiped_block(Peer, ValidationStatus), + ar_peers:gossiped_data(Peer, B2, ValidationStatus), {200, #{}, <<"OK">>, Req}. encode_txids([]) -> diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index c61a805c0..29f9daa3c 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -10,9 +10,9 @@ -include_lib("eunit/include/eunit.hrl"). -export([start_link/0, get_peers/0, get_peer_performances/1, get_trusted_peers/0, is_public_peer/1, - get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, - resolve_and_cache_peer/2, - rate_response/4, rate_fetched_data/2, gossiped_block/2, gossiped_tx/1]). + get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, resolve_and_cache_peer/2, + rate_response/4, rate_fetched_data/2, gossiped_data/3, gossiped_data/2 +]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -48,8 +48,10 @@ %% We only do scoring of this many TCP ports per IP address. When there are not enough slots, %% we remove the peer from the first slot. --define(DEFAULT_PEER_PORT_MAP, {empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, - empty_slot, empty_slot, empty_slot, empty_slot, empty_slot}). +-define(DEFAULT_PEER_PORT_MAP, + {empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, + empty_slot, empty_slot} +). -record(state, {}). @@ -59,554 +61,620 @@ %% @doc Start the server. 
start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). get_peers() -> - case catch ets:lookup(?MODULE, peers) of - {'EXIT', _} -> - []; - [] -> - []; - [{_, Peers}] -> - Peers - end. + case catch ets:lookup(?MODULE, peers) of + {'EXIT', _} -> + []; + [] -> + []; + [{_, Peers}] -> + Peers + end. get_peer_performances(Peers) -> - [ get_or_init_performance(Peer) || Peer <- Peers]. + [get_or_init_performance(Peer) || Peer <- Peers]. -if(?NETWORK_NAME == "arweave.N.1"). get_trusted_peers() -> - {ok, Config} = application:get_env(arweave, config), - case Config#config.peers of - [] -> - ArweavePeers = ["sfo-1.na-west-1.arweave.net", "ams-1.eu-central-1.arweave.net", - "fra-1.eu-central-2.arweave.net", "blr-1.ap-central-1.arweave.net", - "sgp-1.ap-central-2.arweave.net"], - resolve_peers(ArweavePeers); - Peers -> - Peers - end. + {ok, Config} = application:get_env(arweave, config), + case Config#config.peers of + [] -> + ArweavePeers = ["sfo-1.na-west-1.arweave.net", "ams-1.eu-central-1.arweave.net", + "fra-1.eu-central-2.arweave.net", "blr-1.ap-central-1.arweave.net", + "sgp-1.ap-central-2.arweave.net" + ], + resolve_peers(ArweavePeers); + Peers -> + Peers + end. -else. get_trusted_peers() -> - {ok, Config} = application:get_env(arweave, config), - Config#config.peers. + {ok, Config} = application:get_env(arweave, config), + Config#config.peers. -endif. resolve_peers([]) -> - []; + []; resolve_peers([RawPeer | Peers]) -> - case ar_util:safe_parse_peer(RawPeer) of - {ok, Peer} -> - [Peer | resolve_peers(Peers)]; - {error, invalid} -> - ?LOG_WARNING([{event, failed_to_resolve_trusted_peer}, - {peer, RawPeer}]), - resolve_peers(Peers) - end. + case ar_util:safe_parse_peer(RawPeer) of + {ok, Peer} -> + [Peer | resolve_peers(Peers)]; + {error, invalid} -> + ?LOG_WARNING([ + {event, failed_to_resolve_trusted_peer}, + {peer, RawPeer} + ]), + resolve_peers(Peers) + end. 
%% @doc Return true if the given peer has a public IPv4 address. %% https://en.wikipedia.org/wiki/Reserved_IP_addresses. is_public_peer({Oct1, Oct2, Oct3, Oct4, _Port}) -> - is_public_peer({Oct1, Oct2, Oct3, Oct4}); + is_public_peer({Oct1, Oct2, Oct3, Oct4}); is_public_peer({0, _, _, _}) -> - false; + false; is_public_peer({10, _, _, _}) -> - false; + false; is_public_peer({127, _, _, _}) -> - false; + false; is_public_peer({100, Oct2, _, _}) when Oct2 >= 64 andalso Oct2 =< 127 -> - false; + false; is_public_peer({169, 254, _, _}) -> - false; + false; is_public_peer({172, Oct2, _, _}) when Oct2 >= 16 andalso Oct2 =< 31 -> - false; + false; is_public_peer({192, 0, 0, _}) -> - false; + false; is_public_peer({192, 0, 2, _}) -> - false; + false; is_public_peer({192, 88, 99, _}) -> - false; + false; is_public_peer({192, 168, _, _}) -> - false; + false; is_public_peer({198, 18, _, _}) -> - false; + false; is_public_peer({198, 19, _, _}) -> - false; + false; is_public_peer({198, 51, 100, _}) -> - false; + false; is_public_peer({203, 0, 113, _}) -> - false; + false; is_public_peer({Oct1, _, _, _}) when Oct1 >= 224 -> - false; + false; is_public_peer(_) -> - true. + true. %% @doc Return the release nubmer reported by the peer. %% Return -1 if the release is not known. get_peer_release(Peer) -> - case catch ets:lookup(?MODULE, {peer, Peer}) of - [{_, #performance{ release = Release }}] -> - Release; - _ -> - -1 - end. + case catch ets:lookup(?MODULE, {peer, Peer}) of + [{_, #performance{release = Release}}] -> + Release; + _ -> + -1 + end. rate_response({_Host, _Port}, _, _, _) -> - %% Only track requests for IP-based peers as the rest of the stack assumes an IP-based peer. - ok; + %% Only track requests for IP-based peers as the rest of the stack assumes an IP-based peer. 
+ ok; rate_response(Peer, PathLabel, get, Response) -> - gen_server:cast(?MODULE, {rate_response, - Peer, PathLabel, get, - ar_metrics:get_status_class(Response)}); + gen_server:cast( + ?MODULE, {rate_response, Peer, PathLabel, get, ar_metrics:get_status_class(Response)} + ); rate_response(_Peer, _PathLabel, _Method, _Response) -> - ok. + ok. rate_fetched_data(_Peer, {ok, _}) -> - %% The fetched data is valid so the rating was already captured as part of - %% the start/end request pair. Nothing more to do. - ok; + %% The fetched data is valid so the rating was already captured as part of + %% the start/end request pair. Nothing more to do. + ok; rate_fetched_data(Peer, {error, _}) -> - %% The fetched data is invalid, so we need to reverse the rating that was applied - %% in end_request, and then apply a penalty - gen_server:cast(?MODULE, {invalid_fetched_data, Peer}); + %% The fetched data is invalid, so we need to reverse the rating that was applied + %% in end_request, and then apply a penalty + gen_server:cast(?MODULE, {invalid_fetched_data, Peer}); rate_fetched_data(Peer, invalid) -> - %% The fetched data is invalid, so we need to reverse the rating that was applied - %% in end_request, and then apply a penalty - gen_server:cast(?MODULE, {invalid_fetched_data, Peer}). - -gossiped_block(Peer, ok) -> - gen_server:cast(?MODULE, { - gossiped_data, Peer - }); -gossiped_block(_Peer, _ValidationStatus) -> - %% Ignore skipped or invalid blocks for now (consistent with old behavior, but may need to - %% be revisited) - ok. - -gossiped_tx(Peer) -> - gen_server:cast(?MODULE, { - gossiped_data, Peer - }). + %% The fetched data is invalid, so we need to reverse the rating that was applied + %% in end_request, and then apply a penalty + gen_server:cast(?MODULE, {invalid_fetched_data, Peer}). + +gossiped_data(Peer, Data) -> + gossiped_data(Peer, Data, ok). 
+ +gossiped_data(Peer, Data, ok) -> + gen_server:cast(?MODULE, { + gossiped_data, Peer, Data + }); +gossiped_data(_Peer, _Data, _ValidationStatus) -> + %% Ignore skipped or invalid blocks for now (consistent with old behavior, but may need to + %% be revisited) + ok. %% @doc Print statistics about the current peers. stats() -> - Connected = get_peers(), - io:format("Connected peers, in preference order:~n"), - stats(Connected), - io:format("Other known peers:~n"), - All = ets:foldl(fun({{peer, Peer}, _}, Acc) -> [Peer | Acc]; - (_, Acc) -> Acc end, [], ?MODULE), - stats(All -- Connected). + Connected = get_peers(), + io:format("Connected peers, in preference order:~n"), + stats(Connected), + io:format("Other known peers:~n"), + All = ets:foldl( + fun + ({{peer, Peer}, _}, Acc) -> [Peer | Acc]; + (_, Acc) -> Acc + end, + [], + ?MODULE + ), + stats(All -- Connected). stats(Peers) -> - lists:foreach(fun(Peer) -> format_stats(Peer, get_or_init_performance(Peer)) end, - Peers). + lists:foreach( + fun(Peer) -> format_stats(Peer, get_or_init_performance(Peer)) end, + Peers + ). discover_peers() -> - case ets:lookup(?MODULE, peers) of - [] -> - ok; - [{_, []}] -> - ok; - [{_, Peers}] -> - Peer = ar_util:pick_random(Peers), - discover_peers(get_peer_peers(Peer)) - end. + case ets:lookup(?MODULE, peers) of + [] -> + ok; + [{_, []}] -> + ok; + [{_, Peers}] -> + Peer = ar_util:pick_random(Peers), + discover_peers(get_peer_peers(Peer)) + end. %% @doc Resolve the domain name of the given peer (if the given peer is an IP address) %% and cache it. Return {ok, Peer} | {error, Reason}. 
resolve_and_cache_peer(RawPeer, Type) -> - case ar_util:safe_parse_peer(RawPeer) of - {ok, Peer} -> - case ets:lookup(?MODULE, {raw_peer, RawPeer}) of - [] -> - ets:insert(?MODULE, {{raw_peer, RawPeer}, Peer}), - ets:insert(?MODULE, {{Type, Peer}, RawPeer}); - [{_, Peer}] -> - ok; - [{_, PreviousPeer}] -> - %% This peer is configured with a domain name rather than IP address, - %% and the IP underlying the domain name has changed. - ets:delete(?MODULE, {Type, PreviousPeer}), - ets:insert(?MODULE, {{raw_peer, RawPeer}, Peer}), - ets:insert(?MODULE, {{Type, Peer}, RawPeer}) - end, - {ok, Peer}; - Error -> - Error - end. + case ar_util:safe_parse_peer(RawPeer) of + {ok, Peer} -> + case ets:lookup(?MODULE, {raw_peer, RawPeer}) of + [] -> + ets:insert(?MODULE, {{raw_peer, RawPeer}, Peer}), + ets:insert(?MODULE, {{Type, Peer}, RawPeer}); + [{_, Peer}] -> + ok; + [{_, PreviousPeer}] -> + %% This peer is configured with a domain name rather than IP address, + %% and the IP underlying the domain name has changed. + ets:delete(?MODULE, {Type, PreviousPeer}), + ets:insert(?MODULE, {{raw_peer, RawPeer}, Peer}), + ets:insert(?MODULE, {{Type, Peer}, RawPeer}) + end, + {ok, Peer}; + Error -> + Error + end. %%%=================================================================== %%% Generic server callbacks. %%%=================================================================== init([]) -> - process_flag(trap_exit, true), - [ok, ok] = ar_events:subscribe([peer, block]), - load_peers(), - gen_server:cast(?MODULE, rank_peers), - gen_server:cast(?MODULE, ping_peers), - timer:apply_interval(?GET_MORE_PEERS_FREQUENCY_MS, ?MODULE, discover_peers, []), - {ok, #state{}}. + process_flag(trap_exit, true), + [ok, ok] = ar_events:subscribe([peer, block]), + load_peers(), + gen_server:cast(?MODULE, rank_peers), + gen_server:cast(?MODULE, ping_peers), + timer:apply_interval(?GET_MORE_PEERS_FREQUENCY_MS, ?MODULE, discover_peers, []), + {ok, #state{}}. 
handle_call(Request, _From, State) -> - ?LOG_WARNING("event: unhandled_call, request: ~p", [Request]), - {reply, ok, State}. + ?LOG_WARNING("event: unhandled_call, request: ~p", [Request]), + {reply, ok, State}. handle_cast({add_peer, Peer, Release}, State) -> - may_be_rotate_peer_ports(Peer), - case ets:lookup(?MODULE, {peer, Peer}) of - [{_, #performance{ release = Release }}] -> - ok; - [{_, Performance}] -> - set_performance(Peer, Performance#performance{ release = Release }); - [] -> - set_performance(Peer, #performance{ release = Release }) - end, - {noreply, State}; - + may_be_rotate_peer_ports(Peer), + case ets:lookup(?MODULE, {peer, Peer}) of + [{_, #performance{release = Release}}] -> + ok; + [{_, Performance}] -> + set_performance(Peer, Performance#performance{release = Release}); + [] -> + set_performance(Peer, #performance{release = Release}) + end, + {noreply, State}; handle_cast(rank_peers, State) -> - Total = - case ets:lookup(?MODULE, rating_total) of - [] -> - 0; - [{_, T}] -> - T - end, - Peers = - ets:foldl( - fun ({{peer, Peer}, Performance}, Acc) -> - %% Bigger score increases the chances to end up on the top - %% of the peer list, but at the same time the ranking is - %% probabilistic to always give everyone a chance to improve - %% in the competition (i.e., reduce the advantage gained by - %% being the first to earn a reputation). 
- Score = rand:uniform() * Performance#performance.rating - / (Total + 0.0001), - [{Peer, Score} | Acc]; - (_, Acc) -> - Acc - end, - [], - ?MODULE - ), - prometheus_gauge:set(arweave_peer_count, length(Peers)), - ets:insert(?MODULE, {peers, lists:sublist(rank_peers(Peers), ?MAX_PEERS)}), - ar_util:cast_after(?RANK_PEERS_FREQUENCY_MS, ?MODULE, rank_peers), - stats(), - {noreply, State}; - + Total = get_total_rating(), + Peers = + ets:foldl( + fun + ({{peer, Peer}, Performance}, Acc) -> + %% Bigger score increases the chances to end up on the top + %% of the peer list, but at the same time the ranking is + %% probabilistic to always give everyone a chance to improve + %% in the competition (i.e., reduce the advantage gained by + %% being the first to earn a reputation). + Score = + rand:uniform() * Performance#performance.rating / + (Total + 0.0001), + [{Peer, Score} | Acc]; + (_, Acc) -> + Acc + end, + [], + ?MODULE + ), + prometheus_gauge:set(arweave_peer_count, length(Peers)), + ets:insert(?MODULE, {peers, lists:sublist(rank_peers(Peers), ?MAX_PEERS)}), + ar_util:cast_after(?RANK_PEERS_FREQUENCY_MS, ?MODULE, rank_peers), + stats(), + {noreply, State}; handle_cast(ping_peers, State) -> - [{peers, Peers}] = ets:lookup(?MODULE, peers), - ping_peers(lists:sublist(Peers, 100)), - {noreply, State}; - + [{peers, Peers}] = ets:lookup(?MODULE, peers), + ping_peers(lists:sublist(Peers, 100)), + {noreply, State}; handle_cast({rate_response, Peer, PathLabel, get, Status}, State) -> - case Status of - "success" -> - update_rating(Peer, ?RATE_SUCCESS); - "redirection" -> - ok; %% don't update rating - "client-error" -> - ok; %% don't update rating - _ -> - update_rating(Peer, ?RATE_ERROR) - end, - ?LOG_DEBUG([ - {event, update_rating}, - {update_type, response}, - {path, PathLabel}, - {status, Status}, - {peer, ar_util:format_peer(Peer)} - ]), - {noreply, State}; - + case Status of + "success" -> + update_rating(Peer, ?RATE_SUCCESS); + "redirection" -> + %% don't update rating 
+ ok; + "client-error" -> + %% don't update rating + ok; + _ -> + update_rating(Peer, ?RATE_ERROR) + end, + ?LOG_DEBUG([ + {event, update_rating}, + {update_type, response}, + {path, PathLabel}, + {status, Status}, + {peer, ar_util:format_peer(Peer)} + ]), + {noreply, State}; handle_cast({invalid_fetched_data, Peer}, State) -> - ?LOG_DEBUG([ - {event, update_rating}, - {update_type, invalid_fetched_data}, - {peer, ar_util:format_peer(Peer)} - ]), - %% log 2 failures - first is to reverse the success that was previously recorded by end_request - %% (since end_request only considers whether or not the HTTP request was successful and does not - %% consider the validity of the data it may be overly permissive), and the second is to - %% penalize the peer for serving invalid data. - %% Note: this is an approximation as due to the nature of the EMA this won't exactly reverse - %% the prior success. - update_rating(Peer, false), - update_rating(Peer, false), - {noreply, State}; - - -handle_cast({gossiped_data, Peer}, State) -> - case check_external_peer(Peer) of - ok -> - ?LOG_DEBUG([ - {event, update_rating}, - {update_type, gossiped_data}, - {peer, ar_util:format_peer(Peer)} - ]), - update_rating(Peer, true); - _ -> - ok - end, - - {noreply, State}; - + ?LOG_DEBUG([ + {event, update_rating}, + {update_type, invalid_fetched_data}, + {peer, ar_util:format_peer(Peer)} + ]), + %% log 2 failures - first is to reverse the success that was previously recorded by end_request + %% (since end_request only considers whether or not the HTTP request was successful and does not + %% consider the validity of the data it may be overly permissive), and the second is to + %% penalize the peer for serving invalid data. + %% Note: this is an approximation as due to the nature of the EMA this won't exactly reverse + %% the prior success. 
+ update_rating(Peer, false), + update_rating(Peer, false), + {noreply, State}; +handle_cast({gossiped_data, Peer, Data}, State) -> + case check_external_peer(Peer) of + ok -> + ?LOG_DEBUG([ + {event, update_rating}, + {update_type, gossiped_data}, + {peer, ar_util:format_peer(Peer)} + ]), + update_rating(Peer, true); + _ -> + ok + end, + + {noreply, State}; handle_cast(Cast, State) -> - ?LOG_WARNING("event: unhandled_cast, cast: ~p", [Cast]), - {noreply, State}. + ?LOG_WARNING("event: unhandled_cast, cast: ~p", [Cast]), + {noreply, State}. handle_info({event, peer, {made_request, Peer, Release}}, State) -> - may_be_rotate_peer_ports(Peer), - case ets:lookup(?MODULE, {peer, Peer}) of - [{_, #performance{ release = Release }}] -> - ok; - [{_, Performance}] -> - set_performance(Peer, Performance#performance{ release = Release }); - [] -> - case check_external_peer(Peer) of - ok -> - set_performance(Peer, #performance{ release = Release }); - _ -> - ok - end - end, - {noreply, State}; - + may_be_rotate_peer_ports(Peer), + case ets:lookup(?MODULE, {peer, Peer}) of + [{_, #performance{release = Release}}] -> + ok; + [{_, Performance}] -> + set_performance(Peer, Performance#performance{release = Release}); + [] -> + case check_external_peer(Peer) of + ok -> + set_performance(Peer, #performance{release = Release}); + _ -> + ok + end + end, + {noreply, State}; handle_info({event, peer, {fetched_tx, Peer, TimeDelta, Size}}, State) -> - % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - % update_rating(Peer, TimeDelta, Size), - {noreply, State}; - + % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), + % update_rating(Peer, TimeDelta, Size), + {noreply, State}; handle_info({event, peer, {fetched_block, Peer, TimeDelta, Size}}, State) -> - % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, 
ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - % update_rating(Peer, TimeDelta, Size), - {noreply, State}; - + % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), + % update_rating(Peer, TimeDelta, Size), + {noreply, State}; handle_info({event, peer, {bad_response, {Peer, _Type, _Reason}}}, State) -> - issue_warning(Peer), - {noreply, State}; - + issue_warning(Peer), + {noreply, State}; handle_info({event, peer, {banned, BannedPeer}}, State) -> - remove_peer(BannedPeer), - {noreply, State}; - + remove_peer(BannedPeer), + {noreply, State}; handle_info({event, block, {rejected, failed_to_fetch_first_chunk, _H, Peer}}, State) -> - issue_warning(Peer), - {noreply, State}; - + issue_warning(Peer), + {noreply, State}; handle_info({event, block, {rejected, failed_to_fetch_second_chunk, _H, Peer}}, State) -> - issue_warning(Peer), - {noreply, State}; - + issue_warning(Peer), + {noreply, State}; handle_info({event, block, {rejected, failed_to_fetch_chunk, _H, Peer}}, State) -> - issue_warning(Peer), - {noreply, State}; - + issue_warning(Peer), + {noreply, State}; handle_info({event, block, _}, State) -> - {noreply, State}; - + {noreply, State}; handle_info({'EXIT', _, normal}, State) -> - {noreply, State}; - + {noreply, State}; handle_info(Message, State) -> - ?LOG_WARNING("event: unhandled_info, message: ~p", [Message]), - {noreply, State}. + ?LOG_WARNING("event: unhandled_info, message: ~p", [Message]), + {noreply, State}. terminate(_Reason, _State) -> - store_peers(). + store_peers(). %%%=================================================================== %%% Private functions. %%%=================================================================== get_peer_peers(Peer) -> - case ar_http_iface_client:get_peers(Peer) of - unavailable -> []; - Peers -> Peers - end. + case ar_http_iface_client:get_peers(Peer) of + unavailable -> []; + Peers -> Peers + end. 
get_or_init_performance(Peer) -> - case ets:lookup(?MODULE, {peer, Peer}) of - [] -> - #performance{}; - [{_, Performance}] -> - Performance - end. + get_or_init_performance(Peer, overall). +get_or_init_performance(Peer, Metric) -> + case ets:lookup(?MODULE, {peer, Peer, Metric}) of + [] -> + #performance{}; + [{_, Performance}] -> + Performance + end. + +set_performance(Peer, Performance) -> + set_performance(Peer, overall, Performance). +set_performance(Peer, Metric, Performance) -> + ets:insert(?MODULE, [{{peer, Peer, Metric}, Performance}]). + +get_total_rating() -> + get_total_rating(overall). +get_total_rating(Metric) -> + case ets:lookup(?MODULE, {rating_total, Metric}) of + [] -> + 0; + [{_, Total}] -> + Total + end. + +set_total_rating(Total) -> + set_total_rating(overall, Total). +set_total_rating(Metric, Total) -> + ets:insert(?MODULE, {{rating_total, Metric}, Total}). discover_peers([]) -> - ok; + ok; discover_peers([Peer | Peers]) -> - case ets:member(?MODULE, {peer, Peer}) of - true -> - ok; - false -> - IsPublic = is_public_peer(Peer), - IsBanned = ar_blacklist_middleware:is_peer_banned(Peer) == banned, - IsBlacklisted = lists:member(Peer, ?PEER_PERMANENT_BLACKLIST), - case IsPublic andalso not IsBanned andalso not IsBlacklisted of - false -> - ok; - true -> - case ar_http_iface_client:get_info(Peer, release) of - {<<"release">>, Release} when is_integer(Release) -> - gen_server:cast(?MODULE, {add_peer, Peer, Release}); - _ -> - ok - end - end - end, - discover_peers(Peers). 
+ case ets:member(?MODULE, {peer, Peer}) of + true -> + ok; + false -> + IsPublic = is_public_peer(Peer), + IsBanned = ar_blacklist_middleware:is_peer_banned(Peer) == banned, + IsBlacklisted = lists:member(Peer, ?PEER_PERMANENT_BLACKLIST), + case IsPublic andalso not IsBanned andalso not IsBlacklisted of + false -> + ok; + true -> + case ar_http_iface_client:get_info(Peer, release) of + {<<"release">>, Release} when is_integer(Release) -> + gen_server:cast(?MODULE, {add_peer, Peer, Release}); + _ -> + ok + end + end + end, + discover_peers(Peers). format_stats(Peer, Perf) -> - KB = Perf#performance.bytes / 1024, - io:format("\t~s ~.2f kB/s (~.2f kB, ~B latency, ~.2f success, ~p transfers, ~B requests)~n", - [string:pad(ar_util:format_peer(Peer), 21, trailing, $ ), - float(Perf#performance.rating), KB, trunc(Perf#performance.latency), - Perf#performance.success, Perf#performance.transfers, Perf#performance.active_requests]). + KB = Perf#performance.bytes / 1024, + io:format( + "\t~s ~.2f kB/s (~.2f kB, ~B latency, ~.2f success, ~p transfers)~n", + [ + string:pad(ar_util:format_peer(Peer), 21, trailing, $\s), + float(Perf#performance.rating), + KB, + trunc(Perf#performance.latency), + Perf#performance.success, + Perf#performance.transfers + ] + ). + +read_peer_records() -> + PeerRecords = case ar_storage:read_term(peers) of + not_found -> + ok; + {ok, {_TotalRating, Records}} -> + %% Legacy format included the TotalRating, but since we always recalculate it when + %% loading the peers, we've dropped it from the saved format. 
+ Records; + {ok, Records} -> + Records + end, -load_peers() -> - case ar_storage:read_term(peers) of - not_found -> - ok; - {ok, {_TotalRating, Records}} -> - ?LOG_INFO([{event, polling_saved_peers}]), - ar:console("Polling saved peers...~n"), - load_peers(Records), - TotalRating = - ets:foldl( - fun ({{peer_ip, _IP}, _}, Acc) -> - Acc; - ({{peer, _Peer}, Performance}, Acc) -> - Acc + Performance#performance.rating - end, - 0, - ?MODULE - ), - ets:insert(?MODULE, {rating_total, TotalRating}), - ?LOG_INFO([{event, polled_saved_peers}]), - ar:console("Polled saved peers.~n") - end. + %% We only want to return records for available peers. However, PeerRecords may contain + %% multiple records for the same peer (one for each tracked metric) and we don't want to + %% ping each peer multiple times. So: + %% 1. Get a set of UniquePeers from PeerRecords + %% 2. Ping those peers to get a set of VaidPeers + %% 3. Filter PeerRecords to only include records for ValidPeers + UniquePeers = sets:from_list([ element(1, Record) || Record <- PeerRecords ]), + + ValidPeers = sets:filter( + fun(Peer) -> + case ar_http_iface_client:get_info(Peer, name) of + info_unavailable -> + ?LOG_DEBUG([{event, peer_unavailable}, {peer, ar_util:format_peer(Peer)}]), + false; + <> -> + true; + Network -> + ?LOG_DEBUG([ + {event, peer_from_the_wrong_network}, + {peer, ar_util:format_peer(Peer)}, + {network, Network} + ]), + false + end + end, + UniquePeers + ), -load_peers(Peers) when length(Peers) < 20 -> - ar_util:pmap(fun load_peer/1, Peers); -load_peers(Peers) -> - {Peers2, Peers3} = lists:split(20, Peers), - ar_util:pmap(fun load_peer/1, Peers2), - load_peers(Peers3). + ValidPeerRecords = lists:filter( + fun(PeerRecord) -> + sets:is_element(element(1, PeerRecord), ValidPeers) + end, + PeerRecords + ), + ValidPeerRecords. 
+
+load_peers() ->
+ ?LOG_INFO([{event, polling_saved_peers}]),
+ ar:console("Polling saved peers...~n"),
+ PeerRecords = read_peer_records(),
+ load_peers(PeerRecords),
+ load_totals(),
+ ?LOG_INFO([{event, polled_saved_peers}]),
+ ar:console("Polled saved peers.~n").
+
+load_peers(PeerRecords) when length(PeerRecords) < 20 ->
+ ar_util:pmap(fun load_peer/1, PeerRecords);
+load_peers(PeerRecords) ->
+ {PeerRecords2, PeerRecords3} = lists:split(20, PeerRecords),
+ ar_util:pmap(fun load_peer/1, PeerRecords2),
+ load_peers(PeerRecords3).

load_peer({Peer, Performance}) ->
- case ar_http_iface_client:get_info(Peer, name) of
- info_unavailable ->
- ?LOG_DEBUG([{event, peer_unavailable}, {peer, ar_util:format_peer(Peer)}]),
- ok;
- <<?NETWORK_NAME>> ->
- may_be_rotate_peer_ports(Peer),
- case Performance of
- {performance, Bytes, Latency, Transfers, _Failures, Rating} ->
- %% For compatibility with a few nodes already storing the records
- %% without the release field.
- Overall = #metrics{
- bytes = Bytes, latency = Latency, transfers = Transfers,
- rating = Rating },
- Performance2 = #performance{
- metrics = #{
- overall => Overall,
- data_sync => #metrics{}
- }
- },
- set_performance(Peer, Performance2);
- {performance, Bytes, Latency, Transfers, _Failures, Rating, Release} ->
- Overall = #metrics{
- bytes = Bytes, latency = Latency, transfers = Transfers,
- rating = Rating },
- Performance2 = #performance{
- release = Release,
- metrics = #{
- overall => Overall,
- data_sync => #metrics{}
- }
- },
- set_performance(Peer, Performance2);
- _ ->
- set_performance(Peer, Performance)
- end,
- ok;
- Network ->
- ?LOG_DEBUG([{event, peer_from_the_wrong_network},
- {peer, ar_util:format_peer(Peer)}, {network, Network}]),
- ok
+ load_peer({Peer, overall, Performance});
+load_peer({Peer, Metric, Performance}) ->
+ may_be_rotate_peer_ports(Peer),
+ case Performance of
+ {performance, Bytes, Latency, Transfers, _Failures, Rating} ->
+ %% For compatibility with a few nodes already storing the
records + %% without the release field. + set_performance(Peer, Metric, #performance{ + bytes = Bytes, + latency = Latency, + transfers = Transfers, + rating = Rating + }); + {performance, Bytes, Latency, Transfers, _Failures, Rating, Release} -> + %% For compatibility with nodes storing records from before the introduction of + %% the version field + set_performance(Peer, Metric, #performance{ + release = Release, + bytes = Bytes, + latency = Latency, + transfers = Transfers, + rating = Rating + }); + {performance, 3, _Release, _Bytes, _Latency, _Transfers, _Success, _Rating} -> + %% Going forward whenever we change the #performance record we should increment the + %% version field so we can match on it when doing a load. Here we're handling the + %% version 3 format. + set_performance(Peer, Metric, Performance) end. +load_totals() -> + Totals = ets:foldl( + fun + ({{peer, Metric, _Peer}, Performance}, Acc) -> + Total = maps:get(Metric, Acc, 0), + maps:put(Metric, Total + Performance#performance.rating, Acc); + (_, Acc) -> + Acc + end, + #{}, + ?MODULE + ), + + lists:foreach( + fun(Metric) -> + Total = maps:get(Metric, Totals, 0), + set_total_rating(Metric, Total) + end, + ?AVAILABLE_METRICS + ). 
+ may_be_rotate_peer_ports(Peer) -> - {IP, Port} = get_ip_port(Peer), - case ets:lookup(?MODULE, {peer_ip, IP}) of - [] -> - ets:insert(?MODULE, {{peer_ip, IP}, - {erlang:setelement(1, ?DEFAULT_PEER_PORT_MAP, Port), 1}}); - [{_, {PortMap, Position}}] -> - case is_in_port_map(Port, PortMap) of - {true, _} -> - ok; - false -> - MaxSize = erlang:size(?DEFAULT_PEER_PORT_MAP), - case Position < MaxSize of - true -> - ets:insert(?MODULE, {{peer_ip, IP}, - {erlang:setelement(Position + 1, PortMap, Port), - Position + 1}}); - false -> - RemovedPeer = construct_peer(IP, element(1, PortMap)), - PortMap2 = shift_port_map_left(PortMap), - PortMap3 = erlang:setelement(MaxSize, PortMap2, Port), - ets:insert(?MODULE, {{peer_ip, IP}, {PortMap3, MaxSize}}), - remove_peer(RemovedPeer) - end - end - end. + {IP, Port} = get_ip_port(Peer), + case ets:lookup(?MODULE, {peer_ip, IP}) of + [] -> + ets:insert( + ?MODULE, {{peer_ip, IP}, {erlang:setelement(1, ?DEFAULT_PEER_PORT_MAP, Port), 1}} + ); + [{_, {PortMap, Position}}] -> + case is_in_port_map(Port, PortMap) of + {true, _} -> + ok; + false -> + MaxSize = erlang:size(?DEFAULT_PEER_PORT_MAP), + case Position < MaxSize of + true -> + ets:insert( + ?MODULE, + {{peer_ip, IP}, { + erlang:setelement(Position + 1, PortMap, Port), Position + 1 + }} + ); + false -> + RemovedPeer = construct_peer(IP, element(1, PortMap)), + PortMap2 = shift_port_map_left(PortMap), + PortMap3 = erlang:setelement(MaxSize, PortMap2, Port), + ets:insert(?MODULE, {{peer_ip, IP}, {PortMap3, MaxSize}}), + remove_peer(RemovedPeer) + end + end + end. get_ip_port({A, B, C, D, Port}) -> - {{A, B, C, D}, Port}. + {{A, B, C, D}, Port}. construct_peer({A, B, C, D}, Port) -> - {A, B, C, D, Port}. + {A, B, C, D, Port}. is_in_port_map(Port, PortMap) -> - is_in_port_map(Port, PortMap, erlang:size(PortMap), 1). + is_in_port_map(Port, PortMap, erlang:size(PortMap), 1). 
is_in_port_map(_Port, _PortMap, Max, N) when N > Max -> - false; + false; is_in_port_map(Port, PortMap, Max, N) -> - case element(N, PortMap) == Port of - true -> - {true, N}; - false -> - is_in_port_map(Port, PortMap, Max, N + 1) - end. + case element(N, PortMap) == Port of + true -> + {true, N}; + false -> + is_in_port_map(Port, PortMap, Max, N + 1) + end. shift_port_map_left(PortMap) -> - shift_port_map_left(PortMap, erlang:size(PortMap), 1). + shift_port_map_left(PortMap, erlang:size(PortMap), 1). shift_port_map_left(PortMap, Max, N) when N == Max -> - erlang:setelement(N, PortMap, empty_slot); + erlang:setelement(N, PortMap, empty_slot); shift_port_map_left(PortMap, Max, N) -> - PortMap2 = erlang:setelement(N, PortMap, element(N + 1, PortMap)), - shift_port_map_left(PortMap2, Max, N + 1). + PortMap2 = erlang:setelement(N, PortMap, element(N + 1, PortMap)), + shift_port_map_left(PortMap2, Max, N + 1). ping_peers(Peers) when length(Peers) < 100 -> - ar_util:pmap(fun ar_http_iface_client:add_peer/1, Peers); + ar_util:pmap(fun ar_http_iface_client:add_peer/1, Peers); ping_peers(Peers) -> - {Send, Rest} = lists:split(100, Peers), - ar_util:pmap(fun ar_http_iface_client:add_peer/1, Send), - ping_peers(Rest). + {Send, Rest} = lists:split(100, Peers), + ar_util:pmap(fun ar_http_iface_client:add_peer/1, Send), + ping_peers(Rest). -ifdef(DEBUG). %% Do not filter out loopback IP addresses with custom port in the debug mode %% to allow multiple local VMs to peer with each other. is_loopback_ip({127, _, _, _, Port}) -> - {ok, Config} = application:get_env(arweave, config), - Port == Config#config.port; + {ok, Config} = application:get_env(arweave, config), + Port == Config#config.port; is_loopback_ip({_, _, _, _, _}) -> - false. + false. -else. %% @doc Is the IP address in question a loopback ('us') address? is_loopback_ip({A, B, C, D, _Port}) -> is_loopback_ip({A, B, C, D}); @@ -619,243 +687,274 @@ is_loopback_ip({_, _, _, _}) -> false. 
%% @doc Return a ranked list of peers. rank_peers(ScoredPeers) -> - SortedReversed = lists:reverse( - lists:sort(fun({_, S1}, {_, S2}) -> S1 >= S2 end, ScoredPeers)), - GroupedBySubnet = - lists:foldl( - fun({{A, B, _C, _D, _Port}, _Score} = Peer, Acc) -> - maps:update_with({A, B}, fun(L) -> [Peer | L] end, [Peer], Acc) - end, - #{}, - SortedReversed - ), - ScoredSubnetPeers = - maps:fold( - fun(_Subnet, SubnetPeers, Acc) -> - element(2, lists:foldl( - fun({Peer, Score}, {N, Acc2}) -> - %% At first we take the best peer from every subnet, - %% then take the second best from every subnet, etc. - {N + 1, [{Peer, {-N, Score}} | Acc2]} - end, - {0, Acc}, - SubnetPeers - )) - end, - [], - GroupedBySubnet - ), - [Peer || {Peer, _} <- lists:sort(fun({_, S1}, {_, S2}) -> S1 >= S2 end, - ScoredSubnetPeers)]. + SortedReversed = lists:reverse( + lists:sort(fun({_, S1}, {_, S2}) -> S1 >= S2 end, ScoredPeers) + ), + GroupedBySubnet = + lists:foldl( + fun({{A, B, _C, _D, _Port}, _Score} = Peer, Acc) -> + maps:update_with({A, B}, fun(L) -> [Peer | L] end, [Peer], Acc) + end, + #{}, + SortedReversed + ), + ScoredSubnetPeers = + maps:fold( + fun(_Subnet, SubnetPeers, Acc) -> + element( + 2, + lists:foldl( + fun({Peer, Score}, {N, Acc2}) -> + %% At first we take the best peer from every subnet, + %% then take the second best from every subnet, etc. + {N + 1, [{Peer, {-N, Score}} | Acc2]} + end, + {0, Acc}, + SubnetPeers + ) + ) + end, + [], + GroupedBySubnet + ), + [ + Peer + || {Peer, _} <- lists:sort( + fun({_, S1}, {_, S2}) -> S1 >= S2 end, + ScoredSubnetPeers + ) + ]. check_external_peer(Peer) -> - IsLoopbackIP = is_loopback_ip(Peer), - IsBlacklisted = lists:member(Peer, ?PEER_PERMANENT_BLACKLIST), - IsBanned = ar_blacklist_middleware:is_peer_banned(Peer) == banned, - case {IsLoopbackIP, IsBlacklisted, IsBanned} of - {true, _, _} -> - reject; - {_, true, _} -> - reject; - {_, _, true} -> - reject; - _ -> - ok - end. 
+ IsLoopbackIP = is_loopback_ip(Peer), + IsBlacklisted = lists:member(Peer, ?PEER_PERMANENT_BLACKLIST), + IsBanned = ar_blacklist_middleware:is_peer_banned(Peer) == banned, + case {IsLoopbackIP, IsBlacklisted, IsBanned} of + {true, _, _} -> + reject; + {_, true, _} -> + reject; + {_, _, true} -> + reject; + _ -> + ok + end. update_rating(Peer, IsSuccess) -> - Performance = get_or_init_performance(Peer), - %% Pass in the current latecny and bytes values in order to hold them constant. - %% Only the success average should be updated. - update_rating(Peer, Performance#performance.latency, Performance#performance.bytes, IsSuccess). + update_rating(Peer, [], IsSuccess). +update_rating(Peer, AdditionalMetrics, IsSuccess) -> + Performance = get_or_init_performance(Peer), + %% Pass in the current latency and bytes values in order to hold them constant. + %% Only the success average should be updated. + update_rating( + Peer, + AdditionalMetrics, + Performance#performance.latency, + Performance#performance.bytes, + IsSuccess + ). + update_rating(Peer, LatencyMicroseconds, Size, IsSuccess) -> - Performance = get_or_init_performance(Peer), - Total = get_total_rating(), - #performance{ bytes = Bytes, latency = Latency, success = Success, - rating = Rating, transfers = N, active_requests = ActiveRequests } = Performance, - Bytes2 = calculate_ema(Bytes, Size, ?THROUGHPUT_ALPHA), - Latency2 = calculate_ema(Latency, LatencyMicroseconds / 1000, ?THROUGHPUT_ALPHA), - Success2 = calculate_ema(Success, ar_util:bool_to_int(IsSuccess), ?SUCCESS_ALPHA), - Rating2 = (Bytes2 / Latency2) * Success2 * ActiveRequests, - Performance2 = Performance#performance{ - bytes = Bytes2, latency = Latency2, success = Success2, - rating = Rating2, transfers = N + 1 }, - Total2 = Total - Rating + Rating2, - may_be_rotate_peer_ports(Peer), - set_performance(Peer, Performance2, Total2). + update_rating(Peer, [], LatencyMicroseconds, Size, IsSuccess). 
+update_rating(Peer, AdditionalMetrics, LatencyMicroseconds, Size, IsSuccess) -> + %% Update the 'overall' metric plus any additional metrics specified. + lists:foreach( + fun(Metric) -> + update_performance(Peer, Metric, LatencyMicroseconds, Size, IsSuccess) + end, + [overall | AdditionalMetrics] + ). + +update_performance(Peer, Metric, LatencyMicroseconds, Size, IsSuccess) -> + %% only update available metrics + true = lists:member(Metric, ?AVAILABLE_METRICS), + Performance = get_or_init_performance(Peer, Metric), + Total = get_total_rating(Metric), + #performance{ + bytes = Bytes, + latency = Latency, + success = Success, + rating = Rating, + transfers = N + } = Performance, + Bytes2 = calculate_ema(Bytes, Size, ?THROUGHPUT_ALPHA), + Latency2 = calculate_ema(Latency, LatencyMicroseconds / 1000, ?THROUGHPUT_ALPHA), + Success2 = calculate_ema(Success, ar_util:bool_to_int(IsSuccess), ?SUCCESS_ALPHA), + Rating2 = (Bytes2 / Latency2) * Success2, + Performance2 = Performance#performance{ + bytes = Bytes2, + latency = Latency2, + success = Success2, + rating = Rating2, + transfers = N + 1 + }, + Total2 = Total - Rating + Rating2, + may_be_rotate_peer_ports(Peer), + set_performance(Peer, Metric, Performance2), + set_total_rating(Metric, Total2). calculate_ema(OldEMA, Value, Alpha) -> - Alpha * Value + (1 - Alpha) * OldEMA. - -set_performance(Peer, Performance, TotalRating) -> - ets:insert(?MODULE, [ - {{peer, Peer}, Performance}, - {rating_total, TotalRating}]). - -set_performance(Peer, Performance) -> - ets:insert(?MODULE, [{{peer, Peer}, Performance}]). - -get_total_rating() -> - case ets:lookup(?MODULE, rating_total) of - [] -> - 0; - [{_, Total}] -> - Total - end. + Alpha * Value + (1 - Alpha) * OldEMA. 
remove_peer(RemovedPeer) -> - Total = get_total_rating(), - Performance = get_or_init_performance(RemovedPeer), - ?LOG_DEBUG([ - {event, remove_peer}, - {peer, ar_util:format_peer(RemovedPeer)}, - {performance, format_stats(RemovedPeer, Performance)}]), - ets:insert(?MODULE, {rating_total, Total - Performance#performance.rating}), - ets:delete(?MODULE, {peer, RemovedPeer}), - remove_peer_port(RemovedPeer). + ?LOG_DEBUG([ + {event, remove_peer}, + {peer, ar_util:format_peer(RemovedPeer)} + ]), + lists:foreach( + fun(Metric) -> + Performance = get_or_init_performance(RemovedPeer, Metric), + Total = get_total_rating(Metric), + set_total_rating(Metric, Total - Performance#performance.rating), + ets:delete(?MODULE, {peer, RemovedPeer, Metric}) + end, + ?AVAILABLE_METRICS + ), + remove_peer_port(RemovedPeer). remove_peer_port(Peer) -> - {IP, Port} = get_ip_port(Peer), - case ets:lookup(?MODULE, {peer_ip, IP}) of - [] -> - ok; - [{_, {PortMap, Position}}] -> - case is_in_port_map(Port, PortMap) of - false -> - ok; - {true, N} -> - PortMap2 = erlang:setelement(N, PortMap, empty_slot), - case is_port_map_empty(PortMap2) of - true -> - ets:delete(?MODULE, {peer_ip, IP}); - false -> - ets:insert(?MODULE, {{peer_ip, IP}, {PortMap2, Position}}) - end - end - end. + {IP, Port} = get_ip_port(Peer), + case ets:lookup(?MODULE, {peer_ip, IP}) of + [] -> + ok; + [{_, {PortMap, Position}}] -> + case is_in_port_map(Port, PortMap) of + false -> + ok; + {true, N} -> + PortMap2 = erlang:setelement(N, PortMap, empty_slot), + case is_port_map_empty(PortMap2) of + true -> + ets:delete(?MODULE, {peer_ip, IP}); + false -> + ets:insert(?MODULE, {{peer_ip, IP}, {PortMap2, Position}}) + end + end + end. is_port_map_empty(PortMap) -> - is_port_map_empty(PortMap, erlang:size(PortMap), 1). + is_port_map_empty(PortMap, erlang:size(PortMap), 1). 
is_port_map_empty(_PortMap, Max, N) when N > Max -> - true; + true; is_port_map_empty(PortMap, Max, N) -> - case element(N, PortMap) of - empty_slot -> - is_port_map_empty(PortMap, Max, N + 1); - _ -> - false - end. + case element(N, PortMap) of + empty_slot -> + is_port_map_empty(PortMap, Max, N + 1); + _ -> + false + end. store_peers() -> - case ets:lookup(?MODULE, rating_total) of - [] -> - ok; - [{_, Total}] -> - Records = - ets:foldl( - fun ({{peer, Peer}, Performance}, Acc) -> - [{Peer, Performance} | Acc]; - (_, Acc) -> - Acc - end, - [], - ?MODULE - ), - ar_storage:write_term(peers, {Total, Records}) - end. + Records = + ets:foldl( + fun + ({{peer, Peer, Metric}, Performance}, Acc) -> + [{Peer, Metric, Performance} | Acc]; + (_, Acc) -> + Acc + end, + [], + ?MODULE + ), + case Records of + [] -> + ok; + _ -> + ar_storage:write_term(peers, Records) + end. issue_warning(Peer) -> - Performance = get_or_init_performance(Peer), - Success = calculate_ema(Performance#performance.success, 0, ?SUCCESS_ALPHA), - case Success < ?MINIMUM_SUCCESS of - true -> - remove_peer(Peer); - false -> - Performance2 = Performance#performance{ success = Success }, - may_be_rotate_peer_ports(Peer), - set_performance(Peer, Performance2) - end. + Performance = get_or_init_performance(Peer), + Success = calculate_ema(Performance#performance.success, 0, ?SUCCESS_ALPHA), + case Success < ?MINIMUM_SUCCESS of + true -> + remove_peer(Peer); + false -> + Performance2 = Performance#performance{success = Success}, + may_be_rotate_peer_ports(Peer), + set_performance(Peer, Performance2) + end. %%%=================================================================== %%% Tests. 
%%%=================================================================== rotate_peer_ports_test() -> - Peer = {2, 2, 2, 2, 1}, - may_be_rotate_peer_ports(Peer), - [{_, {PortMap, 1}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(1, element(1, PortMap)), - remove_peer(Peer), - ?assertEqual([], ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}})), - may_be_rotate_peer_ports(Peer), - Peer2 = {2, 2, 2, 2, 2}, - may_be_rotate_peer_ports(Peer2), - [{_, {PortMap2, 2}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(1, element(1, PortMap2)), - ?assertEqual(2, element(2, PortMap2)), - remove_peer(Peer), - [{_, {PortMap3, 2}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(empty_slot, element(1, PortMap3)), - ?assertEqual(2, element(2, PortMap3)), - Peer3 = {2, 2, 2, 2, 3}, - Peer4 = {2, 2, 2, 2, 4}, - Peer5 = {2, 2, 2, 2, 5}, - Peer6 = {2, 2, 2, 2, 6}, - Peer7 = {2, 2, 2, 2, 7}, - Peer8 = {2, 2, 2, 2, 8}, - Peer9 = {2, 2, 2, 2, 9}, - Peer10 = {2, 2, 2, 2, 10}, - Peer11 = {2, 2, 2, 2, 11}, - may_be_rotate_peer_ports(Peer3), - may_be_rotate_peer_ports(Peer4), - may_be_rotate_peer_ports(Peer5), - may_be_rotate_peer_ports(Peer6), - may_be_rotate_peer_ports(Peer7), - may_be_rotate_peer_ports(Peer8), - may_be_rotate_peer_ports(Peer9), - may_be_rotate_peer_ports(Peer10), - [{_, {PortMap4, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(empty_slot, element(1, PortMap4)), - ?assertEqual(2, element(2, PortMap4)), - ?assertEqual(10, element(10, PortMap4)), - may_be_rotate_peer_ports(Peer8), - may_be_rotate_peer_ports(Peer9), - may_be_rotate_peer_ports(Peer10), - [{_, {PortMap5, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(empty_slot, element(1, PortMap5)), - ?assertEqual(2, element(2, PortMap5)), - ?assertEqual(3, element(3, PortMap5)), - ?assertEqual(9, element(9, PortMap5)), - ?assertEqual(10, element(10, PortMap5)), - may_be_rotate_peer_ports(Peer11), - [{_, {PortMap6, 10}}] = ets:lookup(?MODULE, 
{peer_ip, {2, 2, 2, 2}}), - ?assertEqual(element(2, PortMap5), element(1, PortMap6)), - ?assertEqual(3, element(2, PortMap6)), - ?assertEqual(4, element(3, PortMap6)), - ?assertEqual(5, element(4, PortMap6)), - ?assertEqual(11, element(10, PortMap6)), - may_be_rotate_peer_ports(Peer11), - [{_, {PortMap7, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(element(2, PortMap5), element(1, PortMap7)), - ?assertEqual(3, element(2, PortMap7)), - ?assertEqual(4, element(3, PortMap7)), - ?assertEqual(5, element(4, PortMap7)), - ?assertEqual(11, element(10, PortMap7)), - remove_peer(Peer4), - [{_, {PortMap8, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(empty_slot, element(3, PortMap8)), - ?assertEqual(3, element(2, PortMap8)), - ?assertEqual(5, element(4, PortMap8)), - remove_peer(Peer2), - remove_peer(Peer3), - remove_peer(Peer5), - remove_peer(Peer6), - remove_peer(Peer7), - remove_peer(Peer8), - remove_peer(Peer9), - remove_peer(Peer10), - [{_, {PortMap9, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(11, element(10, PortMap9)), - remove_peer(Peer11), - ?assertEqual([], ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}})). 
+ Peer = {2, 2, 2, 2, 1}, + may_be_rotate_peer_ports(Peer), + [{_, {PortMap, 1}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(1, element(1, PortMap)), + remove_peer(Peer), + ?assertEqual([], ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}})), + may_be_rotate_peer_ports(Peer), + Peer2 = {2, 2, 2, 2, 2}, + may_be_rotate_peer_ports(Peer2), + [{_, {PortMap2, 2}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(1, element(1, PortMap2)), + ?assertEqual(2, element(2, PortMap2)), + remove_peer(Peer), + [{_, {PortMap3, 2}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(empty_slot, element(1, PortMap3)), + ?assertEqual(2, element(2, PortMap3)), + Peer3 = {2, 2, 2, 2, 3}, + Peer4 = {2, 2, 2, 2, 4}, + Peer5 = {2, 2, 2, 2, 5}, + Peer6 = {2, 2, 2, 2, 6}, + Peer7 = {2, 2, 2, 2, 7}, + Peer8 = {2, 2, 2, 2, 8}, + Peer9 = {2, 2, 2, 2, 9}, + Peer10 = {2, 2, 2, 2, 10}, + Peer11 = {2, 2, 2, 2, 11}, + may_be_rotate_peer_ports(Peer3), + may_be_rotate_peer_ports(Peer4), + may_be_rotate_peer_ports(Peer5), + may_be_rotate_peer_ports(Peer6), + may_be_rotate_peer_ports(Peer7), + may_be_rotate_peer_ports(Peer8), + may_be_rotate_peer_ports(Peer9), + may_be_rotate_peer_ports(Peer10), + [{_, {PortMap4, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(empty_slot, element(1, PortMap4)), + ?assertEqual(2, element(2, PortMap4)), + ?assertEqual(10, element(10, PortMap4)), + may_be_rotate_peer_ports(Peer8), + may_be_rotate_peer_ports(Peer9), + may_be_rotate_peer_ports(Peer10), + [{_, {PortMap5, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(empty_slot, element(1, PortMap5)), + ?assertEqual(2, element(2, PortMap5)), + ?assertEqual(3, element(3, PortMap5)), + ?assertEqual(9, element(9, PortMap5)), + ?assertEqual(10, element(10, PortMap5)), + may_be_rotate_peer_ports(Peer11), + [{_, {PortMap6, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(element(2, PortMap5), element(1, PortMap6)), + 
?assertEqual(3, element(2, PortMap6)), + ?assertEqual(4, element(3, PortMap6)), + ?assertEqual(5, element(4, PortMap6)), + ?assertEqual(11, element(10, PortMap6)), + may_be_rotate_peer_ports(Peer11), + [{_, {PortMap7, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(element(2, PortMap5), element(1, PortMap7)), + ?assertEqual(3, element(2, PortMap7)), + ?assertEqual(4, element(3, PortMap7)), + ?assertEqual(5, element(4, PortMap7)), + ?assertEqual(11, element(10, PortMap7)), + remove_peer(Peer4), + [{_, {PortMap8, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(empty_slot, element(3, PortMap8)), + ?assertEqual(3, element(2, PortMap8)), + ?assertEqual(5, element(4, PortMap8)), + remove_peer(Peer2), + remove_peer(Peer3), + remove_peer(Peer5), + remove_peer(Peer6), + remove_peer(Peer7), + remove_peer(Peer8), + remove_peer(Peer9), + remove_peer(Peer10), + [{_, {PortMap9, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(11, element(10, PortMap9)), + remove_peer(Peer11), + ?assertEqual([], ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}})). diff --git a/apps/arweave/test/ar_data_sync_tests.erl b/apps/arweave/test/ar_data_sync_tests.erl index f9e7482ac..2c84289f9 100644 --- a/apps/arweave/test/ar_data_sync_tests.erl +++ b/apps/arweave/test/ar_data_sync_tests.erl @@ -682,8 +682,9 @@ test_mines_off_only_second_last_chunks() -> ). packs_chunks_depending_on_packing_threshold_test_() -> - test_with_mocked_functions([{ar_fork, height_2_6, fun() -> 10 end}, - {ar_fork, height_2_6_8, fun() -> 15 end}], + test_with_mocked_functions([{ar_fork, height_2_6, fun() -> 0 end}, + {ar_fork, height_2_6_8, fun() -> 0 end}, + {ar_fork, height_2_7, fun() -> 10 end}], fun test_packs_chunks_depending_on_packing_threshold/0). 
test_packs_chunks_depending_on_packing_threshold() -> @@ -754,66 +755,37 @@ test_packs_chunks_depending_on_packing_threshold() -> B = read_block_when_stored(H), PoA = B#block.poa, BI = lists:reverse(lists:sublist(lists:reverse(BILast), Height)), - {RecallByte, PartitionUpperBound} = - case B#block.height >= ar_fork:height_2_6() of + PrevNonceLimiterInfo = PrevB#block.nonce_limiter_info, + PrevSeed = + case B#block.height == ar_fork:height_2_6() of true -> - PrevNonceLimiterInfo = PrevB#block.nonce_limiter_info, - PrevSeed = - case B#block.height == ar_fork:height_2_6() of - true -> - element(1, lists:nth(?SEARCH_SPACE_UPPER_BOUND_DEPTH, BI)); - false -> - PrevNonceLimiterInfo#nonce_limiter_info.seed - end, - NonceLimiterInfo = B#block.nonce_limiter_info, - Output = NonceLimiterInfo#nonce_limiter_info.output, - UpperBound = - NonceLimiterInfo#nonce_limiter_info.partition_upper_bound, - H0 = ar_block:compute_h0(Output, B#block.partition_number, PrevSeed, - B#block.reward_addr), - {RecallRange1Start, _} = ar_block:get_recall_range(H0, - B#block.partition_number, UpperBound), - Byte = RecallRange1Start + B#block.nonce * ?DATA_CHUNK_SIZE, - {Byte, UpperBound}; + element(1, lists:nth(?SEARCH_SPACE_UPPER_BOUND_DEPTH, BI)); false -> - UpperBound = element(2, - lists:nth(?SEARCH_SPACE_UPPER_BOUND_DEPTH, BI)), - BDS = ar_block:generate_block_data_segment(B), - {H0, _Entropy} = ar_mine:spora_h0_with_entropy(BDS, B#block.nonce, - Height), - {ok, Byte} = ar_mine:pick_recall_byte(H0, PrevB#block.indep_hash, - UpperBound), - {Byte, UpperBound} + PrevNonceLimiterInfo#nonce_limiter_info.seed end, + NonceLimiterInfo = B#block.nonce_limiter_info, + Output = NonceLimiterInfo#nonce_limiter_info.output, + PartitionUpperBound = + NonceLimiterInfo#nonce_limiter_info.partition_upper_bound, + H0 = ar_block:compute_h0(Output, B#block.partition_number, PrevSeed, + B#block.reward_addr), + {RecallRange1Start, _} = ar_block:get_recall_range(H0, + B#block.partition_number, PartitionUpperBound), + 
RecallByte = RecallRange1Start + B#block.nonce * ?DATA_CHUNK_SIZE, {BlockStart, BlockEnd, TXRoot} = ar_block_index:get_block_bounds(RecallByte), - case B#block.height >= ar_fork:height_2_6() of - true -> - ?debugFmt("Mined a 2.6 block. " - "Computed recall byte: ~B, block's recall byte: ~p. " - "Height: ~B. Previous block: ~s. " - "Computed search space upper bound: ~B. " - "Block start: ~B. Block end: ~B. TX root: ~s.", - [RecallByte, B#block.recall_byte, Height, - ar_util:encode(PrevB#block.indep_hash), PartitionUpperBound, - BlockStart, BlockEnd, ar_util:encode(TXRoot)]), - ?assertEqual(RecallByte, B#block.recall_byte), - ?assertEqual(true, ar_poa:validate({BlockStart, RecallByte, TXRoot, - BlockEnd - BlockStart, PoA, B#block.strict_data_split_threshold, - {spora_2_6, B#block.reward_addr}})); - false -> - ?debugFmt("Mined a 2.5 block. " - "Computed recall byte: ~B, block's recall byte: ~p. " - "Height: ~B. Previous block: ~s. " - "Computed search space upper bound: ~B. " - "Block start: ~B. Block end: ~B. TX root: ~s.", - [RecallByte, B#block.recall_byte, Height, - ar_util:encode(PrevB#block.indep_hash), PartitionUpperBound, - BlockStart, BlockEnd, ar_util:encode(TXRoot)]), - ?assertEqual(RecallByte, B#block.recall_byte), - ?assertEqual(true, ar_poa:validate({BlockStart, RecallByte, TXRoot, - BlockEnd - BlockStart, PoA, B#block.strict_data_split_threshold, - spora_2_5})) - end, + ?debugFmt("Mined a block. " + "Computed recall byte: ~B, block's recall byte: ~p. " + "Height: ~B. Previous block: ~s. " + "Computed search space upper bound: ~B. " + "Block start: ~B. Block end: ~B. 
TX root: ~s.", + [RecallByte, B#block.recall_byte, Height, + ar_util:encode(PrevB#block.indep_hash), PartitionUpperBound, + BlockStart, BlockEnd, ar_util:encode(TXRoot)]), + ?assertEqual(RecallByte, B#block.recall_byte), + ?assertEqual(true, ar_poa:validate({BlockStart, RecallByte, TXRoot, + BlockEnd - BlockStart, PoA, B#block.strict_data_split_threshold, + {spora_2_6, B#block.reward_addr}, + B#block.merkle_rebase_support_threshold})), B end, LastB, @@ -888,7 +860,7 @@ setup_nodes() -> setup_nodes(MasterAddr, SlaveAddr) -> Wallet = {_, Pub} = ar_wallet:new(), - [B0] = ar_weave:init([{ar_wallet:to_address(Pub), ?AR(200), <<>>}]), + [B0] = ar_weave:init([{ar_wallet:to_address(Pub), ?AR(20000), <<>>}]), {ok, Config} = application:get_env(arweave, config), {Master, _} = start(B0, MasterAddr, Config), {ok, SlaveConfig} = slave_call(application, get_env, [arweave, config]), From acf42086a37ba95eeb7d2c2474a3409beeef4903 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Wed, 5 Jul 2023 19:02:45 +0000 Subject: [PATCH 20/30] spaces to tabs --- apps/arweave/src/ar_peers.erl | 1206 ++++++++++++++++----------------- 1 file changed, 602 insertions(+), 604 deletions(-) diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 29f9daa3c..29c496234 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -10,8 +10,8 @@ -include_lib("eunit/include/eunit.hrl"). -export([start_link/0, get_peers/0, get_peer_performances/1, get_trusted_peers/0, is_public_peer/1, - get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, resolve_and_cache_peer/2, - rate_response/4, rate_fetched_data/2, gossiped_data/3, gossiped_data/2 + get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, resolve_and_cache_peer/2, + rate_response/4, rate_fetched_data/2, gossiped_data/3, gossiped_data/2 ]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -48,10 +48,8 @@ %% We only do scoring of this many TCP ports per IP address. 
When there are not enough slots, %% we remove the peer from the first slot. --define(DEFAULT_PEER_PORT_MAP, - {empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, - empty_slot, empty_slot} -). +-define(DEFAULT_PEER_PORT_MAP, {empty_slot, empty_slot, empty_slot, empty_slot, empty_slot, + empty_slot, empty_slot, empty_slot, empty_slot, empty_slot}). -record(state, {}). @@ -61,435 +59,435 @@ %% @doc Start the server. start_link() -> - gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). get_peers() -> - case catch ets:lookup(?MODULE, peers) of - {'EXIT', _} -> - []; - [] -> - []; - [{_, Peers}] -> - Peers - end. + case catch ets:lookup(?MODULE, peers) of + {'EXIT', _} -> + []; + [] -> + []; + [{_, Peers}] -> + Peers + end. get_peer_performances(Peers) -> - [get_or_init_performance(Peer) || Peer <- Peers]. + [get_or_init_performance(Peer) || Peer <- Peers]. -if(?NETWORK_NAME == "arweave.N.1"). get_trusted_peers() -> - {ok, Config} = application:get_env(arweave, config), - case Config#config.peers of - [] -> - ArweavePeers = ["sfo-1.na-west-1.arweave.net", "ams-1.eu-central-1.arweave.net", - "fra-1.eu-central-2.arweave.net", "blr-1.ap-central-1.arweave.net", - "sgp-1.ap-central-2.arweave.net" - ], - resolve_peers(ArweavePeers); - Peers -> - Peers - end. + {ok, Config} = application:get_env(arweave, config), + case Config#config.peers of + [] -> + ArweavePeers = ["sfo-1.na-west-1.arweave.net", "ams-1.eu-central-1.arweave.net", + "fra-1.eu-central-2.arweave.net", "blr-1.ap-central-1.arweave.net", + "sgp-1.ap-central-2.arweave.net" + ], + resolve_peers(ArweavePeers); + Peers -> + Peers + end. -else. get_trusted_peers() -> - {ok, Config} = application:get_env(arweave, config), - Config#config.peers. + {ok, Config} = application:get_env(arweave, config), + Config#config.peers. -endif. 
resolve_peers([]) -> - []; + []; resolve_peers([RawPeer | Peers]) -> - case ar_util:safe_parse_peer(RawPeer) of - {ok, Peer} -> - [Peer | resolve_peers(Peers)]; - {error, invalid} -> - ?LOG_WARNING([ - {event, failed_to_resolve_trusted_peer}, - {peer, RawPeer} - ]), - resolve_peers(Peers) - end. + case ar_util:safe_parse_peer(RawPeer) of + {ok, Peer} -> + [Peer | resolve_peers(Peers)]; + {error, invalid} -> + ?LOG_WARNING([ + {event, failed_to_resolve_trusted_peer}, + {peer, RawPeer} + ]), + resolve_peers(Peers) + end. %% @doc Return true if the given peer has a public IPv4 address. %% https://en.wikipedia.org/wiki/Reserved_IP_addresses. is_public_peer({Oct1, Oct2, Oct3, Oct4, _Port}) -> - is_public_peer({Oct1, Oct2, Oct3, Oct4}); + is_public_peer({Oct1, Oct2, Oct3, Oct4}); is_public_peer({0, _, _, _}) -> - false; + false; is_public_peer({10, _, _, _}) -> - false; + false; is_public_peer({127, _, _, _}) -> - false; + false; is_public_peer({100, Oct2, _, _}) when Oct2 >= 64 andalso Oct2 =< 127 -> - false; + false; is_public_peer({169, 254, _, _}) -> - false; + false; is_public_peer({172, Oct2, _, _}) when Oct2 >= 16 andalso Oct2 =< 31 -> - false; + false; is_public_peer({192, 0, 0, _}) -> - false; + false; is_public_peer({192, 0, 2, _}) -> - false; + false; is_public_peer({192, 88, 99, _}) -> - false; + false; is_public_peer({192, 168, _, _}) -> - false; + false; is_public_peer({198, 18, _, _}) -> - false; + false; is_public_peer({198, 19, _, _}) -> - false; + false; is_public_peer({198, 51, 100, _}) -> - false; + false; is_public_peer({203, 0, 113, _}) -> - false; + false; is_public_peer({Oct1, _, _, _}) when Oct1 >= 224 -> - false; + false; is_public_peer(_) -> - true. + true. %% @doc Return the release nubmer reported by the peer. %% Return -1 if the release is not known. get_peer_release(Peer) -> - case catch ets:lookup(?MODULE, {peer, Peer}) of - [{_, #performance{release = Release}}] -> - Release; - _ -> - -1 - end. 
+ case catch ets:lookup(?MODULE, {peer, Peer}) of + [{_, #performance{release = Release}}] -> + Release; + _ -> + -1 + end. rate_response({_Host, _Port}, _, _, _) -> - %% Only track requests for IP-based peers as the rest of the stack assumes an IP-based peer. - ok; + %% Only track requests for IP-based peers as the rest of the stack assumes an IP-based peer. + ok; rate_response(Peer, PathLabel, get, Response) -> - gen_server:cast( - ?MODULE, {rate_response, Peer, PathLabel, get, ar_metrics:get_status_class(Response)} - ); + gen_server:cast( + ?MODULE, {rate_response, Peer, PathLabel, get, ar_metrics:get_status_class(Response)} + ); rate_response(_Peer, _PathLabel, _Method, _Response) -> - ok. + ok. rate_fetched_data(_Peer, {ok, _}) -> - %% The fetched data is valid so the rating was already captured as part of - %% the start/end request pair. Nothing more to do. - ok; + %% The fetched data is valid so the rating was already captured as part of + %% the start/end request pair. Nothing more to do. + ok; rate_fetched_data(Peer, {error, _}) -> - %% The fetched data is invalid, so we need to reverse the rating that was applied - %% in end_request, and then apply a penalty - gen_server:cast(?MODULE, {invalid_fetched_data, Peer}); + %% The fetched data is invalid, so we need to reverse the rating that was applied + %% in end_request, and then apply a penalty + gen_server:cast(?MODULE, {invalid_fetched_data, Peer}); rate_fetched_data(Peer, invalid) -> - %% The fetched data is invalid, so we need to reverse the rating that was applied - %% in end_request, and then apply a penalty - gen_server:cast(?MODULE, {invalid_fetched_data, Peer}). + %% The fetched data is invalid, so we need to reverse the rating that was applied + %% in end_request, and then apply a penalty + gen_server:cast(?MODULE, {invalid_fetched_data, Peer}). gossiped_data(Peer, Data) -> - gossiped_data(Peer, Data, ok). + gossiped_data(Peer, Data, ok). 
gossiped_data(Peer, Data, ok) -> - gen_server:cast(?MODULE, { - gossiped_data, Peer, Data - }); + gen_server:cast(?MODULE, { + gossiped_data, Peer, Data + }); gossiped_data(_Peer, _Data, _ValidationStatus) -> - %% Ignore skipped or invalid blocks for now (consistent with old behavior, but may need to - %% be revisited) - ok. + %% Ignore skipped or invalid blocks for now (consistent with old behavior, but may need to + %% be revisited) + ok. %% @doc Print statistics about the current peers. stats() -> - Connected = get_peers(), - io:format("Connected peers, in preference order:~n"), - stats(Connected), - io:format("Other known peers:~n"), - All = ets:foldl( - fun - ({{peer, Peer}, _}, Acc) -> [Peer | Acc]; - (_, Acc) -> Acc - end, - [], - ?MODULE - ), - stats(All -- Connected). + Connected = get_peers(), + io:format("Connected peers, in preference order:~n"), + stats(Connected), + io:format("Other known peers:~n"), + All = ets:foldl( + fun + ({{peer, Peer}, _}, Acc) -> [Peer | Acc]; + (_, Acc) -> Acc + end, + [], + ?MODULE + ), + stats(All -- Connected). stats(Peers) -> - lists:foreach( - fun(Peer) -> format_stats(Peer, get_or_init_performance(Peer)) end, - Peers - ). + lists:foreach( + fun(Peer) -> format_stats(Peer, get_or_init_performance(Peer)) end, + Peers + ). discover_peers() -> - case ets:lookup(?MODULE, peers) of - [] -> - ok; - [{_, []}] -> - ok; - [{_, Peers}] -> - Peer = ar_util:pick_random(Peers), - discover_peers(get_peer_peers(Peer)) - end. + case ets:lookup(?MODULE, peers) of + [] -> + ok; + [{_, []}] -> + ok; + [{_, Peers}] -> + Peer = ar_util:pick_random(Peers), + discover_peers(get_peer_peers(Peer)) + end. %% @doc Resolve the domain name of the given peer (if the given peer is an IP address) %% and cache it. Return {ok, Peer} | {error, Reason}. 
resolve_and_cache_peer(RawPeer, Type) -> - case ar_util:safe_parse_peer(RawPeer) of - {ok, Peer} -> - case ets:lookup(?MODULE, {raw_peer, RawPeer}) of - [] -> - ets:insert(?MODULE, {{raw_peer, RawPeer}, Peer}), - ets:insert(?MODULE, {{Type, Peer}, RawPeer}); - [{_, Peer}] -> - ok; - [{_, PreviousPeer}] -> - %% This peer is configured with a domain name rather than IP address, - %% and the IP underlying the domain name has changed. - ets:delete(?MODULE, {Type, PreviousPeer}), - ets:insert(?MODULE, {{raw_peer, RawPeer}, Peer}), - ets:insert(?MODULE, {{Type, Peer}, RawPeer}) - end, - {ok, Peer}; - Error -> - Error - end. + case ar_util:safe_parse_peer(RawPeer) of + {ok, Peer} -> + case ets:lookup(?MODULE, {raw_peer, RawPeer}) of + [] -> + ets:insert(?MODULE, {{raw_peer, RawPeer}, Peer}), + ets:insert(?MODULE, {{Type, Peer}, RawPeer}); + [{_, Peer}] -> + ok; + [{_, PreviousPeer}] -> + %% This peer is configured with a domain name rather than IP address, + %% and the IP underlying the domain name has changed. + ets:delete(?MODULE, {Type, PreviousPeer}), + ets:insert(?MODULE, {{raw_peer, RawPeer}, Peer}), + ets:insert(?MODULE, {{Type, Peer}, RawPeer}) + end, + {ok, Peer}; + Error -> + Error + end. %%%=================================================================== %%% Generic server callbacks. %%%=================================================================== init([]) -> - process_flag(trap_exit, true), - [ok, ok] = ar_events:subscribe([peer, block]), - load_peers(), - gen_server:cast(?MODULE, rank_peers), - gen_server:cast(?MODULE, ping_peers), - timer:apply_interval(?GET_MORE_PEERS_FREQUENCY_MS, ?MODULE, discover_peers, []), - {ok, #state{}}. + process_flag(trap_exit, true), + [ok, ok] = ar_events:subscribe([peer, block]), + load_peers(), + gen_server:cast(?MODULE, rank_peers), + gen_server:cast(?MODULE, ping_peers), + timer:apply_interval(?GET_MORE_PEERS_FREQUENCY_MS, ?MODULE, discover_peers, []), + {ok, #state{}}. 
handle_call(Request, _From, State) -> - ?LOG_WARNING("event: unhandled_call, request: ~p", [Request]), - {reply, ok, State}. + ?LOG_WARNING("event: unhandled_call, request: ~p", [Request]), + {reply, ok, State}. handle_cast({add_peer, Peer, Release}, State) -> - may_be_rotate_peer_ports(Peer), - case ets:lookup(?MODULE, {peer, Peer}) of - [{_, #performance{release = Release}}] -> - ok; - [{_, Performance}] -> - set_performance(Peer, Performance#performance{release = Release}); - [] -> - set_performance(Peer, #performance{release = Release}) - end, - {noreply, State}; + may_be_rotate_peer_ports(Peer), + case ets:lookup(?MODULE, {peer, Peer}) of + [{_, #performance{release = Release}}] -> + ok; + [{_, Performance}] -> + set_performance(Peer, Performance#performance{release = Release}); + [] -> + set_performance(Peer, #performance{release = Release}) + end, + {noreply, State}; handle_cast(rank_peers, State) -> - Total = get_total_rating(), - Peers = - ets:foldl( - fun - ({{peer, Peer}, Performance}, Acc) -> - %% Bigger score increases the chances to end up on the top - %% of the peer list, but at the same time the ranking is - %% probabilistic to always give everyone a chance to improve - %% in the competition (i.e., reduce the advantage gained by - %% being the first to earn a reputation). 
- Score = - rand:uniform() * Performance#performance.rating / - (Total + 0.0001), - [{Peer, Score} | Acc]; - (_, Acc) -> - Acc - end, - [], - ?MODULE - ), - prometheus_gauge:set(arweave_peer_count, length(Peers)), - ets:insert(?MODULE, {peers, lists:sublist(rank_peers(Peers), ?MAX_PEERS)}), - ar_util:cast_after(?RANK_PEERS_FREQUENCY_MS, ?MODULE, rank_peers), - stats(), - {noreply, State}; + Total = get_total_rating(), + Peers = + ets:foldl( + fun + ({{peer, Peer}, Performance}, Acc) -> + %% Bigger score increases the chances to end up on the top + %% of the peer list, but at the same time the ranking is + %% probabilistic to always give everyone a chance to improve + %% in the competition (i.e., reduce the advantage gained by + %% being the first to earn a reputation). + Score = + rand:uniform() * Performance#performance.rating / + (Total + 0.0001), + [{Peer, Score} | Acc]; + (_, Acc) -> + Acc + end, + [], + ?MODULE + ), + prometheus_gauge:set(arweave_peer_count, length(Peers)), + ets:insert(?MODULE, {peers, lists:sublist(rank_peers(Peers), ?MAX_PEERS)}), + ar_util:cast_after(?RANK_PEERS_FREQUENCY_MS, ?MODULE, rank_peers), + stats(), + {noreply, State}; handle_cast(ping_peers, State) -> - [{peers, Peers}] = ets:lookup(?MODULE, peers), - ping_peers(lists:sublist(Peers, 100)), - {noreply, State}; + [{peers, Peers}] = ets:lookup(?MODULE, peers), + ping_peers(lists:sublist(Peers, 100)), + {noreply, State}; handle_cast({rate_response, Peer, PathLabel, get, Status}, State) -> - case Status of - "success" -> - update_rating(Peer, ?RATE_SUCCESS); - "redirection" -> - %% don't update rating - ok; - "client-error" -> - %% don't update rating - ok; - _ -> - update_rating(Peer, ?RATE_ERROR) - end, - ?LOG_DEBUG([ - {event, update_rating}, - {update_type, response}, - {path, PathLabel}, - {status, Status}, - {peer, ar_util:format_peer(Peer)} - ]), - {noreply, State}; + case Status of + "success" -> + update_rating(Peer, ?RATE_SUCCESS); + "redirection" -> + %% don't update rating 
+ ok; + "client-error" -> + %% don't update rating + ok; + _ -> + update_rating(Peer, ?RATE_ERROR) + end, + ?LOG_DEBUG([ + {event, update_rating}, + {update_type, response}, + {path, PathLabel}, + {status, Status}, + {peer, ar_util:format_peer(Peer)} + ]), + {noreply, State}; handle_cast({invalid_fetched_data, Peer}, State) -> - ?LOG_DEBUG([ - {event, update_rating}, - {update_type, invalid_fetched_data}, - {peer, ar_util:format_peer(Peer)} - ]), - %% log 2 failures - first is to reverse the success that was previously recorded by end_request - %% (since end_request only considers whether or not the HTTP request was successful and does not - %% consider the validity of the data it may be overly permissive), and the second is to - %% penalize the peer for serving invalid data. - %% Note: this is an approximation as due to the nature of the EMA this won't exactly reverse - %% the prior success. - update_rating(Peer, false), - update_rating(Peer, false), - {noreply, State}; + ?LOG_DEBUG([ + {event, update_rating}, + {update_type, invalid_fetched_data}, + {peer, ar_util:format_peer(Peer)} + ]), + %% log 2 failures - first is to reverse the success that was previously recorded by end_request + %% (since end_request only considers whether or not the HTTP request was successful and does not + %% consider the validity of the data it may be overly permissive), and the second is to + %% penalize the peer for serving invalid data. + %% Note: this is an approximation as due to the nature of the EMA this won't exactly reverse + %% the prior success. 
+ update_rating(Peer, false), + update_rating(Peer, false), + {noreply, State}; handle_cast({gossiped_data, Peer, Data}, State) -> - case check_external_peer(Peer) of - ok -> - ?LOG_DEBUG([ - {event, update_rating}, - {update_type, gossiped_data}, - {peer, ar_util:format_peer(Peer)} - ]), - update_rating(Peer, true); - _ -> - ok - end, - - {noreply, State}; + case check_external_peer(Peer) of + ok -> + ?LOG_DEBUG([ + {event, update_rating}, + {update_type, gossiped_data}, + {peer, ar_util:format_peer(Peer)} + ]), + update_rating(Peer, true); + _ -> + ok + end, + + {noreply, State}; handle_cast(Cast, State) -> - ?LOG_WARNING("event: unhandled_cast, cast: ~p", [Cast]), - {noreply, State}. + ?LOG_WARNING("event: unhandled_cast, cast: ~p", [Cast]), + {noreply, State}. handle_info({event, peer, {made_request, Peer, Release}}, State) -> - may_be_rotate_peer_ports(Peer), - case ets:lookup(?MODULE, {peer, Peer}) of - [{_, #performance{release = Release}}] -> - ok; - [{_, Performance}] -> - set_performance(Peer, Performance#performance{release = Release}); - [] -> - case check_external_peer(Peer) of - ok -> - set_performance(Peer, #performance{release = Release}); - _ -> - ok - end - end, - {noreply, State}; + may_be_rotate_peer_ports(Peer), + case ets:lookup(?MODULE, {peer, Peer}) of + [{_, #performance{release = Release}}] -> + ok; + [{_, Performance}] -> + set_performance(Peer, Performance#performance{release = Release}); + [] -> + case check_external_peer(Peer) of + ok -> + set_performance(Peer, #performance{release = Release}); + _ -> + ok + end + end, + {noreply, State}; handle_info({event, peer, {fetched_tx, Peer, TimeDelta, Size}}, State) -> - % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - % update_rating(Peer, TimeDelta, Size), - {noreply, State}; + % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, 
Size}]), + % update_rating(Peer, TimeDelta, Size), + {noreply, State}; handle_info({event, peer, {fetched_block, Peer, TimeDelta, Size}}, State) -> - % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - % update_rating(Peer, TimeDelta, Size), - {noreply, State}; + % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), + % update_rating(Peer, TimeDelta, Size), + {noreply, State}; handle_info({event, peer, {bad_response, {Peer, _Type, _Reason}}}, State) -> - issue_warning(Peer), - {noreply, State}; + issue_warning(Peer), + {noreply, State}; handle_info({event, peer, {banned, BannedPeer}}, State) -> - remove_peer(BannedPeer), - {noreply, State}; + remove_peer(BannedPeer), + {noreply, State}; handle_info({event, block, {rejected, failed_to_fetch_first_chunk, _H, Peer}}, State) -> - issue_warning(Peer), - {noreply, State}; + issue_warning(Peer), + {noreply, State}; handle_info({event, block, {rejected, failed_to_fetch_second_chunk, _H, Peer}}, State) -> - issue_warning(Peer), - {noreply, State}; + issue_warning(Peer), + {noreply, State}; handle_info({event, block, {rejected, failed_to_fetch_chunk, _H, Peer}}, State) -> - issue_warning(Peer), - {noreply, State}; + issue_warning(Peer), + {noreply, State}; handle_info({event, block, _}, State) -> - {noreply, State}; + {noreply, State}; handle_info({'EXIT', _, normal}, State) -> - {noreply, State}; + {noreply, State}; handle_info(Message, State) -> - ?LOG_WARNING("event: unhandled_info, message: ~p", [Message]), - {noreply, State}. + ?LOG_WARNING("event: unhandled_info, message: ~p", [Message]), + {noreply, State}. terminate(_Reason, _State) -> - store_peers(). + store_peers(). %%%=================================================================== %%% Private functions. 
%%%=================================================================== get_peer_peers(Peer) -> - case ar_http_iface_client:get_peers(Peer) of - unavailable -> []; - Peers -> Peers - end. + case ar_http_iface_client:get_peers(Peer) of + unavailable -> []; + Peers -> Peers + end. get_or_init_performance(Peer) -> - get_or_init_performance(Peer, overall). + get_or_init_performance(Peer, overall). get_or_init_performance(Peer, Metric) -> - case ets:lookup(?MODULE, {peer, Peer, Metric}) of - [] -> - #performance{}; - [{_, Performance}] -> - Performance - end. + case ets:lookup(?MODULE, {peer, Peer, Metric}) of + [] -> + #performance{}; + [{_, Performance}] -> + Performance + end. set_performance(Peer, Performance) -> - set_performance(Peer, overall, Performance). + set_performance(Peer, overall, Performance). set_performance(Peer, Metric, Performance) -> - ets:insert(?MODULE, [{{peer, Peer, Metric}, Performance}]). + ets:insert(?MODULE, [{{peer, Peer, Metric}, Performance}]). get_total_rating() -> - get_total_rating(overall). + get_total_rating(overall). get_total_rating(Metric) -> - case ets:lookup(?MODULE, {rating_total, Metric}) of - [] -> - 0; - [{_, Total}] -> - Total - end. + case ets:lookup(?MODULE, {rating_total, Metric}) of + [] -> + 0; + [{_, Total}] -> + Total + end. set_total_rating(Total) -> - set_total_rating(overall, Total). + set_total_rating(overall, Total). set_total_rating(Metric, Total) -> - ets:insert(?MODULE, {{rating_total, Metric}, Total}). + ets:insert(?MODULE, {{rating_total, Metric}, Total}). 
discover_peers([]) -> - ok; + ok; discover_peers([Peer | Peers]) -> - case ets:member(?MODULE, {peer, Peer}) of - true -> - ok; - false -> - IsPublic = is_public_peer(Peer), - IsBanned = ar_blacklist_middleware:is_peer_banned(Peer) == banned, - IsBlacklisted = lists:member(Peer, ?PEER_PERMANENT_BLACKLIST), - case IsPublic andalso not IsBanned andalso not IsBlacklisted of - false -> - ok; - true -> - case ar_http_iface_client:get_info(Peer, release) of - {<<"release">>, Release} when is_integer(Release) -> - gen_server:cast(?MODULE, {add_peer, Peer, Release}); - _ -> - ok - end - end - end, - discover_peers(Peers). + case ets:member(?MODULE, {peer, Peer}) of + true -> + ok; + false -> + IsPublic = is_public_peer(Peer), + IsBanned = ar_blacklist_middleware:is_peer_banned(Peer) == banned, + IsBlacklisted = lists:member(Peer, ?PEER_PERMANENT_BLACKLIST), + case IsPublic andalso not IsBanned andalso not IsBlacklisted of + false -> + ok; + true -> + case ar_http_iface_client:get_info(Peer, release) of + {<<"release">>, Release} when is_integer(Release) -> + gen_server:cast(?MODULE, {add_peer, Peer, Release}); + _ -> + ok + end + end + end, + discover_peers(Peers). format_stats(Peer, Perf) -> - KB = Perf#performance.bytes / 1024, - io:format( - "\t~s ~.2f kB/s (~.2f kB, ~B latency, ~.2f success, ~p transfers)~n", - [ - string:pad(ar_util:format_peer(Peer), 21, trailing, $\s), - float(Perf#performance.rating), - KB, - trunc(Perf#performance.latency), - Perf#performance.success, - Perf#performance.transfers - ] - ). + KB = Perf#performance.bytes / 1024, + io:format( + "\t~s ~.2f kB/s (~.2f kB, ~B latency, ~.2f success, ~p transfers)~n", + [ + string:pad(ar_util:format_peer(Peer), 21, trailing, $\s), + float(Perf#performance.rating), + KB, + trunc(Perf#performance.latency), + Perf#performance.success, + Perf#performance.transfers + ] + ). 
read_peer_records() -> PeerRecords = case ar_storage:read_term(peers) of - not_found -> - ok; - {ok, {_TotalRating, Records}} -> + not_found -> + ok; + {ok, {_TotalRating, Records}} -> %% Legacy format included the TotalRating, but since we always recalculate it when %% loading the peers, we've dropped it from the saved format. Records; @@ -543,11 +541,11 @@ load_peers() -> ar:console("Polled saved peers.~n"). load_peers(PeerRecords) when length(PeerRecords) < 20 -> - ar_util:pmap(fun load_peer/1, PeerRecords); + ar_util:pmap(fun load_peer/1, PeerRecords); load_peers(PeerRecords) -> - {PeerRecords2, PeerRecords3} = lists:split(20, PeerRecords), - ar_util:pmap(fun load_peer/1, PeerRecords2), - load_peers(PeerRecords3). + {PeerRecords2, PeerRecords3} = lists:split(20, PeerRecords), + ar_util:pmap(fun load_peer/1, PeerRecords2), + load_peers(PeerRecords3). load_peer({Peer, Performance}) -> load_peer({Peer, overall, Performance}); @@ -602,79 +600,79 @@ load_totals() -> ). may_be_rotate_peer_ports(Peer) -> - {IP, Port} = get_ip_port(Peer), - case ets:lookup(?MODULE, {peer_ip, IP}) of - [] -> - ets:insert( - ?MODULE, {{peer_ip, IP}, {erlang:setelement(1, ?DEFAULT_PEER_PORT_MAP, Port), 1}} - ); - [{_, {PortMap, Position}}] -> - case is_in_port_map(Port, PortMap) of - {true, _} -> - ok; - false -> - MaxSize = erlang:size(?DEFAULT_PEER_PORT_MAP), - case Position < MaxSize of - true -> - ets:insert( - ?MODULE, - {{peer_ip, IP}, { - erlang:setelement(Position + 1, PortMap, Port), Position + 1 - }} - ); - false -> - RemovedPeer = construct_peer(IP, element(1, PortMap)), - PortMap2 = shift_port_map_left(PortMap), - PortMap3 = erlang:setelement(MaxSize, PortMap2, Port), - ets:insert(?MODULE, {{peer_ip, IP}, {PortMap3, MaxSize}}), - remove_peer(RemovedPeer) - end - end - end. 
+ {IP, Port} = get_ip_port(Peer), + case ets:lookup(?MODULE, {peer_ip, IP}) of + [] -> + ets:insert( + ?MODULE, {{peer_ip, IP}, {erlang:setelement(1, ?DEFAULT_PEER_PORT_MAP, Port), 1}} + ); + [{_, {PortMap, Position}}] -> + case is_in_port_map(Port, PortMap) of + {true, _} -> + ok; + false -> + MaxSize = erlang:size(?DEFAULT_PEER_PORT_MAP), + case Position < MaxSize of + true -> + ets:insert( + ?MODULE, + {{peer_ip, IP}, { + erlang:setelement(Position + 1, PortMap, Port), Position + 1 + }} + ); + false -> + RemovedPeer = construct_peer(IP, element(1, PortMap)), + PortMap2 = shift_port_map_left(PortMap), + PortMap3 = erlang:setelement(MaxSize, PortMap2, Port), + ets:insert(?MODULE, {{peer_ip, IP}, {PortMap3, MaxSize}}), + remove_peer(RemovedPeer) + end + end + end. get_ip_port({A, B, C, D, Port}) -> - {{A, B, C, D}, Port}. + {{A, B, C, D}, Port}. construct_peer({A, B, C, D}, Port) -> - {A, B, C, D, Port}. + {A, B, C, D, Port}. is_in_port_map(Port, PortMap) -> - is_in_port_map(Port, PortMap, erlang:size(PortMap), 1). + is_in_port_map(Port, PortMap, erlang:size(PortMap), 1). is_in_port_map(_Port, _PortMap, Max, N) when N > Max -> - false; + false; is_in_port_map(Port, PortMap, Max, N) -> - case element(N, PortMap) == Port of - true -> - {true, N}; - false -> - is_in_port_map(Port, PortMap, Max, N + 1) - end. + case element(N, PortMap) == Port of + true -> + {true, N}; + false -> + is_in_port_map(Port, PortMap, Max, N + 1) + end. shift_port_map_left(PortMap) -> - shift_port_map_left(PortMap, erlang:size(PortMap), 1). + shift_port_map_left(PortMap, erlang:size(PortMap), 1). shift_port_map_left(PortMap, Max, N) when N == Max -> - erlang:setelement(N, PortMap, empty_slot); + erlang:setelement(N, PortMap, empty_slot); shift_port_map_left(PortMap, Max, N) -> - PortMap2 = erlang:setelement(N, PortMap, element(N + 1, PortMap)), - shift_port_map_left(PortMap2, Max, N + 1). 
+ PortMap2 = erlang:setelement(N, PortMap, element(N + 1, PortMap)), + shift_port_map_left(PortMap2, Max, N + 1). ping_peers(Peers) when length(Peers) < 100 -> - ar_util:pmap(fun ar_http_iface_client:add_peer/1, Peers); + ar_util:pmap(fun ar_http_iface_client:add_peer/1, Peers); ping_peers(Peers) -> - {Send, Rest} = lists:split(100, Peers), - ar_util:pmap(fun ar_http_iface_client:add_peer/1, Send), - ping_peers(Rest). + {Send, Rest} = lists:split(100, Peers), + ar_util:pmap(fun ar_http_iface_client:add_peer/1, Send), + ping_peers(Rest). -ifdef(DEBUG). %% Do not filter out loopback IP addresses with custom port in the debug mode %% to allow multiple local VMs to peer with each other. is_loopback_ip({127, _, _, _, Port}) -> - {ok, Config} = application:get_env(arweave, config), - Port == Config#config.port; + {ok, Config} = application:get_env(arweave, config), + Port == Config#config.port; is_loopback_ip({_, _, _, _, _}) -> - false. + false. -else. %% @doc Is the IP address in question a loopback ('us') address? is_loopback_ip({A, B, C, D, _Port}) -> is_loopback_ip({A, B, C, D}); @@ -687,166 +685,166 @@ is_loopback_ip({_, _, _, _}) -> false. %% @doc Return a ranked list of peers. rank_peers(ScoredPeers) -> - SortedReversed = lists:reverse( - lists:sort(fun({_, S1}, {_, S2}) -> S1 >= S2 end, ScoredPeers) - ), - GroupedBySubnet = - lists:foldl( - fun({{A, B, _C, _D, _Port}, _Score} = Peer, Acc) -> - maps:update_with({A, B}, fun(L) -> [Peer | L] end, [Peer], Acc) - end, - #{}, - SortedReversed - ), - ScoredSubnetPeers = - maps:fold( - fun(_Subnet, SubnetPeers, Acc) -> - element( - 2, - lists:foldl( - fun({Peer, Score}, {N, Acc2}) -> - %% At first we take the best peer from every subnet, - %% then take the second best from every subnet, etc. - {N + 1, [{Peer, {-N, Score}} | Acc2]} - end, - {0, Acc}, - SubnetPeers - ) - ) - end, - [], - GroupedBySubnet - ), - [ - Peer - || {Peer, _} <- lists:sort( - fun({_, S1}, {_, S2}) -> S1 >= S2 end, - ScoredSubnetPeers - ) - ]. 
+ SortedReversed = lists:reverse( + lists:sort(fun({_, S1}, {_, S2}) -> S1 >= S2 end, ScoredPeers) + ), + GroupedBySubnet = + lists:foldl( + fun({{A, B, _C, _D, _Port}, _Score} = Peer, Acc) -> + maps:update_with({A, B}, fun(L) -> [Peer | L] end, [Peer], Acc) + end, + #{}, + SortedReversed + ), + ScoredSubnetPeers = + maps:fold( + fun(_Subnet, SubnetPeers, Acc) -> + element( + 2, + lists:foldl( + fun({Peer, Score}, {N, Acc2}) -> + %% At first we take the best peer from every subnet, + %% then take the second best from every subnet, etc. + {N + 1, [{Peer, {-N, Score}} | Acc2]} + end, + {0, Acc}, + SubnetPeers + ) + ) + end, + [], + GroupedBySubnet + ), + [ + Peer + || {Peer, _} <- lists:sort( + fun({_, S1}, {_, S2}) -> S1 >= S2 end, + ScoredSubnetPeers + ) + ]. check_external_peer(Peer) -> - IsLoopbackIP = is_loopback_ip(Peer), - IsBlacklisted = lists:member(Peer, ?PEER_PERMANENT_BLACKLIST), - IsBanned = ar_blacklist_middleware:is_peer_banned(Peer) == banned, - case {IsLoopbackIP, IsBlacklisted, IsBanned} of - {true, _, _} -> - reject; - {_, true, _} -> - reject; - {_, _, true} -> - reject; - _ -> - ok - end. + IsLoopbackIP = is_loopback_ip(Peer), + IsBlacklisted = lists:member(Peer, ?PEER_PERMANENT_BLACKLIST), + IsBanned = ar_blacklist_middleware:is_peer_banned(Peer) == banned, + case {IsLoopbackIP, IsBlacklisted, IsBanned} of + {true, _, _} -> + reject; + {_, true, _} -> + reject; + {_, _, true} -> + reject; + _ -> + ok + end. update_rating(Peer, IsSuccess) -> - update_rating(Peer, [], IsSuccess). + update_rating(Peer, [], IsSuccess). update_rating(Peer, AdditionalMetrics, IsSuccess) -> - Performance = get_or_init_performance(Peer), - %% Pass in the current latency and bytes values in order to hold them constant. - %% Only the success average should be updated. - update_rating( - Peer, - AdditionalMetrics, - Performance#performance.latency, - Performance#performance.bytes, - IsSuccess - ). 
+ Performance = get_or_init_performance(Peer), + %% Pass in the current latency and bytes values in order to hold them constant. + %% Only the success average should be updated. + update_rating( + Peer, + AdditionalMetrics, + Performance#performance.latency, + Performance#performance.bytes, + IsSuccess + ). update_rating(Peer, LatencyMicroseconds, Size, IsSuccess) -> - update_rating(Peer, [], LatencyMicroseconds, Size, IsSuccess). + update_rating(Peer, [], LatencyMicroseconds, Size, IsSuccess). update_rating(Peer, AdditionalMetrics, LatencyMicroseconds, Size, IsSuccess) -> - %% Update the 'overall' metric plus any additional metrics specified. - lists:foreach( - fun(Metric) -> - update_performance(Peer, Metric, LatencyMicroseconds, Size, IsSuccess) - end, - [overall | AdditionalMetrics] - ). + %% Update the 'overall' metric plus any additional metrics specified. + lists:foreach( + fun(Metric) -> + update_performance(Peer, Metric, LatencyMicroseconds, Size, IsSuccess) + end, + [overall | AdditionalMetrics] + ). update_performance(Peer, Metric, LatencyMicroseconds, Size, IsSuccess) -> - %% only update available metrics - true = lists:member(Metric, ?AVAILABLE_METRICS), - Performance = get_or_init_performance(Peer, Metric), - Total = get_total_rating(Metric), - #performance{ - bytes = Bytes, - latency = Latency, - success = Success, - rating = Rating, - transfers = N - } = Performance, - Bytes2 = calculate_ema(Bytes, Size, ?THROUGHPUT_ALPHA), - Latency2 = calculate_ema(Latency, LatencyMicroseconds / 1000, ?THROUGHPUT_ALPHA), - Success2 = calculate_ema(Success, ar_util:bool_to_int(IsSuccess), ?SUCCESS_ALPHA), - Rating2 = (Bytes2 / Latency2) * Success2, - Performance2 = Performance#performance{ - bytes = Bytes2, - latency = Latency2, - success = Success2, - rating = Rating2, - transfers = N + 1 - }, - Total2 = Total - Rating + Rating2, - may_be_rotate_peer_ports(Peer), - set_performance(Peer, Metric, Performance2), - set_total_rating(Metric, Total2). 
+ %% only update available metrics + true = lists:member(Metric, ?AVAILABLE_METRICS), + Performance = get_or_init_performance(Peer, Metric), + Total = get_total_rating(Metric), + #performance{ + bytes = Bytes, + latency = Latency, + success = Success, + rating = Rating, + transfers = N + } = Performance, + Bytes2 = calculate_ema(Bytes, Size, ?THROUGHPUT_ALPHA), + Latency2 = calculate_ema(Latency, LatencyMicroseconds / 1000, ?THROUGHPUT_ALPHA), + Success2 = calculate_ema(Success, ar_util:bool_to_int(IsSuccess), ?SUCCESS_ALPHA), + Rating2 = (Bytes2 / Latency2) * Success2, + Performance2 = Performance#performance{ + bytes = Bytes2, + latency = Latency2, + success = Success2, + rating = Rating2, + transfers = N + 1 + }, + Total2 = Total - Rating + Rating2, + may_be_rotate_peer_ports(Peer), + set_performance(Peer, Metric, Performance2), + set_total_rating(Metric, Total2). calculate_ema(OldEMA, Value, Alpha) -> - Alpha * Value + (1 - Alpha) * OldEMA. + Alpha * Value + (1 - Alpha) * OldEMA. remove_peer(RemovedPeer) -> - ?LOG_DEBUG([ - {event, remove_peer}, - {peer, ar_util:format_peer(RemovedPeer)} - ]), - lists:foreach( - fun(Metric) -> - Performance = get_or_init_performance(RemovedPeer, Metric), - Total = get_total_rating(Metric), - set_total_rating(Metric, Total - Performance#performance.rating), - ets:delete(?MODULE, {peer, RemovedPeer, Metric}) - end, - ?AVAILABLE_METRICS - ), - remove_peer_port(RemovedPeer). + ?LOG_DEBUG([ + {event, remove_peer}, + {peer, ar_util:format_peer(RemovedPeer)} + ]), + lists:foreach( + fun(Metric) -> + Performance = get_or_init_performance(RemovedPeer, Metric), + Total = get_total_rating(Metric), + set_total_rating(Metric, Total - Performance#performance.rating), + ets:delete(?MODULE, {peer, RemovedPeer, Metric}) + end, + ?AVAILABLE_METRICS + ), + remove_peer_port(RemovedPeer). 
remove_peer_port(Peer) -> - {IP, Port} = get_ip_port(Peer), - case ets:lookup(?MODULE, {peer_ip, IP}) of - [] -> - ok; - [{_, {PortMap, Position}}] -> - case is_in_port_map(Port, PortMap) of - false -> - ok; - {true, N} -> - PortMap2 = erlang:setelement(N, PortMap, empty_slot), - case is_port_map_empty(PortMap2) of - true -> - ets:delete(?MODULE, {peer_ip, IP}); - false -> - ets:insert(?MODULE, {{peer_ip, IP}, {PortMap2, Position}}) - end - end - end. + {IP, Port} = get_ip_port(Peer), + case ets:lookup(?MODULE, {peer_ip, IP}) of + [] -> + ok; + [{_, {PortMap, Position}}] -> + case is_in_port_map(Port, PortMap) of + false -> + ok; + {true, N} -> + PortMap2 = erlang:setelement(N, PortMap, empty_slot), + case is_port_map_empty(PortMap2) of + true -> + ets:delete(?MODULE, {peer_ip, IP}); + false -> + ets:insert(?MODULE, {{peer_ip, IP}, {PortMap2, Position}}) + end + end + end. is_port_map_empty(PortMap) -> - is_port_map_empty(PortMap, erlang:size(PortMap), 1). + is_port_map_empty(PortMap, erlang:size(PortMap), 1). is_port_map_empty(_PortMap, Max, N) when N > Max -> - true; + true; is_port_map_empty(PortMap, Max, N) -> - case element(N, PortMap) of - empty_slot -> - is_port_map_empty(PortMap, Max, N + 1); - _ -> - false - end. + case element(N, PortMap) of + empty_slot -> + is_port_map_empty(PortMap, Max, N + 1); + _ -> + false + end. store_peers() -> - Records = + Records = ets:foldl( fun ({{peer, Peer, Metric}, Performance}, Acc) -> @@ -857,104 +855,104 @@ store_peers() -> [], ?MODULE ), - case Records of - [] -> - ok; - _ -> - ar_storage:write_term(peers, Records) - end. + case Records of + [] -> + ok; + _ -> + ar_storage:write_term(peers, Records) + end. 
issue_warning(Peer) -> - Performance = get_or_init_performance(Peer), - Success = calculate_ema(Performance#performance.success, 0, ?SUCCESS_ALPHA), - case Success < ?MINIMUM_SUCCESS of - true -> - remove_peer(Peer); - false -> - Performance2 = Performance#performance{success = Success}, - may_be_rotate_peer_ports(Peer), - set_performance(Peer, Performance2) - end. + Performance = get_or_init_performance(Peer), + Success = calculate_ema(Performance#performance.success, 0, ?SUCCESS_ALPHA), + case Success < ?MINIMUM_SUCCESS of + true -> + remove_peer(Peer); + false -> + Performance2 = Performance#performance{success = Success}, + may_be_rotate_peer_ports(Peer), + set_performance(Peer, Performance2) + end. %%%=================================================================== %%% Tests. %%%=================================================================== rotate_peer_ports_test() -> - Peer = {2, 2, 2, 2, 1}, - may_be_rotate_peer_ports(Peer), - [{_, {PortMap, 1}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(1, element(1, PortMap)), - remove_peer(Peer), - ?assertEqual([], ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}})), - may_be_rotate_peer_ports(Peer), - Peer2 = {2, 2, 2, 2, 2}, - may_be_rotate_peer_ports(Peer2), - [{_, {PortMap2, 2}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(1, element(1, PortMap2)), - ?assertEqual(2, element(2, PortMap2)), - remove_peer(Peer), - [{_, {PortMap3, 2}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(empty_slot, element(1, PortMap3)), - ?assertEqual(2, element(2, PortMap3)), - Peer3 = {2, 2, 2, 2, 3}, - Peer4 = {2, 2, 2, 2, 4}, - Peer5 = {2, 2, 2, 2, 5}, - Peer6 = {2, 2, 2, 2, 6}, - Peer7 = {2, 2, 2, 2, 7}, - Peer8 = {2, 2, 2, 2, 8}, - Peer9 = {2, 2, 2, 2, 9}, - Peer10 = {2, 2, 2, 2, 10}, - Peer11 = {2, 2, 2, 2, 11}, - may_be_rotate_peer_ports(Peer3), - may_be_rotate_peer_ports(Peer4), - may_be_rotate_peer_ports(Peer5), - may_be_rotate_peer_ports(Peer6), - 
may_be_rotate_peer_ports(Peer7), - may_be_rotate_peer_ports(Peer8), - may_be_rotate_peer_ports(Peer9), - may_be_rotate_peer_ports(Peer10), - [{_, {PortMap4, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(empty_slot, element(1, PortMap4)), - ?assertEqual(2, element(2, PortMap4)), - ?assertEqual(10, element(10, PortMap4)), - may_be_rotate_peer_ports(Peer8), - may_be_rotate_peer_ports(Peer9), - may_be_rotate_peer_ports(Peer10), - [{_, {PortMap5, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(empty_slot, element(1, PortMap5)), - ?assertEqual(2, element(2, PortMap5)), - ?assertEqual(3, element(3, PortMap5)), - ?assertEqual(9, element(9, PortMap5)), - ?assertEqual(10, element(10, PortMap5)), - may_be_rotate_peer_ports(Peer11), - [{_, {PortMap6, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(element(2, PortMap5), element(1, PortMap6)), - ?assertEqual(3, element(2, PortMap6)), - ?assertEqual(4, element(3, PortMap6)), - ?assertEqual(5, element(4, PortMap6)), - ?assertEqual(11, element(10, PortMap6)), - may_be_rotate_peer_ports(Peer11), - [{_, {PortMap7, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(element(2, PortMap5), element(1, PortMap7)), - ?assertEqual(3, element(2, PortMap7)), - ?assertEqual(4, element(3, PortMap7)), - ?assertEqual(5, element(4, PortMap7)), - ?assertEqual(11, element(10, PortMap7)), - remove_peer(Peer4), - [{_, {PortMap8, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(empty_slot, element(3, PortMap8)), - ?assertEqual(3, element(2, PortMap8)), - ?assertEqual(5, element(4, PortMap8)), - remove_peer(Peer2), - remove_peer(Peer3), - remove_peer(Peer5), - remove_peer(Peer6), - remove_peer(Peer7), - remove_peer(Peer8), - remove_peer(Peer9), - remove_peer(Peer10), - [{_, {PortMap9, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), - ?assertEqual(11, element(10, PortMap9)), - remove_peer(Peer11), - ?assertEqual([], ets:lookup(?MODULE, {peer_ip, 
{2, 2, 2, 2}})). + Peer = {2, 2, 2, 2, 1}, + may_be_rotate_peer_ports(Peer), + [{_, {PortMap, 1}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(1, element(1, PortMap)), + remove_peer(Peer), + ?assertEqual([], ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}})), + may_be_rotate_peer_ports(Peer), + Peer2 = {2, 2, 2, 2, 2}, + may_be_rotate_peer_ports(Peer2), + [{_, {PortMap2, 2}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(1, element(1, PortMap2)), + ?assertEqual(2, element(2, PortMap2)), + remove_peer(Peer), + [{_, {PortMap3, 2}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(empty_slot, element(1, PortMap3)), + ?assertEqual(2, element(2, PortMap3)), + Peer3 = {2, 2, 2, 2, 3}, + Peer4 = {2, 2, 2, 2, 4}, + Peer5 = {2, 2, 2, 2, 5}, + Peer6 = {2, 2, 2, 2, 6}, + Peer7 = {2, 2, 2, 2, 7}, + Peer8 = {2, 2, 2, 2, 8}, + Peer9 = {2, 2, 2, 2, 9}, + Peer10 = {2, 2, 2, 2, 10}, + Peer11 = {2, 2, 2, 2, 11}, + may_be_rotate_peer_ports(Peer3), + may_be_rotate_peer_ports(Peer4), + may_be_rotate_peer_ports(Peer5), + may_be_rotate_peer_ports(Peer6), + may_be_rotate_peer_ports(Peer7), + may_be_rotate_peer_ports(Peer8), + may_be_rotate_peer_ports(Peer9), + may_be_rotate_peer_ports(Peer10), + [{_, {PortMap4, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(empty_slot, element(1, PortMap4)), + ?assertEqual(2, element(2, PortMap4)), + ?assertEqual(10, element(10, PortMap4)), + may_be_rotate_peer_ports(Peer8), + may_be_rotate_peer_ports(Peer9), + may_be_rotate_peer_ports(Peer10), + [{_, {PortMap5, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(empty_slot, element(1, PortMap5)), + ?assertEqual(2, element(2, PortMap5)), + ?assertEqual(3, element(3, PortMap5)), + ?assertEqual(9, element(9, PortMap5)), + ?assertEqual(10, element(10, PortMap5)), + may_be_rotate_peer_ports(Peer11), + [{_, {PortMap6, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(element(2, PortMap5), element(1, 
PortMap6)), + ?assertEqual(3, element(2, PortMap6)), + ?assertEqual(4, element(3, PortMap6)), + ?assertEqual(5, element(4, PortMap6)), + ?assertEqual(11, element(10, PortMap6)), + may_be_rotate_peer_ports(Peer11), + [{_, {PortMap7, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(element(2, PortMap5), element(1, PortMap7)), + ?assertEqual(3, element(2, PortMap7)), + ?assertEqual(4, element(3, PortMap7)), + ?assertEqual(5, element(4, PortMap7)), + ?assertEqual(11, element(10, PortMap7)), + remove_peer(Peer4), + [{_, {PortMap8, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(empty_slot, element(3, PortMap8)), + ?assertEqual(3, element(2, PortMap8)), + ?assertEqual(5, element(4, PortMap8)), + remove_peer(Peer2), + remove_peer(Peer3), + remove_peer(Peer5), + remove_peer(Peer6), + remove_peer(Peer7), + remove_peer(Peer8), + remove_peer(Peer9), + remove_peer(Peer10), + [{_, {PortMap9, 10}}] = ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}}), + ?assertEqual(11, element(10, PortMap9)), + remove_peer(Peer11), + ?assertEqual([], ets:lookup(?MODULE, {peer_ip, {2, 2, 2, 2}})). From c8c1a1bfb5809994bac6c55276752c743281d0f3 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Wed, 5 Jul 2023 20:20:49 +0000 Subject: [PATCH 21/30] WIP --- apps/arweave/include/ar_peers.hrl | 4 + apps/arweave/src/ar_peers.erl | 123 +++++++++++++----------------- 2 files changed, 59 insertions(+), 68 deletions(-) diff --git a/apps/arweave/include/ar_peers.hrl b/apps/arweave/include/ar_peers.hrl index c583769a6..cb455419a 100644 --- a/apps/arweave/include/ar_peers.hrl +++ b/apps/arweave/include/ar_peers.hrl @@ -4,8 +4,12 @@ -include_lib("ar.hrl"). -define(STARTING_LATENCY_EMA, 1000). %% initial value to avoid over-weighting the first response +-define(RATE_SUCCESS, 1). +-define(RATE_ERROR, 0). +-define(RATE_PENALTY, -1). -define(AVAILABLE_METRICS, [overall, data_sync]). 
%% the performance metrics currently tracked +-define(AVAILABLE_SUCCESS_RATINGS, [?RATE_PENALTY, ?RATE_ERROR, ?RATE_SUCCESS]). -record(performance, { version = 3, diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 29c496234..f862965c3 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -42,9 +42,6 @@ -define(THROUGHPUT_ALPHA, 0.1). -define(SUCCESS_ALPHA, 0.01). --define(RATE_SUCCESS, 1). --define(RATE_ERROR, 0). --define(RATE_PENALTY, -1). %% We only do scoring of this many TCP ports per IP address. When there are not enough slots, %% we remove the peer from the first slot. @@ -80,8 +77,8 @@ get_trusted_peers() -> case Config#config.peers of [] -> ArweavePeers = ["sfo-1.na-west-1.arweave.net", "ams-1.eu-central-1.arweave.net", - "fra-1.eu-central-2.arweave.net", "blr-1.ap-central-1.arweave.net", - "sgp-1.ap-central-2.arweave.net" + "fra-1.eu-central-2.arweave.net", "blr-1.ap-central-1.arweave.net", + "sgp-1.ap-central-2.arweave.net" ], resolve_peers(ArweavePeers); Peers -> @@ -100,9 +97,8 @@ resolve_peers([RawPeer | Peers]) -> {ok, Peer} -> [Peer | resolve_peers(Peers)]; {error, invalid} -> - ?LOG_WARNING([ - {event, failed_to_resolve_trusted_peer}, - {peer, RawPeer} + ?LOG_WARNING([{event, failed_to_resolve_trusted_peer}, + {peer, RawPeer} ]), resolve_peers(Peers) end. @@ -179,7 +175,6 @@ rate_fetched_data(Peer, invalid) -> gossiped_data(Peer, Data) -> gossiped_data(Peer, Data, ok). 
- gossiped_data(Peer, Data, ok) -> gen_server:cast(?MODULE, { gossiped_data, Peer, Data @@ -272,6 +267,7 @@ handle_cast({add_peer, Peer, Release}, State) -> set_performance(Peer, #performance{release = Release}) end, {noreply, State}; + handle_cast(rank_peers, State) -> Total = get_total_rating(), Peers = @@ -298,14 +294,16 @@ handle_cast(rank_peers, State) -> ar_util:cast_after(?RANK_PEERS_FREQUENCY_MS, ?MODULE, rank_peers), stats(), {noreply, State}; + handle_cast(ping_peers, State) -> [{peers, Peers}] = ets:lookup(?MODULE, peers), ping_peers(lists:sublist(Peers, 100)), {noreply, State}; + handle_cast({rate_response, Peer, PathLabel, get, Status}, State) -> case Status of "success" -> - update_rating(Peer, ?RATE_SUCCESS); + update_rating(Peer, overall, ?RATE_SUCCESS); "redirection" -> %% don't update rating ok; @@ -313,31 +311,29 @@ handle_cast({rate_response, Peer, PathLabel, get, Status}, State) -> %% don't update rating ok; _ -> - update_rating(Peer, ?RATE_ERROR) + update_rating(Peer, overall, ?RATE_ERROR) end, ?LOG_DEBUG([ {event, update_rating}, {update_type, response}, + {metric, overall}, {path, PathLabel}, {status, Status}, {peer, ar_util:format_peer(Peer)} ]), {noreply, State}; + handle_cast({invalid_fetched_data, Peer}, State) -> ?LOG_DEBUG([ {event, update_rating}, {update_type, invalid_fetched_data}, {peer, ar_util:format_peer(Peer)} ]), - %% log 2 failures - first is to reverse the success that was previously recorded by end_request - %% (since end_request only considers whether or not the HTTP request was successful and does not - %% consider the validity of the data it may be overly permissive), and the second is to - %% penalize the peer for serving invalid data. - %% Note: this is an approximation as due to the nature of the EMA this won't exactly reverse - %% the prior success. 
- update_rating(Peer, false), - update_rating(Peer, false), + %% Log a penalty in order to reverse the SUCCESS rating that was applied in rate_response + %% (when the data was successfully fetched, but before it was validated) + update_rating(Peer, overall, ?RATE_PENALTY), {noreply, State}; + handle_cast({gossiped_data, Peer, Data}, State) -> case check_external_peer(Peer) of ok -> @@ -346,12 +342,13 @@ handle_cast({gossiped_data, Peer, Data}, State) -> {update_type, gossiped_data}, {peer, ar_util:format_peer(Peer)} ]), - update_rating(Peer, true); + update_rating(Peer, overall, ?RATE_SUCCESS); _ -> ok end, {noreply, State}; + handle_cast(Cast, State) -> ?LOG_WARNING("event: unhandled_cast, cast: ~p", [Cast]), {noreply, State}. @@ -372,33 +369,43 @@ handle_info({event, peer, {made_request, Peer, Release}}, State) -> end end, {noreply, State}; + handle_info({event, peer, {fetched_tx, Peer, TimeDelta, Size}}, State) -> % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), % update_rating(Peer, TimeDelta, Size), {noreply, State}; + handle_info({event, peer, {fetched_block, Peer, TimeDelta, Size}}, State) -> % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), % update_rating(Peer, TimeDelta, Size), {noreply, State}; + handle_info({event, peer, {bad_response, {Peer, _Type, _Reason}}}, State) -> issue_warning(Peer), {noreply, State}; + handle_info({event, peer, {banned, BannedPeer}}, State) -> remove_peer(BannedPeer), {noreply, State}; + handle_info({event, block, {rejected, failed_to_fetch_first_chunk, _H, Peer}}, State) -> issue_warning(Peer), {noreply, State}; + handle_info({event, block, {rejected, failed_to_fetch_second_chunk, _H, Peer}}, State) -> issue_warning(Peer), {noreply, State}; + handle_info({event, block, {rejected, failed_to_fetch_chunk, _H, Peer}}, State) -> issue_warning(Peer), {noreply, State}; 
+ handle_info({event, block, _}, State) -> {noreply, State}; + handle_info({'EXIT', _, normal}, State) -> {noreply, State}; + handle_info(Message, State) -> ?LOG_WARNING("event: unhandled_info, message: ~p", [Message]), {noreply, State}. @@ -409,6 +416,7 @@ terminate(_Reason, _State) -> %%%=================================================================== %%% Private functions. %%%=================================================================== + get_peer_peers(Peer) -> case ar_http_iface_client:get_peers(Peer) of unavailable -> []; @@ -473,15 +481,9 @@ format_stats(Peer, Perf) -> KB = Perf#performance.bytes / 1024, io:format( "\t~s ~.2f kB/s (~.2f kB, ~B latency, ~.2f success, ~p transfers)~n", - [ - string:pad(ar_util:format_peer(Peer), 21, trailing, $\s), - float(Perf#performance.rating), - KB, - trunc(Perf#performance.latency), - Perf#performance.success, - Perf#performance.transfers - ] - ). + [string:pad(ar_util:format_peer(Peer), 21, trailing, $\s), + float(Perf#performance.rating), KB, trunc(Perf#performance.latency), + Perf#performance.success, Perf#performance.transfers]). 
read_peer_records() -> PeerRecords = case ar_storage:read_term(peers) of @@ -603,8 +605,8 @@ may_be_rotate_peer_ports(Peer) -> {IP, Port} = get_ip_port(Peer), case ets:lookup(?MODULE, {peer_ip, IP}) of [] -> - ets:insert( - ?MODULE, {{peer_ip, IP}, {erlang:setelement(1, ?DEFAULT_PEER_PORT_MAP, Port), 1}} + ets:insert(?MODULE, {{peer_ip, IP}, + {erlang:setelement(1, ?DEFAULT_PEER_PORT_MAP, Port), 1}} ); [{_, {PortMap, Position}}] -> case is_in_port_map(Port, PortMap) of @@ -614,12 +616,9 @@ may_be_rotate_peer_ports(Peer) -> MaxSize = erlang:size(?DEFAULT_PEER_PORT_MAP), case Position < MaxSize of true -> - ets:insert( - ?MODULE, - {{peer_ip, IP}, { - erlang:setelement(Position + 1, PortMap, Port), Position + 1 - }} - ); + ets:insert(?MODULE, {{peer_ip, IP}, + {erlang:setelement(Position + 1, PortMap, Port), + Position + 1}}); false -> RemovedPeer = construct_peer(IP, element(1, PortMap)), PortMap2 = shift_port_map_left(PortMap), @@ -738,34 +737,12 @@ check_external_peer(Peer) -> ok end. -update_rating(Peer, IsSuccess) -> - update_rating(Peer, [], IsSuccess). -update_rating(Peer, AdditionalMetrics, IsSuccess) -> - Performance = get_or_init_performance(Peer), - %% Pass in the current latency and bytes values in order to hold them constant. - %% Only the success average should be updated. - update_rating( - Peer, - AdditionalMetrics, - Performance#performance.latency, - Performance#performance.bytes, - IsSuccess - ). - -update_rating(Peer, LatencyMicroseconds, Size, IsSuccess) -> - update_rating(Peer, [], LatencyMicroseconds, Size, IsSuccess). -update_rating(Peer, AdditionalMetrics, LatencyMicroseconds, Size, IsSuccess) -> - %% Update the 'overall' metric plus any additional metrics specified. - lists:foreach( - fun(Metric) -> - update_performance(Peer, Metric, LatencyMicroseconds, Size, IsSuccess) - end, - [overall | AdditionalMetrics] - ). 
- -update_performance(Peer, Metric, LatencyMicroseconds, Size, IsSuccess) -> +update_rating(Peer, Metric, SuccessRating) -> + update_rating(Peer, Metric, undefined, undefined, SuccessRating). +update_rating(Peer, Metric, LatencyMicroseconds, Size, SuccessRating) -> %% only update available metrics true = lists:member(Metric, ?AVAILABLE_METRICS), + true = lists:member(SuccessRating, ?AVAILABLE_SUCCESS_RATINGS), Performance = get_or_init_performance(Peer, Metric), Total = get_total_rating(Metric), #performance{ @@ -773,18 +750,28 @@ update_performance(Peer, Metric, LatencyMicroseconds, Size, IsSuccess) -> latency = Latency, success = Success, rating = Rating, - transfers = N + transfers = Transfers } = Performance, - Bytes2 = calculate_ema(Bytes, Size, ?THROUGHPUT_ALPHA), - Latency2 = calculate_ema(Latency, LatencyMicroseconds / 1000, ?THROUGHPUT_ALPHA), - Success2 = calculate_ema(Success, ar_util:bool_to_int(IsSuccess), ?SUCCESS_ALPHA), + Bytes2 = case Size of + undefined -> Bytes; + _ -> calculate_ema(Bytes, Size, ?THROUGHPUT_ALPHA) + end, + Latency2 = case LatencyMicroseconds of + undefined -> Latency; + _ -> calculate_ema(Latency, LatencyMicroseconds / 1000, ?THROUGHPUT_ALPHA) + end, + Transfers2 = case Size of + undefined -> Transfers; + _ -> Transfers + 1 + end, + Success2 = calculate_ema(Success, SuccessRating, ?SUCCESS_ALPHA), Rating2 = (Bytes2 / Latency2) * Success2, Performance2 = Performance#performance{ bytes = Bytes2, latency = Latency2, success = Success2, rating = Rating2, - transfers = N + 1 + transfers = Transfers2 }, Total2 = Total - Rating + Rating2, may_be_rotate_peer_ports(Peer), From a6362aaa70d08b16a3f8d09b681959e17383c99e Mon Sep 17 00:00:00 2001 From: James Piechota Date: Wed, 5 Jul 2023 20:59:32 +0000 Subject: [PATCH 22/30] WIP --- apps/arweave/src/ar_data_sync.erl | 1 - apps/arweave/src/ar_data_sync_worker.erl | 14 ++++--- apps/arweave/src/ar_http.erl | 3 +- apps/arweave/src/ar_http_iface_client.erl | 42 ++++--------------- 
apps/arweave/src/ar_http_iface_middleware.erl | 4 +- apps/arweave/src/ar_peers.erl | 25 ++++++----- 6 files changed, 33 insertions(+), 56 deletions(-) diff --git a/apps/arweave/src/ar_data_sync.erl b/apps/arweave/src/ar_data_sync.erl index 1cfef7fdc..ed4f62df2 100644 --- a/apps/arweave/src/ar_data_sync.erl +++ b/apps/arweave/src/ar_data_sync.erl @@ -2586,7 +2586,6 @@ process_invalid_fetched_chunk(Peer, Byte, State) -> #sync_data_state{ weave_size = WeaveSize } = State, ?LOG_WARNING([{event, got_invalid_proof_from_peer}, {peer, ar_util:format_peer(Peer)}, {byte, Byte}, {weave_size, WeaveSize}]), - ar_peers:rate_fetched_data(Peer, invalid), %% Not necessarily a malicious peer, it might happen %% if the chunk is recent and from a different fork. {noreply, State}. diff --git a/apps/arweave/src/ar_data_sync_worker.erl b/apps/arweave/src/ar_data_sync_worker.erl index a8d8d3454..5b88a11ee 100644 --- a/apps/arweave/src/ar_data_sync_worker.erl +++ b/apps/arweave/src/ar_data_sync_worker.erl @@ -55,12 +55,15 @@ handle_cast({read_range, Args}, State) -> handle_cast({sync_range, Args}, State) -> {_Start, _End, Peer, _TargetStoreID, _RetryCount} = Args, - case sync_range(Args) of + StartTime = erlang:monotonic_time(), + SyncResult = sync_range(Args), + EndTime = erlang:monotonic_time(), + case SyncResult of recast -> ok; - SyncResult -> + _ -> gen_server:cast(ar_data_sync_worker_master, - {task_completed, {sync_range, {State#state.name, SyncResult, Peer}}}) + {task_completed, {sync_range, {State#state.name, SyncResult, Peer, EndTime-StartTime}}}) end, {noreply, State}; @@ -217,7 +220,7 @@ sync_range({Start, End, Peer, TargetStoreID, RetryCount} = Args) -> ok; false -> case ar_http_iface_client:get_chunk_binary(Peer, Start2, any) of - {ok, #{ chunk := Chunk } = Proof} -> + {ok, #{ chunk := Chunk } = Proof, Time, TransferSize} -> %% In case we fetched a packed small chunk, %% we may potentially skip some chunks by %% continuing with Start2 + byte_size(Chunk) - the skip @@ -225,7 
+228,8 @@ sync_range({Start, End, Peer, TargetStoreID, RetryCount} = Args) -> Start3 = ar_data_sync:get_chunk_padded_offset( Start2 + byte_size(Chunk)) + 1, gen_server:cast(list_to_atom("ar_data_sync_" ++ TargetStoreID), - {store_fetched_chunk, Peer, Start2 - 1, Proof}), + {store_fetched_chunk, Peer, Time, TransferSize, Start2 - 1, + Proof}), ar_data_sync:increment_chunk_cache_size(), sync_range({Start3, End, Peer, TargetStoreID, RetryCount}); {error, timeout} -> diff --git a/apps/arweave/src/ar_http.erl b/apps/arweave/src/ar_http.erl index d01a09d00..10c297bf7 100644 --- a/apps/arweave/src/ar_http.erl +++ b/apps/arweave/src/ar_http.erl @@ -92,7 +92,8 @@ req(Args, ReestablishedConnection) -> false -> Status = ar_metrics:get_status_class(Response), ElapsedNative = EndTime - StartTime, - ar_peers:rate_response(Peer, PathLabel, Method, Response), + %% XXX maybe don't do this + %% ar_peers:rate_response(Peer, PathLabel, Method, Response), %% NOTE: the erlang prometheus client looks at the metric name to determine units. %% If it sees _duration_ it assumes the observed value is in %% native units and it converts it to .To query native units, use: diff --git a/apps/arweave/src/ar_http_iface_client.erl b/apps/arweave/src/ar_http_iface_client.erl index 7afed44ed..3a7d774d7 100644 --- a/apps/arweave/src/ar_http_iface_client.erl +++ b/apps/arweave/src/ar_http_iface_client.erl @@ -303,9 +303,6 @@ get_sync_record(Peer, Start, Limit) -> }), Start, Limit). get_chunk_binary(Peer, Offset, RequestedPacking) -> - get_chunk(Peer, Offset, RequestedPacking, binary). 
- -get_chunk(Peer, Offset, RequestedPacking, Encoding) -> PackingBinary = case RequestedPacking of any -> @@ -334,7 +331,7 @@ get_chunk(Peer, Offset, RequestedPacking, Encoding) -> Response = ar_http:req(#{ peer => Peer, method => get, - path => get_chunk_path(Offset, Encoding), + path => "/chunk2/" ++ integer_to_binary(Offset), timeout => 120 * 1000, connect_timeout => 5000, limit => ?MAX_SERIALIZED_CHUNK_PROOF_SIZE, @@ -348,12 +345,7 @@ get_chunk(Peer, Offset, RequestedPacking, Encoding) -> ], erlang:monotonic_time() - StartTime), - handle_chunk_response(Encoding, Response). - -get_chunk_path(Offset, json) -> - "/chunk/" ++ integer_to_binary(Offset); -get_chunk_path(Offset, binary) -> - "/chunk2/" ++ integer_to_binary(Offset). + handle_chunk_response(Response). get_mempool(Peer) -> handle_mempool_response(ar_http:req(#{ @@ -559,29 +551,13 @@ handle_sync_record_response({ok, {{<<"200">>, _}, _, Body, _, _}}, Start, Limit) handle_sync_record_response(Reply, _, _) -> {error, Reply}. -handle_chunk_response(Encoding, {ok, {{<<"200">>, _}, _, Body, _, _}}) -> - DecodeFun = - case Encoding of - json -> - fun(Bin) -> - ar_serialize:json_map_to_chunk_proof(jiffy:decode(Bin, [return_maps])) - end; - binary -> - fun(Bin) -> - case ar_serialize:binary_to_poa(Bin) of - {ok, Reply} -> - Reply; - {error, Reason} -> - {error, Reason} - end - end - end, - Result = case catch DecodeFun(Body) of +handle_chunk_response({ok, {{<<"200">>, _}, _, Body, _, _}}) -> + case catch ar_serialize:binary_to_poa(Body) of {'EXIT', Reason} -> {error, Reason}; {error, Reason} -> {error, Reason}; - Proof -> + {ok, Proof} -> case maps:get(chunk, Proof) of <<>> -> {error, empty_chunk}; @@ -590,12 +566,10 @@ handle_chunk_response(Encoding, {ok, {{<<"200">>, _}, _, Body, _, _}}) -> _ -> {ok, Proof} end - end, - ar_peers:rate_fetched_data(Result), - Result; -handle_chunk_response(_Encoding, {error, _} = Response) -> + end; +handle_chunk_response({error, _} = Response) -> Response; 
-handle_chunk_response(_Encoding, Response) -> +handle_chunk_response(Response) -> {error, Response}. handle_mempool_response({ok, {{<<"200">>, _}, _, Body, _, _}}) -> diff --git a/apps/arweave/src/ar_http_iface_middleware.erl b/apps/arweave/src/ar_http_iface_middleware.erl index 90483ad4c..c73b30a1d 100644 --- a/apps/arweave/src/ar_http_iface_middleware.erl +++ b/apps/arweave/src/ar_http_iface_middleware.erl @@ -1830,7 +1830,7 @@ handle_post_tx_accepted(Req, TX, Peer) -> %% of excessive transaction volumes. {A, B, C, D, _} = Peer, ar_blacklist_middleware:decrement_ip_addr({A, B, C, D}, Req), - ar_peers:gossiped_data(Peer, Tx), + ar_peers:rate_gossiped_data(Peer, Tx), ar_events:send(tx, {new, TX, Peer}), TXID = TX#tx.id, ar_ignore_registry:remove_temporary(TXID), @@ -2359,7 +2359,7 @@ post_block(enqueue_block, {B, Peer}, Req, ReceiveTimestamp) -> end, ?LOG_INFO([{event, received_block}, {block, ar_util:encode(B#block.indep_hash)}]), ValidationStatus = ar_block_pre_validator:pre_validate(B2, Peer, ReceiveTimestamp), - ar_peers:gossiped_data(Peer, B2, ValidationStatus), + ar_peers:rate_gossiped_data(Peer, B2, ValidationStatus), {200, #{}, <<"OK">>, Req}. encode_txids([]) -> diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index f862965c3..cf1e391cc 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -11,7 +11,7 @@ -export([start_link/0, get_peers/0, get_peer_performances/1, get_trusted_peers/0, is_public_peer/1, get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, resolve_and_cache_peer/2, - rate_response/4, rate_fetched_data/2, gossiped_data/3, gossiped_data/2 + rate_response/4, rate_fetched_data/2, rate_gossiped_data/3, rate_gossiped_data/2 ]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -173,13 +173,13 @@ rate_fetched_data(Peer, invalid) -> %% in end_request, and then apply a penalty gen_server:cast(?MODULE, {invalid_fetched_data, Peer}). 
-gossiped_data(Peer, Data) -> - gossiped_data(Peer, Data, ok). -gossiped_data(Peer, Data, ok) -> +rate_gossiped_data(Peer, Data) -> + rate_gossiped_data(Peer, Data, ok). +rate_gossiped_data(Peer, Data, ok) -> gen_server:cast(?MODULE, { gossiped_data, Peer, Data }); -gossiped_data(_Peer, _Data, _ValidationStatus) -> +rate_gossiped_data(_Peer, _Data, _ValidationStatus) -> %% Ignore skipped or invalid blocks for now (consistent with old behavior, but may need to %% be revisited) ok. @@ -303,7 +303,7 @@ handle_cast(ping_peers, State) -> handle_cast({rate_response, Peer, PathLabel, get, Status}, State) -> case Status of "success" -> - update_rating(Peer, overall, ?RATE_SUCCESS); + update_rating(Peer, overall, true); "redirection" -> %% don't update rating ok; @@ -311,7 +311,7 @@ handle_cast({rate_response, Peer, PathLabel, get, Status}, State) -> %% don't update rating ok; _ -> - update_rating(Peer, overall, ?RATE_ERROR) + update_rating(Peer, overall, false) end, ?LOG_DEBUG([ {event, update_rating}, @@ -342,7 +342,7 @@ handle_cast({gossiped_data, Peer, Data}, State) -> {update_type, gossiped_data}, {peer, ar_util:format_peer(Peer)} ]), - update_rating(Peer, overall, ?RATE_SUCCESS); + update_rating(Peer, overall, true); _ -> ok end, @@ -737,12 +737,11 @@ check_external_peer(Peer) -> ok end. -update_rating(Peer, Metric, SuccessRating) -> - update_rating(Peer, Metric, undefined, undefined, SuccessRating). -update_rating(Peer, Metric, LatencyMicroseconds, Size, SuccessRating) -> +update_rating(Peer, Metric, IsSuccess) -> + update_rating(Peer, Metric, undefined, undefined, IsSuccess). 
+update_rating(Peer, Metric, LatencyMicroseconds, Size, IsSuccess) -> %% only update available metrics true = lists:member(Metric, ?AVAILABLE_METRICS), - true = lists:member(SuccessRating, ?AVAILABLE_SUCCESS_RATINGS), Performance = get_or_init_performance(Peer, Metric), Total = get_total_rating(Metric), #performance{ @@ -764,7 +763,7 @@ update_rating(Peer, Metric, LatencyMicroseconds, Size, SuccessRating) -> undefined -> Transfers; _ -> Transfers + 1 end, - Success2 = calculate_ema(Success, SuccessRating, ?SUCCESS_ALPHA), + Success2 = calculate_ema(Success, ar_util:bool_to_int(IsSuccess), ?SUCCESS_ALPHA), Rating2 = (Bytes2 / Latency2) * Success2, Performance2 = Performance#performance{ bytes = Bytes2, From 9e45c2ef7736a39b977b4a407635a75747bbd490 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Thu, 6 Jul 2023 19:25:21 +0000 Subject: [PATCH 23/30] WIP --- apps/arweave/include/ar.hrl | 2 - apps/arweave/include/ar_peers.hrl | 19 +- apps/arweave/src/ar_block_pre_validator.erl | 4 +- apps/arweave/src/ar_data_sync.erl | 4 +- apps/arweave/src/ar_data_sync_worker.erl | 5 +- .../src/ar_data_sync_worker_master.erl | 17 +- apps/arweave/src/ar_http.erl | 2 - apps/arweave/src/ar_http_iface_middleware.erl | 5 +- apps/arweave/src/ar_peers.erl | 164 ++++++++++++------ apps/arweave/src/ar_poller.erl | 17 +- 10 files changed, 140 insertions(+), 99 deletions(-) diff --git a/apps/arweave/include/ar.hrl b/apps/arweave/include/ar.hrl index d9b6db566..1146f69c9 100644 --- a/apps/arweave/include/ar.hrl +++ b/apps/arweave/include/ar.hrl @@ -7,8 +7,6 @@ %% (e.g. bin/test or bin/shell) -define(IS_TEST, erlang:get_cookie() == test). --define(DATA_SIZE(Term), erlang:byte_size(term_to_binary(Term))). - %% The mainnet name. Does not change at the hard forks. -ifndef(NETWORK_NAME). -ifdef(DEBUG). 
diff --git a/apps/arweave/include/ar_peers.hrl b/apps/arweave/include/ar_peers.hrl index cb455419a..f95593e10 100644 --- a/apps/arweave/include/ar_peers.hrl +++ b/apps/arweave/include/ar_peers.hrl @@ -3,21 +3,20 @@ -include_lib("ar.hrl"). --define(STARTING_LATENCY_EMA, 1000). %% initial value to avoid over-weighting the first response --define(RATE_SUCCESS, 1). --define(RATE_ERROR, 0). --define(RATE_PENALTY, -1). - --define(AVAILABLE_METRICS, [overall, data_sync]). %% the performance metrics currently tracked --define(AVAILABLE_SUCCESS_RATINGS, [?RATE_PENALTY, ?RATE_ERROR, ?RATE_SUCCESS]). +%% the performance metrics currently tracked +-define(AVAILABLE_METRICS, [overall, data_sync]). +%% factor to scale the average latency by when rating gossiped data - lower is better +-define(GOSSIP_ADVANTAGE, 0.5). -record(performance, { version = 3, release = -1, - bytes = 0, - latency = ?STARTING_LATENCY_EMA, + average_bytes = 0.0, + total_bytes = 0, + average_latency = 0.0, + total_latency = 0.0, transfers = 0, - success = 1.0, + average_success = 1.0, rating = 0 }). 
diff --git a/apps/arweave/src/ar_block_pre_validator.erl b/apps/arweave/src/ar_block_pre_validator.erl index 49be0a6af..e3323ebf2 100644 --- a/apps/arweave/src/ar_block_pre_validator.erl +++ b/apps/arweave/src/ar_block_pre_validator.erl @@ -709,7 +709,7 @@ pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp) -> accept_block(B, Peer, Timestamp, Gossip) -> ar_ignore_registry:add(B#block.indep_hash), ar_events:send(block, {new, B, #{ source => {peer, Peer}, gossip => Gossip }}), - % ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), + % HANDLED ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), record_block_pre_validation_time(Timestamp), ?LOG_INFO([{event, accepted_block}, {height, B#block.height}, {indep_hash, ar_util:encode(B#block.indep_hash)}]), @@ -753,7 +753,7 @@ pre_validate_pow(B, BDS, PrevB, Peer, Timestamp) -> B2 = B#block{ txs = include_transactions(B#block.txs) }, ar_events:send(block, {new, B2, #{ source => {peer, Peer}, recall_byte => RecallByte }}), - % ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), + % HANDLED ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), record_block_pre_validation_time(Timestamp), prometheus_counter:inc(block2_received_transactions, count_received_transactions(B#block.txs)), diff --git a/apps/arweave/src/ar_data_sync.erl b/apps/arweave/src/ar_data_sync.erl index ed4f62df2..b7dd36cad 100644 --- a/apps/arweave/src/ar_data_sync.erl +++ b/apps/arweave/src/ar_data_sync.erl @@ -944,7 +944,7 @@ handle_cast({store_fetched_chunk, Peer, Byte, Proof} = Cast, State) -> ar_util:cast_after(1000, self(), Cast), {noreply, State}; false -> - % ar_events:send(peer, {fetched_chunk, Peer, Time, TransferSize}), + % HANDLED ar_events:send(peer, {fetched_chunk, Peer, Time, TransferSize}), ar_packing_server:request_unpack(AbsoluteOffset, ChunkArgs), ?LOG_DEBUG([{event, requested_fetched_chunk_unpacking}, {data_path_hash, ar_util:encode(crypto:hash(sha256, @@ -964,7 +964,7 
@@ handle_cast({store_fetched_chunk, Peer, Byte, Proof} = Cast, State) -> decrement_chunk_cache_size(), process_invalid_fetched_chunk(Peer, Byte, State); {true, DataRoot, TXStartOffset, ChunkEndOffset, TXSize, ChunkSize, ChunkID} -> - % ar_events:send(peer, {fetched_chunk, Peer, Time, TransferSize}), + % HANDLED ar_events:send(peer, {fetched_chunk, Peer, Time, TransferSize}), AbsoluteTXStartOffset = BlockStartOffset + TXStartOffset, AbsoluteEndOffset = AbsoluteTXStartOffset + ChunkEndOffset, ChunkArgs = {unpacked, Chunk, AbsoluteEndOffset, TXRoot, ChunkSize}, diff --git a/apps/arweave/src/ar_data_sync_worker.erl b/apps/arweave/src/ar_data_sync_worker.erl index 5b88a11ee..02e410a99 100644 --- a/apps/arweave/src/ar_data_sync_worker.erl +++ b/apps/arweave/src/ar_data_sync_worker.erl @@ -54,7 +54,6 @@ handle_cast({read_range, Args}, State) -> {noreply, State}; handle_cast({sync_range, Args}, State) -> - {_Start, _End, Peer, _TargetStoreID, _RetryCount} = Args, StartTime = erlang:monotonic_time(), SyncResult = sync_range(Args), EndTime = erlang:monotonic_time(), @@ -62,8 +61,8 @@ handle_cast({sync_range, Args}, State) -> recast -> ok; _ -> - gen_server:cast(ar_data_sync_worker_master, - {task_completed, {sync_range, {State#state.name, SyncResult, Peer, EndTime-StartTime}}}) + gen_server:cast(ar_data_sync_worker_master, {task_completed, + {sync_range, {State#state.name, SyncResult, Args, EndTime-StartTime}}}) end, {noreply, State}; diff --git a/apps/arweave/src/ar_data_sync_worker_master.erl b/apps/arweave/src/ar_data_sync_worker_master.erl index dda2dbc8d..e01fd43b2 100644 --- a/apps/arweave/src/ar_data_sync_worker_master.erl +++ b/apps/arweave/src/ar_data_sync_worker_master.erl @@ -110,10 +110,12 @@ handle_cast({task_completed, {read_range, {Worker, _, _}}}, State) -> State2 = update_scheduled_task_count(Worker, read_range, "localhost", -1, State), {noreply, State2}; -handle_cast({task_completed, {sync_range, {Worker, _Result, Peer}}}, State) -> 
+handle_cast({task_completed, {sync_range, {Worker, Result, Args, ElapsedNative}}}, State) -> + {Start, End, Peer, _} = Args, + DataSize = End - Start, State2 = update_scheduled_task_count(Worker, sync_range, ar_util:format_peer(Peer), -1, State), PeerTasks = get_peer_tasks(Peer, State2), - {PeerTasks2, State3} = complete_sync_range(PeerTasks, State2), + {PeerTasks2, State3} = complete_sync_range(PeerTasks, Result, ElapsedNative, DataSize, State2), {PeerTasks3, State4} = process_peer_queue(PeerTasks2, State3), {noreply, set_peer_tasks(PeerTasks3, State4)}; @@ -324,10 +326,14 @@ schedule_task(Task, Args, State) -> %% Stage 3: record a completed task and update related values (i.e. %% EMA, max_active, peer queue length) %%-------------------------------------------------------------------- -complete_sync_range(PeerTasks, State) -> +complete_sync_range(PeerTasks, Result, ElapsedNative, DataSize, State) -> PeerTasks2 = PeerTasks#peer_tasks{ active_count = PeerTasks#peer_tasks.active_count - 1 }, + ar_peers:rate_fetched_data( + PeerTasks2#peer_tasks.peer, chunk, Result, + erlang:convert_time_unit(ElapsedNative, native, microsecond), DataSize, + PeerTasks2#peer_tasks.max_active), {PeerTasks2, State}. rebalance_peers([], [], _, State) -> @@ -375,11 +381,6 @@ update_scheduled_task_count(Worker, Task, FormattedPeer, N, State) -> }, State2. -calculate_ema(OldEMA, false, _Value, _Alpha) -> - OldEMA; -calculate_ema(OldEMA, true, Value, Alpha) -> - Alpha * Value + (1 - Alpha) * OldEMA. - get_peer_tasks(Peer, State) -> maps:get(Peer, State#state.peer_tasks, #peer_tasks{peer = Peer}). 
diff --git a/apps/arweave/src/ar_http.erl b/apps/arweave/src/ar_http.erl index 10c297bf7..f8392bacb 100644 --- a/apps/arweave/src/ar_http.erl +++ b/apps/arweave/src/ar_http.erl @@ -92,8 +92,6 @@ req(Args, ReestablishedConnection) -> false -> Status = ar_metrics:get_status_class(Response), ElapsedNative = EndTime - StartTime, - %% XXX maybe don't do this - %% ar_peers:rate_response(Peer, PathLabel, Method, Response), %% NOTE: the erlang prometheus client looks at the metric name to determine units. %% If it sees _duration_ it assumes the observed value is in %% native units and it converts it to .To query native units, use: diff --git a/apps/arweave/src/ar_http_iface_middleware.erl b/apps/arweave/src/ar_http_iface_middleware.erl index c73b30a1d..41370ca6d 100644 --- a/apps/arweave/src/ar_http_iface_middleware.erl +++ b/apps/arweave/src/ar_http_iface_middleware.erl @@ -1830,7 +1830,8 @@ handle_post_tx_accepted(Req, TX, Peer) -> %% of excessive transaction volumes. {A, B, C, D, _} = Peer, ar_blacklist_middleware:decrement_ip_addr({A, B, C, D}, Req), - ar_peers:rate_gossiped_data(Peer, Tx), + %% ar_events:send(peer, {gossiped_tx, Peer, erlang:get(read_body_time), erlang:get(body_size)}), + ar_peers:rate_gossiped_data(Peer, byte_size(term_to_binary(TX))), ar_events:send(tx, {new, TX, Peer}), TXID = TX#tx.id, ar_ignore_registry:remove_temporary(TXID), @@ -2359,7 +2360,7 @@ post_block(enqueue_block, {B, Peer}, Req, ReceiveTimestamp) -> end, ?LOG_INFO([{event, received_block}, {block, ar_util:encode(B#block.indep_hash)}]), ValidationStatus = ar_block_pre_validator:pre_validate(B2, Peer, ReceiveTimestamp), - ar_peers:rate_gossiped_data(Peer, B2, ValidationStatus), + ar_peers:rate_gossiped_data(Peer, byte_size(term_to_binary(B2)), ValidationStatus), {200, #{}, <<"OK">>, Req}. 
encode_txids([]) -> diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index cf1e391cc..c7c019475 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -11,7 +11,7 @@ -export([start_link/0, get_peers/0, get_peer_performances/1, get_trusted_peers/0, is_public_peer/1, get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, resolve_and_cache_peer/2, - rate_response/4, rate_fetched_data/2, rate_gossiped_data/3, rate_gossiped_data/2 + rate_response/4, rate_fetched_data/6, rate_gossiped_data/3, rate_gossiped_data/2 ]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -160,26 +160,25 @@ rate_response(Peer, PathLabel, get, Response) -> rate_response(_Peer, _PathLabel, _Method, _Response) -> ok. -rate_fetched_data(_Peer, {ok, _}) -> - %% The fetched data is valid so the rating was already captured as part of - %% the start/end request pair. Nothing more to do. +rate_fetched_data(Peer, DataType, ok, LatencyMicroseconds, DataSize, Concurrency) -> + gen_server:cast(?MODULE, + {fetched_data, DataType, Peer, LatencyMicroseconds, DataSize, Concurrency}); +rate_fetched_data(Peer, DataType, {ok, _}, LatencyMicroseconds, DataSize, Concurrency) -> + gen_server:cast(?MODULE, + {fetched_data, DataType, Peer, LatencyMicroseconds, DataSize, Concurrency}); +rate_fetched_data(Peer, DataType, skipped, _LatencyMicroseconds, _DataSize, _Concurrency) -> + %% No need to penalize skipped blocks, as they are not the peer's fault. ok; -rate_fetched_data(Peer, {error, _}) -> - %% The fetched data is invalid, so we need to reverse the rating that was applied - %% in end_request, and then apply a penalty - gen_server:cast(?MODULE, {invalid_fetched_data, Peer}); -rate_fetched_data(Peer, invalid) -> - %% The fetched data is invalid, so we need to reverse the rating that was applied - %% in end_request, and then apply a penalty - gen_server:cast(?MODULE, {invalid_fetched_data, Peer}). 
- -rate_gossiped_data(Peer, Data) -> - rate_gossiped_data(Peer, Data, ok). -rate_gossiped_data(Peer, Data, ok) -> +rate_fetched_data(Peer, DataType, _, _LatencyMicroseconds, _DataSize, _Concurrency) -> + gen_server:cast(?MODULE, {invalid_fetched_data, DataType, Peer}). + +rate_gossiped_data(Peer, DataSize) -> + rate_gossiped_data(Peer, DataSize, ok). +rate_gossiped_data(Peer, DataSize, ok) -> gen_server:cast(?MODULE, { - gossiped_data, Peer, Data + gossiped_data, Peer, DataSize }); -rate_gossiped_data(_Peer, _Data, _ValidationStatus) -> +rate_gossiped_data(_Peer, _DataSize, _ValidationStatus) -> %% Ignore skipped or invalid blocks for now (consistent with old behavior, but may need to %% be revisited) ok. @@ -323,26 +322,63 @@ handle_cast({rate_response, Peer, PathLabel, get, Status}, State) -> ]), {noreply, State}; -handle_cast({invalid_fetched_data, Peer}, State) -> +handle_cast({fetched_data, DataType, Peer, LatencyMicroseconds, DataSize, Concurrency}, State) -> + ?LOG_DEBUG([ + {event, update_rating}, + {update_type, fetched_data}, + {data_type, DataType}, + {peer, ar_util:format_peer(Peer)}, + {latency, LatencyMicroseconds / 1000}, + {data_size, DataSize}, + {concurrency, Concurrency} + ]), + update_rating(Peer, overall, LatencyMicroseconds, DataSize, Concurrency, true), + case DataType of + chunk -> update_rating(Peer, data_sync, LatencyMicroseconds, DataSize, Concurrency, true); + _ -> ok + end, + {noreply, State}; + + +handle_cast({invalid_fetched_data, DataType, Peer}, State) -> ?LOG_DEBUG([ {event, update_rating}, {update_type, invalid_fetched_data}, + {data_type, DataType}, {peer, ar_util:format_peer(Peer)} ]), - %% Log a penalty in order to reverse the SUCESS_RATING that was applied in rate_response - %% (When the data was successfully fetched, but before it was validated) - update_rating(Peer, overall, ?RATE_PENALTY), + update_rating(Peer, overall, false), + case DataType of + chunk -> update_rating(Peer, data_sync, false); + _ -> ok + end, {noreply, 
State}; -handle_cast({gossiped_data, Peer, Data}, State) -> +handle_cast({gossiped_data, Peer, DataSize}, State) -> case check_external_peer(Peer) of ok -> + %% Since gossiped data is pushed to us we don't know the latency, but we do want + %% to incentivize peers to gossip data quickly and frequently, so we will assign + %% a latency that is guaranteed to improve the peer's rating: + %% 1. Calculate the latency that would be required to transfer DataSize bytes at the + %% peer's current average rate. + %% 2. Scale that latency by ?GOSSIP_ADVANTAGE and rate using the scaled latency + Performance = get_or_init_performance(Peer), + #performance{ + average_bytes = AverageBytes, + average_latency = AverageLatency + } = Performance, + AverageThroughput = AverageBytes / AverageLatency, + GossipLatency = (DataSize / AverageThroughput) * ?GOSSIP_ADVANTAGE, + LatencyMicroseconds = GossipLatency * 1000, ?LOG_DEBUG([ {event, update_rating}, {update_type, gossiped_data}, - {peer, ar_util:format_peer(Peer)} + {peer, ar_util:format_peer(Peer)}, + {latency, LatencyMicroseconds / 1000}, + {data_size, DataSize} ]), - update_rating(Peer, overall, true); + update_rating(Peer, overall, LatencyMicroseconds, DataSize, 1, true); _ -> ok end, @@ -478,12 +514,12 @@ discover_peers([Peer | Peers]) -> discover_peers(Peers). format_stats(Peer, Perf) -> - KB = Perf#performance.bytes / 1024, + KB = Perf#performance.average_bytes / 1024, io:format( "\t~s ~.2f kB/s (~.2f kB, ~B latency, ~.2f success, ~p transfers)~n", [string:pad(ar_util:format_peer(Peer), 21, trailing, $\s), - float(Perf#performance.rating), KB, trunc(Perf#performance.latency), - Perf#performance.success, Perf#performance.transfers]). + float(Perf#performance.rating), KB, trunc(Perf#performance.average_latency), + Perf#performance.average_success, Perf#performance.transfers]). 
read_peer_records() -> PeerRecords = case ar_storage:read_term(peers) of @@ -554,26 +590,28 @@ load_peer({Peer, Performance}) -> load_peer({Peer, Metric, Performance}) -> may_be_rotate_peer_ports(Peer), case Performance of - {performance, Bytes, Latency, Transfers, _Failures, Rating} -> + {performance, TotalBytes, TotalLatency, Transfers, _Failures, Rating} -> %% For compatibility with a few nodes already storing the records %% without the release field. set_performance(Peer, Metric, #performance{ - bytes = Bytes, - latency = Latency, + total_bytes = TotalBytes, + total_latency = TotalLatency, transfers = Transfers, rating = Rating }); - {performance, Bytes, Latency, Transfers, _Failures, Rating, Release} -> + {performance, TotalBytes, TotalLatency, Transfers, _Failures, Rating, Release} -> %% For compatibility with nodes storing records from before the introduction of %% the version field set_performance(Peer, Metric, #performance{ release = Release, - bytes = Bytes, - latency = Latency, + total_bytes = TotalBytes, + total_latency = TotalLatency, transfers = Transfers, rating = Rating }); - {performance, 3, _Release, _Bytes, _Latency, _Transfers, _Success, _Rating} -> + {performance, 3, + _Release, _AverageBytes, _TotalBytes, _AverageLatency, _TotalLatency, + _Transfers, _AverageSuccess, _Rating} -> %% Going forward whenever we change the #performance record we should increment the %% version field so we can match on it when doing a load. Here we're handling the %% version 3 format. @@ -738,37 +776,55 @@ check_external_peer(Peer) -> end. update_rating(Peer, Metric, IsSuccess) -> - update_rating(Peer, Metric, undefined, undefined, IsSuccess). -update_rating(Peer, Metric, LatencyMicroseconds, Size, IsSuccess) -> + update_rating(Peer, Metric, undefined, undefined, 1, IsSuccess). 
+update_rating(Peer, Metric, LatencyMicroseconds, DataSize, Concurrency, IsSuccess) -> %% only update available metrics true = lists:member(Metric, ?AVAILABLE_METRICS), Performance = get_or_init_performance(Peer, Metric), Total = get_total_rating(Metric), + LatencyMilliseconds = LatencyMicroseconds / 1000, #performance{ - bytes = Bytes, - latency = Latency, - success = Success, + average_bytes = AverageBytes, + total_bytes = TotalBytes, + average_latency = AverageLatency, + total_latency = TotalLatency, + average_success = AverageSuccess, rating = Rating, transfers = Transfers } = Performance, - Bytes2 = case Size of - undefined -> Bytes; - _ -> calculate_ema(Bytes, Size, ?THROUGHPUT_ALPHA) + TotalBytes2 = case DataSize of + undefined -> TotalBytes; + _ -> TotalBytes + DataSize + end, + %% AverageBytes is the average number of bytes transferred during the AverageLatency time + %% period. In order to approximate the impact of multiple concurrent requests we multiply + %% DataSize by the Concurrency value. We do this *only* when updating the AverageBytes + %% value so that it doesn't distort the TotalBytes. 
+ AverageBytes2 = case DataSize of + undefined -> AverageBytes; + _ -> calculate_ema(AverageBytes, (DataSize * Concurrency), ?THROUGHPUT_ALPHA) + end, + TotalLatency2 = case LatencyMilliseconds of + undefined -> TotalLatency; + _ -> TotalLatency + LatencyMilliseconds end, - Latency2 = case LatencyMicroseconds of - undefined -> Latency; - _ -> calculate_ema(Latency, LatencyMicroseconds / 1000, ?THROUGHPUT_ALPHA) + AverageLatency2 = case LatencyMilliseconds of + undefined -> AverageLatency; + _ -> calculate_ema(AverageLatency, LatencyMilliseconds, ?THROUGHPUT_ALPHA) end, - Transfers2 = case Size of + Transfers2 = case DataSize of undefined -> Transfers; _ -> Transfers + 1 end, - Success2 = calculate_ema(Success, ar_util:bool_to_int(IsSuccess), ?SUCCESS_ALPHA), - Rating2 = (Bytes2 / Latency2) * Success2, + AverageSuccess2 = calculate_ema(AverageSuccess, ar_util:bool_to_int(IsSuccess), ?SUCCESS_ALPHA), + %% Rating is an estimate of the peer's effective throughput in bytes per second. + Rating2 = (AverageBytes2 / AverageLatency2) * AverageSuccess2, Performance2 = Performance#performance{ - bytes = Bytes2, - latency = Latency2, - success = Success2, + average_bytes = AverageBytes2, + total_bytes = TotalBytes2, + average_latency = AverageLatency2, + total_latency = TotalLatency2, + average_success = AverageSuccess2, rating = Rating2, transfers = Transfers2 }, @@ -850,12 +906,12 @@ store_peers() -> issue_warning(Peer) -> Performance = get_or_init_performance(Peer), - Success = calculate_ema(Performance#performance.success, 0, ?SUCCESS_ALPHA), + Success = calculate_ema(Performance#performance.average_success, 0, ?SUCCESS_ALPHA), case Success < ?MINIMUM_SUCCESS of true -> remove_peer(Peer); false -> - Performance2 = Performance#performance{success = Success}, + Performance2 = Performance#performance{average_success = Success}, may_be_rotate_peer_ports(Peer), set_performance(Peer, Performance2) end. 
diff --git a/apps/arweave/src/ar_poller.erl b/apps/arweave/src/ar_poller.erl index c4079b659..ba927a042 100644 --- a/apps/arweave/src/ar_poller.erl +++ b/apps/arweave/src/ar_poller.erl @@ -133,7 +133,7 @@ handle_cast(Msg, State) -> ?LOG_ERROR([{event, unhandled_cast}, {module, ?MODULE}, {message, Msg}]), {noreply, State}. -handle_info({event, block, {discovered, Peer, B, Time, Size}}, State) -> +handle_info({event, block, {discovered, Peer, B, ElapsedMicroseconds, Size}}, State) -> case ar_ignore_registry:member(B#block.indep_hash) of false -> ?LOG_INFO([{event, fetched_block_for_validation}, @@ -142,19 +142,8 @@ handle_info({event, block, {discovered, Peer, B, Time, Size}}, State) -> true -> ok end, - %% How we rank peers changed in June 2023 - %% - %% Previous Behavior: - %% - throughput metrics (block size and time to download) were recorded in pre_validate - %% for valid blocks only - %% Current Behavior: - %% - throughput metrics are recorded for all outbound web requests to peers (including the - %% GET /block/hash request that triggers the block/discovered event) - %% - %% The new behavior is slightly different, but I believe it still results in a valid ranking. - %% Future work may change the behavior further (e.g. 
regarding when penalties are recorded - %% for errors or invalid blocks) - ar_block_pre_validator:pre_validate(B, Peer, erlang:timestamp()), + ValidationStatus = ar_block_pre_validator:pre_validate(B, Peer, erlang:timestamp()), + ar_peers:rate_fetched_data(Peer, block, ValidationStatus, ElapsedMicroseconds, Size, 1), {noreply, State}; handle_info({event, block, _}, State) -> {noreply, State}; From b1ac4bce3e378d766c582f6ba1944450a26ffa2c Mon Sep 17 00:00:00 2001 From: James Piechota Date: Thu, 6 Jul 2023 20:44:16 +0000 Subject: [PATCH 24/30] call rate_xxx wherever we called send(peer (except for chunks) --- apps/arweave/src/ar_block_pre_validator.erl | 4 ++-- apps/arweave/src/ar_data_sync_worker_master.erl | 2 +- apps/arweave/src/ar_header_sync.erl | 4 ++-- apps/arweave/src/ar_http_iface_client.erl | 2 +- apps/arweave/src/ar_http_iface_middleware.erl | 3 +-- apps/arweave/src/ar_peers.erl | 14 ++++---------- apps/arweave/src/ar_randomx_state.erl | 2 +- 7 files changed, 12 insertions(+), 19 deletions(-) diff --git a/apps/arweave/src/ar_block_pre_validator.erl b/apps/arweave/src/ar_block_pre_validator.erl index e3323ebf2..c2be89faa 100644 --- a/apps/arweave/src/ar_block_pre_validator.erl +++ b/apps/arweave/src/ar_block_pre_validator.erl @@ -709,7 +709,7 @@ pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp) -> accept_block(B, Peer, Timestamp, Gossip) -> ar_ignore_registry:add(B#block.indep_hash), ar_events:send(block, {new, B, #{ source => {peer, Peer}, gossip => Gossip }}), - % HANDLED ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), + ar_peers:rate_gossiped_data(Peer, byte_size(term_to_binary(B))), record_block_pre_validation_time(Timestamp), ?LOG_INFO([{event, accepted_block}, {height, B#block.height}, {indep_hash, ar_util:encode(B#block.indep_hash)}]), @@ -753,7 +753,7 @@ pre_validate_pow(B, BDS, PrevB, Peer, Timestamp) -> B2 = B#block{ txs = include_transactions(B#block.txs) }, ar_events:send(block, {new, B2, #{ source => {peer, Peer}, 
recall_byte => RecallByte }}), - % HANDLED ar_events:send(peer, {gossiped_block, Peer, ReadBodyTime, BodySize}), + ar_peers:rate_gossiped_data(Peer, byte_size(term_to_binary(B2))), record_block_pre_validation_time(Timestamp), prometheus_counter:inc(block2_received_transactions, count_received_transactions(B#block.txs)), diff --git a/apps/arweave/src/ar_data_sync_worker_master.erl b/apps/arweave/src/ar_data_sync_worker_master.erl index e01fd43b2..a26e8352b 100644 --- a/apps/arweave/src/ar_data_sync_worker_master.erl +++ b/apps/arweave/src/ar_data_sync_worker_master.erl @@ -362,7 +362,7 @@ rebalance_peer(PeerTasks, Performance, ThroughputTarget, State) -> {worker_count, WorkerCount}, {active_count, PeerTasks2#peer_tasks.active_count}, {throughput_target, ThroughputTarget}, - {latency_ema, Performance#performance.latency} + {latency_ema, Performance#performance.average_latency} ]), {PeerTasks3, State2}. diff --git a/apps/arweave/src/ar_header_sync.erl b/apps/arweave/src/ar_header_sync.erl index fb44c2d71..4c7c4297b 100644 --- a/apps/arweave/src/ar_header_sync.erl +++ b/apps/arweave/src/ar_header_sync.erl @@ -511,10 +511,10 @@ download_block(Peers, H, H2, TXRoot) -> end, case BH of H when Height >= Fork_2_0 -> - % ar_events:send(peer, {fetched_block, Peer, Time, Size}), + ar_peers:rate_fetched_data(Peer, block, Time, Size), download_txs(Peers, B, TXRoot); H2 when Height < Fork_2_0 -> - % ar_events:send(peer, {fetched_block, Peer, Time, Size}), + ar_peers:rate_fetched_data(Peer, block, Time, Size), download_txs(Peers, B, TXRoot); _ -> ?LOG_WARNING([ diff --git a/apps/arweave/src/ar_http_iface_client.erl b/apps/arweave/src/ar_http_iface_client.erl index 3a7d774d7..0b64f1615 100644 --- a/apps/arweave/src/ar_http_iface_client.erl +++ b/apps/arweave/src/ar_http_iface_client.erl @@ -813,7 +813,7 @@ get_tx_from_remote_peer(Peer, TXID) -> ar_events:send(peer, {bad_response, {Peer, tx, invalid}}), {error, invalid_tx}; true -> - % ar_events:send(peer, {fetched_tx, Peer, Time, 
Size}), + ar_peers:rate_fetched_data(Peer, tx, Time, Size), TX end; Error -> diff --git a/apps/arweave/src/ar_http_iface_middleware.erl b/apps/arweave/src/ar_http_iface_middleware.erl index 41370ca6d..0c04e5143 100644 --- a/apps/arweave/src/ar_http_iface_middleware.erl +++ b/apps/arweave/src/ar_http_iface_middleware.erl @@ -1830,7 +1830,6 @@ handle_post_tx_accepted(Req, TX, Peer) -> %% of excessive transaction volumes. {A, B, C, D, _} = Peer, ar_blacklist_middleware:decrement_ip_addr({A, B, C, D}, Req), - %% ar_events:send(peer, {gossiped_tx, Peer, erlang:get(read_body_time), erlang:get(body_size)}), ar_peers:rate_gossiped_data(Peer, byte_size(term_to_binary(TX))), ar_events:send(tx, {new, TX, Peer}), TXID = TX#tx.id, @@ -2360,7 +2359,7 @@ post_block(enqueue_block, {B, Peer}, Req, ReceiveTimestamp) -> end, ?LOG_INFO([{event, received_block}, {block, ar_util:encode(B#block.indep_hash)}]), ValidationStatus = ar_block_pre_validator:pre_validate(B2, Peer, ReceiveTimestamp), - ar_peers:rate_gossiped_data(Peer, byte_size(term_to_binary(B2)), ValidationStatus), + % ar_peers:rate_gossiped_data(Peer, byte_size(term_to_binary(B2)), ValidationStatus), {200, #{}, <<"OK">>, Req}. encode_txids([]) -> diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index c7c019475..60a65dad2 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -11,7 +11,7 @@ -export([start_link/0, get_peers/0, get_peer_performances/1, get_trusted_peers/0, is_public_peer/1, get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, resolve_and_cache_peer/2, - rate_response/4, rate_fetched_data/6, rate_gossiped_data/3, rate_gossiped_data/2 + rate_response/4, rate_fetched_data/4, rate_fetched_data/6, rate_gossiped_data/2 ]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -160,6 +160,8 @@ rate_response(Peer, PathLabel, get, Response) -> rate_response(_Peer, _PathLabel, _Method, _Response) -> ok. 
+rate_fetched_data(Peer, DataType, LatencyMicroseconds, DataSize) -> + rate_fetched_data(Peer, DataType, ok, LatencyMicroseconds, DataSize, 1). rate_fetched_data(Peer, DataType, ok, LatencyMicroseconds, DataSize, Concurrency) -> gen_server:cast(?MODULE, {fetched_data, DataType, Peer, LatencyMicroseconds, DataSize, Concurrency}); @@ -173,15 +175,7 @@ rate_fetched_data(Peer, DataType, _, _LatencyMicroseconds, _DataSize, _Concurren gen_server:cast(?MODULE, {invalid_fetched_data, DataType, Peer}). rate_gossiped_data(Peer, DataSize) -> - rate_gossiped_data(Peer, DataSize, ok). -rate_gossiped_data(Peer, DataSize, ok) -> - gen_server:cast(?MODULE, { - gossiped_data, Peer, DataSize - }); -rate_gossiped_data(_Peer, _DataSize, _ValidationStatus) -> - %% Ignore skipped or invalid blocks for now (consistent with old behavior, but may need to - %% be revisited) - ok. + gen_server:cast(?MODULE, {gossiped_data, Peer, DataSize}). %% @doc Print statistics about the current peers. stats() -> diff --git a/apps/arweave/src/ar_randomx_state.erl b/apps/arweave/src/ar_randomx_state.erl index cf9c12e3a..6ad2e33f9 100644 --- a/apps/arweave/src/ar_randomx_state.erl +++ b/apps/arweave/src/ar_randomx_state.erl @@ -331,7 +331,7 @@ get_block2(BH, Peers, RetryCount) -> {Peer, B, Time, Size} -> case ar_block:indep_hash(B) of BH -> - % ar_events:send(peer, {fetched_block, Peer, Time, Size}), + ar_peers:rate_fetched_data(Peer, block, Time, Size), {ok, B}; InvalidBH -> ?LOG_WARNING([ From 066218df8d8a2c21f37aa05e6ec2576d78ba837d Mon Sep 17 00:00:00 2001 From: James Piechota Date: Fri, 7 Jul 2023 01:46:04 +0000 Subject: [PATCH 25/30] WIP --- apps/arweave/include/ar_peers.hrl | 2 +- apps/arweave/src/ar_peers.erl | 87 ++++++++++++------------------- 2 files changed, 35 insertions(+), 54 deletions(-) diff --git a/apps/arweave/include/ar_peers.hrl b/apps/arweave/include/ar_peers.hrl index f95593e10..dc8a80dcd 100644 --- a/apps/arweave/include/ar_peers.hrl +++ b/apps/arweave/include/ar_peers.hrl @@ -5,7 
+5,7 @@ %% the performance metrics currently tracked -define(AVAILABLE_METRICS, [overall, data_sync]). -%% factor to scale the average latency by when rating gossiped data - lower is better +%% factor to scale the average throughput by when rating gossiped data - lower is better -define(GOSSIP_ADVANTAGE, 0.5). -record(performance, { diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 60a65dad2..542be1d5a 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -144,7 +144,7 @@ is_public_peer(_) -> %% Return -1 if the release is not known. get_peer_release(Peer) -> case catch ets:lookup(?MODULE, {peer, Peer}) of - [{_, #performance{release = Release}}] -> + [{_, Release}] -> Release; _ -> -1 @@ -165,12 +165,6 @@ rate_fetched_data(Peer, DataType, LatencyMicroseconds, DataSize) -> rate_fetched_data(Peer, DataType, ok, LatencyMicroseconds, DataSize, Concurrency) -> gen_server:cast(?MODULE, {fetched_data, DataType, Peer, LatencyMicroseconds, DataSize, Concurrency}); -rate_fetched_data(Peer, DataType, {ok, _}, LatencyMicroseconds, DataSize, Concurrency) -> - gen_server:cast(?MODULE, - {fetched_data, DataType, Peer, LatencyMicroseconds, DataSize, Concurrency}); -rate_fetched_data(Peer, DataType, skipped, _LatencyMicroseconds, _DataSize, _Concurrency) -> - %% No need to penalize skipped blocks, as they are not the peer's fault. - ok; rate_fetched_data(Peer, DataType, _, _LatencyMicroseconds, _DataSize, _Concurrency) -> gen_server:cast(?MODULE, {invalid_fetched_data, DataType, Peer}). @@ -250,15 +244,7 @@ handle_call(Request, _From, State) -> {reply, ok, State}. 
handle_cast({add_peer, Peer, Release}, State) -> - may_be_rotate_peer_ports(Peer), - case ets:lookup(?MODULE, {peer, Peer}) of - [{_, #performance{release = Release}}] -> - ok; - [{_, Performance}] -> - set_performance(Peer, Performance#performance{release = Release}); - [] -> - set_performance(Peer, #performance{release = Release}) - end, + add_peer(Peer, Release), {noreply, State}; handle_cast(rank_peers, State) -> @@ -266,7 +252,7 @@ handle_cast(rank_peers, State) -> Peers = ets:foldl( fun - ({{peer, Peer}, Performance}, Acc) -> + ({{performance, Peer, overall}, Performance}, Acc) -> %% Bigger score increases the chances to end up on the top %% of the peer list, but at the same time the ranking is %% probabilistic to always give everyone a chance to improve @@ -349,7 +335,7 @@ handle_cast({invalid_fetched_data, DataType, Peer}, State) -> {noreply, State}; handle_cast({gossiped_data, Peer, DataSize}, State) -> - case check_external_peer(Peer) of + case check_peer(Peer) of ok -> %% Since gossiped data is pushed to us we don't know the latency, but we do want %% to incentivize peers to gossip data quickly and frequently, so we will assign @@ -384,20 +370,7 @@ handle_cast(Cast, State) -> {noreply, State}. handle_info({event, peer, {made_request, Peer, Release}}, State) -> - may_be_rotate_peer_ports(Peer), - case ets:lookup(?MODULE, {peer, Peer}) of - [{_, #performance{release = Release}}] -> - ok; - [{_, Performance}] -> - set_performance(Peer, Performance#performance{release = Release}); - [] -> - case check_external_peer(Peer) of - ok -> - set_performance(Peer, #performance{release = Release}); - _ -> - ok - end - end, + add_peer(Peer, Release), {noreply, State}; handle_info({event, peer, {fetched_tx, Peer, TimeDelta, Size}}, State) -> @@ -456,7 +429,7 @@ get_peer_peers(Peer) -> get_or_init_performance(Peer) -> get_or_init_performance(Peer, overall). 
get_or_init_performance(Peer, Metric) -> - case ets:lookup(?MODULE, {peer, Peer, Metric}) of + case ets:lookup(?MODULE, {performance, Peer, Metric}) of [] -> #performance{}; [{_, Performance}] -> @@ -466,7 +439,7 @@ get_or_init_performance(Peer, Metric) -> set_performance(Peer, Performance) -> set_performance(Peer, overall, Performance). set_performance(Peer, Metric, Performance) -> - ets:insert(?MODULE, [{{peer, Peer, Metric}, Performance}]). + ets:insert(?MODULE, [{{performance, Peer, Metric}, Performance}]). get_total_rating() -> get_total_rating(overall). @@ -490,19 +463,16 @@ discover_peers([Peer | Peers]) -> true -> ok; false -> - IsPublic = is_public_peer(Peer), - IsBanned = ar_blacklist_middleware:is_peer_banned(Peer) == banned, - IsBlacklisted = lists:member(Peer, ?PEER_PERMANENT_BLACKLIST), - case IsPublic andalso not IsBanned andalso not IsBlacklisted of - false -> - ok; - true -> + case check_peer(Peer, is_public_peer(Peer)) of + ok -> case ar_http_iface_client:get_info(Peer, release) of {<<"release">>, Release} when is_integer(Release) -> gen_server:cast(?MODULE, {add_peer, Peer, Release}); _ -> ok - end + end; + _ -> + ok end end, discover_peers(Peers). @@ -754,19 +724,16 @@ rank_peers(ScoredPeers) -> ) ]. -check_external_peer(Peer) -> - IsLoopbackIP = is_loopback_ip(Peer), +check_peer(Peer) -> + check_peer(Peer, not is_loopback_ip(Peer)). +check_peer(Peer, IsPeerScopeValid) -> IsBlacklisted = lists:member(Peer, ?PEER_PERMANENT_BLACKLIST), IsBanned = ar_blacklist_middleware:is_peer_banned(Peer) == banned, - case {IsLoopbackIP, IsBlacklisted, IsBanned} of - {true, _, _} -> - reject; - {_, true, _} -> - reject; - {_, _, true} -> - reject; - _ -> - ok + case IsPeerScopeValid andalso not IsBlacklisted andalso not IsBanned of + true -> + ok; + false -> + reject end. 
update_rating(Peer, Metric, IsSuccess) -> @@ -830,6 +797,20 @@ update_rating(Peer, Metric, LatencyMicroseconds, DataSize, Concurrency, IsSucces calculate_ema(OldEMA, Value, Alpha) -> Alpha * Value + (1 - Alpha) * OldEMA. +add_peer(Peer, Release) -> + may_be_rotate_peer_ports(Peer), + case ets:lookup(?MODULE, {peer, Peer}) of + [{_, Release}] -> + ok; + _ -> + case check_peer(Peer) of + ok -> + ets:insert(?MODULE, [{{peer, Peer}, Release}]); + _ -> + ok + end + end. + remove_peer(RemovedPeer) -> ?LOG_DEBUG([ {event, remove_peer}, From a51d5e76e51e5e08adb94d6f1d4d23f472487c5d Mon Sep 17 00:00:00 2001 From: James Piechota Date: Fri, 7 Jul 2023 13:29:04 +0000 Subject: [PATCH 26/30] revert metric addition --- apps/arweave/include/ar_peers.hrl | 2 - apps/arweave/src/ar_peers.erl | 280 +++++++++++------------------- 2 files changed, 101 insertions(+), 181 deletions(-) diff --git a/apps/arweave/include/ar_peers.hrl b/apps/arweave/include/ar_peers.hrl index dc8a80dcd..0d2c2200f 100644 --- a/apps/arweave/include/ar_peers.hrl +++ b/apps/arweave/include/ar_peers.hrl @@ -3,8 +3,6 @@ -include_lib("ar.hrl"). -%% the performance metrics currently tracked --define(AVAILABLE_METRICS, [overall, data_sync]). %% factor to scale the average throughput by when rating gossiped data - lower is better -define(GOSSIP_ADVANTAGE, 0.5). diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 542be1d5a..da0458c71 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -97,9 +97,7 @@ resolve_peers([RawPeer | Peers]) -> {ok, Peer} -> [Peer | resolve_peers(Peers)]; {error, invalid} -> - ?LOG_WARNING([{event, failed_to_resolve_trusted_peer}, - {peer, RawPeer} - ]), + ?LOG_WARNING([{event, failed_to_resolve_trusted_peer}, {peer, RawPeer}]), resolve_peers(Peers) end. @@ -144,7 +142,7 @@ is_public_peer(_) -> %% Return -1 if the release is not known. 
get_peer_release(Peer) -> case catch ets:lookup(?MODULE, {peer, Peer}) of - [{_, Release}] -> + [{_, #performance{ release = Release }}] -> Release; _ -> -1 @@ -252,7 +250,7 @@ handle_cast(rank_peers, State) -> Peers = ets:foldl( fun - ({{performance, Peer, overall}, Performance}, Acc) -> + ({{peer, Peer}, Performance}, Acc) -> %% Bigger score increases the chances to end up on the top %% of the peer list, but at the same time the ranking is %% probabilistic to always give everyone a chance to improve @@ -282,7 +280,7 @@ handle_cast(ping_peers, State) -> handle_cast({rate_response, Peer, PathLabel, get, Status}, State) -> case Status of "success" -> - update_rating(Peer, overall, true); + update_rating(Peer, true); "redirection" -> %% don't update rating ok; @@ -290,12 +288,11 @@ handle_cast({rate_response, Peer, PathLabel, get, Status}, State) -> %% don't update rating ok; _ -> - update_rating(Peer, overall, false) + update_rating(Peer, false) end, ?LOG_DEBUG([ {event, update_rating}, {update_type, response}, - {metric, overall}, {path, PathLabel}, {status, Status}, {peer, ar_util:format_peer(Peer)} @@ -312,11 +309,7 @@ handle_cast({fetched_data, DataType, Peer, LatencyMicroseconds, DataSize, Concur {data_size, DataSize}, {concurrency, Concurrency} ]), - update_rating(Peer, overall, LatencyMicroseconds, DataSize, Concurrency, true), - case DataType of - chunk -> update_rating(Peer, data_sync, LatencyMicroseconds, DataSize, Concurrency, true); - _ -> ok - end, + update_rating(Peer, LatencyMicroseconds, DataSize, Concurrency, true), {noreply, State}; @@ -327,11 +320,7 @@ handle_cast({invalid_fetched_data, DataType, Peer}, State) -> {data_type, DataType}, {peer, ar_util:format_peer(Peer)} ]), - update_rating(Peer, overall, false), - case DataType of - chunk -> update_rating(Peer, data_sync, false); - _ -> ok - end, + update_rating(Peer, false), {noreply, State}; handle_cast({gossiped_data, Peer, DataSize}, State) -> @@ -427,9 +416,7 @@ get_peer_peers(Peer) -> end. 
get_or_init_performance(Peer) -> - get_or_init_performance(Peer, overall). -get_or_init_performance(Peer, Metric) -> - case ets:lookup(?MODULE, {performance, Peer, Metric}) of + case ets:lookup(?MODULE, {peer, Peer}) of [] -> #performance{}; [{_, Performance}] -> @@ -437,14 +424,10 @@ get_or_init_performance(Peer, Metric) -> end. set_performance(Peer, Performance) -> - set_performance(Peer, overall, Performance). -set_performance(Peer, Metric, Performance) -> - ets:insert(?MODULE, [{{performance, Peer, Metric}, Performance}]). + ets:insert(?MODULE, [{{peer, Peer}, Performance}]). get_total_rating() -> - get_total_rating(overall). -get_total_rating(Metric) -> - case ets:lookup(?MODULE, {rating_total, Metric}) of + case ets:lookup(?MODULE, rating_total) of [] -> 0; [{_, Total}] -> @@ -452,9 +435,7 @@ get_total_rating(Metric) -> end. set_total_rating(Total) -> - set_total_rating(overall, Total). -set_total_rating(Metric, Total) -> - ets:insert(?MODULE, {{rating_total, Metric}, Total}). + ets:insert(?MODULE, {rating_total, Total}). discover_peers([]) -> ok; @@ -485,124 +466,78 @@ format_stats(Peer, Perf) -> float(Perf#performance.rating), KB, trunc(Perf#performance.average_latency), Perf#performance.average_success, Perf#performance.transfers]). -read_peer_records() -> - PeerRecords = case ar_storage:read_term(peers) of +load_peers() -> + case ar_storage:read_term(peers) of not_found -> ok; {ok, {_TotalRating, Records}} -> - %% Legacy format included the TotalRating, but since we always recalculate it when - %% loading the peers, we've dropped it from the saved format. - Records; - {ok, Records} -> - Records - end, - - %% We only want to return records for available peers. However, PeerRecords may contain - %% multiple records for the same peer (one for each tracked metric) and we don't want to - %% ping each peer multiple times. So: - %% 1. Get a set of UniquePeers from PeerRecords - %% 2. Ping those peers to get a set of VaidPeers - %% 3. 
Filter PeerRecords to only include records for ValidPeers - UniquePeers = sets:from_list([ element(1, Record) || Record <- PeerRecords ]), - - ValidPeers = sets:filter( - fun(Peer) -> - case ar_http_iface_client:get_info(Peer, name) of - info_unavailable -> - ?LOG_DEBUG([{event, peer_unavailable}, {peer, ar_util:format_peer(Peer)}]), - false; - <> -> - true; - Network -> - ?LOG_DEBUG([ - {event, peer_from_the_wrong_network}, - {peer, ar_util:format_peer(Peer)}, - {network, Network} - ]), - false - end - end, - UniquePeers - ), + ?LOG_INFO([{event, polling_saved_peers}]), + ar:console("Polling saved peers...~n"), + load_peers(Records), + TotalRating = + ets:foldl( + fun ({{peer, _Peer}, Performance}, Acc) -> + Acc + Performance#performance.rating; + (_, Acc) -> + Acc + end, + 0, + ?MODULE + ), + ets:insert(?MODULE, {rating_total, TotalRating}), + ?LOG_INFO([{event, polled_saved_peers}]), + ar:console("Polled saved peers.~n") + end. - ValidPeerRecords = lists:filter( - fun(PeerRecord) -> - sets:is_element(element(1, PeerRecord), ValidPeers) - end, - PeerRecords - ), - ValidPeerRecords. - -load_peers() -> - ?LOG_INFO([{event, polling_saved_peers}]), - ar:console("Polling saved peers...~n"), - PeerRecords = read_peer_records(), - load_peers(PeerRecords), - load_totals(), - ?LOG_INFO([{event, polled_saved_peers}]), - ar:console("Polled saved peers.~n"). - -load_peers(PeerRecords) when length(PeerRecords) < 20 -> - ar_util:pmap(fun load_peer/1, PeerRecords); -load_peers(PeerRecords) -> - {PeerRecords2, PeerRecords3} = lists:split(20, PeerRecords), - ar_util:pmap(fun load_peer/1, PeerRecords2), - load_peers(PeerRecords3). +load_peers(Peers) when length(Peers) < 20 -> + ar_util:pmap(fun load_peer/1, Peers); +load_peers(Peers) -> + {Peers2, Peers3} = lists:split(20, Peers), + ar_util:pmap(fun load_peer/1, Peers2), + load_peers(Peers3). 
load_peer({Peer, Performance}) -> - load_peer({Peer, overall, Performance}); -load_peer({Peer, Metric, Performance}) -> - may_be_rotate_peer_ports(Peer), - case Performance of - {performance, TotalBytes, TotalLatency, Transfers, _Failures, Rating} -> - %% For compatibility with a few nodes already storing the records - %% without the release field. - set_performance(Peer, Metric, #performance{ - total_bytes = TotalBytes, - total_latency = TotalLatency, - transfers = Transfers, - rating = Rating - }); - {performance, TotalBytes, TotalLatency, Transfers, _Failures, Rating, Release} -> - %% For compatibility with nodes storing records from before the introduction of - %% the version field - set_performance(Peer, Metric, #performance{ - release = Release, - total_bytes = TotalBytes, - total_latency = TotalLatency, - transfers = Transfers, - rating = Rating - }); - {performance, 3, - _Release, _AverageBytes, _TotalBytes, _AverageLatency, _TotalLatency, - _Transfers, _AverageSuccess, _Rating} -> - %% Going forward whenever we change the #performance record we should increment the - %% version field so we can match on it when doing a load. Here we're handling the - %% version 3 format. - set_performance(Peer, Metric, Performance) + case ar_http_iface_client:get_info(Peer, name) of + info_unavailable -> + ?LOG_DEBUG([{event, peer_unavailable}, {peer, ar_util:format_peer(Peer)}]), + ok; + <> -> + may_be_rotate_peer_ports(Peer), + case Performance of + {performance, TotalBytes, TotalLatency, Transfers, _Failures, Rating} -> + %% For compatibility with a few nodes already storing the records + %% without the release field. 
+ set_performance(Peer, #performance{ + total_bytes = TotalBytes, + total_latency = TotalLatency, + transfers = Transfers, + rating = Rating + }); + {performance, TotalBytes, TotalLatency, Transfers, _Failures, Rating, Release} -> + %% For compatibility with nodes storing records from before the introduction of + %% the version field + set_performance(Peer, #performance{ + release = Release, + total_bytes = TotalBytes, + total_latency = TotalLatency, + transfers = Transfers, + rating = Rating + }); + {performance, 3, + _Release, _AverageBytes, _TotalBytes, _AverageLatency, _TotalLatency, + _Transfers, _AverageSuccess, _Rating} -> + %% Going forward whenever we change the #performance record we should increment the + %% version field so we can match on it when doing a load. Here we're handling the + %% version 3 format. + set_performance(Peer, Performance) + end, + ok; + Network -> + ?LOG_DEBUG([{event, peer_from_the_wrong_network}, + {peer, ar_util:format_peer(Peer)}, {network, Network}]), + ok end. -load_totals() -> - Totals = ets:foldl( - fun - ({{peer, Metric, _Peer}, Performance}, Acc) -> - Total = maps:get(Metric, Acc, 0), - maps:put(Metric, Total + Performance#performance.rating, Acc); - (_, Acc) -> - Acc - end, - #{}, - ?MODULE - ), - - lists:foreach( - fun(Metric) -> - Total = maps:get(Metric, Totals, 0), - set_total_rating(Metric, Total) - end, - ?AVAILABLE_METRICS - ). - may_be_rotate_peer_ports(Peer) -> {IP, Port} = get_ip_port(Peer), case ets:lookup(?MODULE, {peer_ip, IP}) of @@ -700,29 +635,23 @@ rank_peers(ScoredPeers) -> ScoredSubnetPeers = maps:fold( fun(_Subnet, SubnetPeers, Acc) -> - element( - 2, - lists:foldl( - fun({Peer, Score}, {N, Acc2}) -> - %% At first we take the best peer from every subnet, - %% then take the second best from every subnet, etc. 
- {N + 1, [{Peer, {-N, Score}} | Acc2]} - end, - {0, Acc}, - SubnetPeers - ) - ) + element(2, lists:foldl( + fun({Peer, Score}, {N, Acc2}) -> + %% At first we take the best peer from every subnet, + %% then take the second best from every subnet, etc. + {N + 1, [{Peer, {-N, Score}} | Acc2]} + end, + {0, Acc}, + SubnetPeers + )) end, [], GroupedBySubnet ), - [ - Peer - || {Peer, _} <- lists:sort( - fun({_, S1}, {_, S2}) -> S1 >= S2 end, - ScoredSubnetPeers - ) - ]. + [Peer || {Peer, _} <- lists:sort( + fun({_, S1}, {_, S2}) -> S1 >= S2 end, + ScoredSubnetPeers + )]. check_peer(Peer) -> check_peer(Peer, not is_loopback_ip(Peer)). @@ -736,13 +665,11 @@ check_peer(Peer, IsPeerScopeValid) -> reject end. -update_rating(Peer, Metric, IsSuccess) -> - update_rating(Peer, Metric, undefined, undefined, 1, IsSuccess). -update_rating(Peer, Metric, LatencyMicroseconds, DataSize, Concurrency, IsSuccess) -> - %% only update available metrics - true = lists:member(Metric, ?AVAILABLE_METRICS), - Performance = get_or_init_performance(Peer, Metric), - Total = get_total_rating(Metric), +update_rating(Peer, IsSuccess) -> + update_rating(Peer, undefined, undefined, 1, IsSuccess). +update_rating(Peer, LatencyMicroseconds, DataSize, Concurrency, IsSuccess) -> + Performance = get_or_init_performance(Peer), + Total = get_total_rating(), LatencyMilliseconds = LatencyMicroseconds / 1000, #performance{ average_bytes = AverageBytes, @@ -791,8 +718,8 @@ update_rating(Peer, Metric, LatencyMicroseconds, DataSize, Concurrency, IsSucces }, Total2 = Total - Rating + Rating2, may_be_rotate_peer_ports(Peer), - set_performance(Peer, Metric, Performance2), - set_total_rating(Metric, Total2). + set_performance(Peer, Performance2), + set_total_rating(Total2). calculate_ema(OldEMA, Value, Alpha) -> Alpha * Value + (1 - Alpha) * OldEMA. 
@@ -816,15 +743,10 @@ remove_peer(RemovedPeer) -> {event, remove_peer}, {peer, ar_util:format_peer(RemovedPeer)} ]), - lists:foreach( - fun(Metric) -> - Performance = get_or_init_performance(RemovedPeer, Metric), - Total = get_total_rating(Metric), - set_total_rating(Metric, Total - Performance#performance.rating), - ets:delete(?MODULE, {peer, RemovedPeer, Metric}) - end, - ?AVAILABLE_METRICS - ), + Performance = get_or_init_performance(RemovedPeer), + Total = get_total_rating(), + set_total_rating(Total - Performance#performance.rating), + ets:delete(?MODULE, {peer, RemovedPeer}), remove_peer_port(RemovedPeer). remove_peer_port(Peer) -> @@ -864,8 +786,8 @@ store_peers() -> Records = ets:foldl( fun - ({{peer, Peer, Metric}, Performance}, Acc) -> - [{Peer, Metric, Performance} | Acc]; + ({{peer, Peer}, Performance}, Acc) -> + [{Peer, Performance} | Acc]; (_, Acc) -> Acc end, From ab54e3be53302e2819158f13cb65d6426cfcc3da Mon Sep 17 00:00:00 2001 From: James Piechota Date: Fri, 7 Jul 2023 16:13:03 +0000 Subject: [PATCH 27/30] Cleanup how we rate new blocks that are fetched vs. gossiped --- apps/arweave/src/ar_block_pre_validator.erl | 149 +++++++++--------- apps/arweave/src/ar_http_iface_middleware.erl | 3 +- apps/arweave/src/ar_peers.erl | 46 +++--- apps/arweave/src/ar_poller.erl | 5 +- apps/arweave/src/ar_poller_worker.erl | 2 +- 5 files changed, 106 insertions(+), 99 deletions(-) diff --git a/apps/arweave/src/ar_block_pre_validator.erl b/apps/arweave/src/ar_block_pre_validator.erl index c2be89faa..41c088d55 100644 --- a/apps/arweave/src/ar_block_pre_validator.erl +++ b/apps/arweave/src/ar_block_pre_validator.erl @@ -2,7 +2,7 @@ -behaviour(gen_server). --export([start_link/2, pre_validate/3]). +-export([start_link/2, pre_validate/4]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -40,14 +40,20 @@ start_link(Name, Workers) -> %% Bigger-height blocks from better-rated peers have higher priority. 
Additionally, %% the processing is throttled by IP and solution hash. %% Returns: ok, invalid, skipped -pre_validate(B, Peer, Timestamp) -> +pre_validate(B, Peer, QueryBlockTime, ReceiveTimestamp) -> #block{ indep_hash = H } = B, - case ar_ignore_registry:member(H) of + ValidationStatus = case ar_ignore_registry:member(H) of true -> skipped; false -> - pre_validate_is_peer_banned(B, Peer, Timestamp) - end. + pre_validate_is_peer_banned(B, Peer, QueryBlockTime) + end, + case ValidationStatus of + ok -> record_block_pre_validation_time(ReceiveTimestamp); + _ -> ok + end, + ValidationStatus. + %%%=================================================================== %%% gen_server callbacks. @@ -178,15 +184,15 @@ terminate(_Reason, _State) -> %%% Private functions. %%%=================================================================== -pre_validate_is_peer_banned(B, Peer, Timestamp) -> +pre_validate_is_peer_banned(B, Peer, QueryBlockTime) -> case ar_blacklist_middleware:is_peer_banned(Peer) of not_banned -> - pre_validate_previous_block(B, Peer, Timestamp); + pre_validate_previous_block(B, Peer, QueryBlockTime); banned -> skipped end. -pre_validate_previous_block(B, Peer, Timestamp) -> +pre_validate_previous_block(B, Peer, QueryBlockTime) -> PrevH = B#block.previous_block, case ar_node:get_block_shadow_from_cache(PrevH) of not_found -> @@ -205,28 +211,28 @@ pre_validate_previous_block(B, Peer, Timestamp) -> PrevCDiff = B#block.previous_cumulative_diff, case PrevB#block.cumulative_diff == PrevCDiff of true -> - pre_validate_indep_hash(B, PrevB, Peer, Timestamp); + pre_validate_indep_hash(B, PrevB, Peer, QueryBlockTime); false -> invalid end; false -> - pre_validate_may_be_fetch_chunk(B, PrevB, Peer, Timestamp) + pre_validate_may_be_fetch_chunk(B, PrevB, Peer, QueryBlockTime) end end end. 
-pre_validate_indep_hash(#block{ indep_hash = H } = B, PrevB, Peer, Timestamp) -> +pre_validate_indep_hash(#block{ indep_hash = H } = B, PrevB, Peer, QueryBlockTime) -> case catch compute_hash(B, PrevB#block.cumulative_diff) of {ok, {BDS, H}} -> ar_ignore_registry:add_temporary(H, 5000), - pre_validate_timestamp(B, BDS, PrevB, Peer, Timestamp); + pre_validate_timestamp(B, BDS, PrevB, Peer, QueryBlockTime); {ok, H} -> case ar_ignore_registry:permanent_member(H) of true -> skipped; false -> ar_ignore_registry:add_temporary(H, 5000), - pre_validate_timestamp(B, none, PrevB, Peer, Timestamp) + pre_validate_timestamp(B, none, PrevB, Peer, QueryBlockTime) end; {error, invalid_signature} -> post_block_reject_warn(B, check_signature, Peer), @@ -238,11 +244,11 @@ pre_validate_indep_hash(#block{ indep_hash = H } = B, PrevB, Peer, Timestamp) -> invalid end. -pre_validate_timestamp(B, BDS, PrevB, Peer, Timestamp) -> +pre_validate_timestamp(B, BDS, PrevB, Peer, QueryBlockTime) -> #block{ indep_hash = H } = B, case ar_block:verify_timestamp(B, PrevB) of true -> - pre_validate_existing_solution_hash(B, BDS, PrevB, Peer, Timestamp); + pre_validate_existing_solution_hash(B, BDS, PrevB, Peer, QueryBlockTime); false -> post_block_reject_warn(B, check_timestamp, Peer, [{block_time, B#block.timestamp}, {current_time, os:system_time(seconds)}]), @@ -251,10 +257,10 @@ pre_validate_timestamp(B, BDS, PrevB, Peer, Timestamp) -> invalid end. 
-pre_validate_existing_solution_hash(B, BDS, PrevB, Peer, Timestamp) -> +pre_validate_existing_solution_hash(B, BDS, PrevB, Peer, QueryBlockTime) -> case B#block.height >= ar_fork:height_2_6() of false -> - pre_validate_last_retarget(B, BDS, PrevB, false, Peer, Timestamp); + pre_validate_last_retarget(B, BDS, PrevB, false, Peer, QueryBlockTime); true -> SolutionH = B#block.hash, #block{ hash = SolutionH, nonce = Nonce, reward_addr = RewardAddr, @@ -311,7 +317,7 @@ pre_validate_existing_solution_hash(B, BDS, PrevB, Peer, Timestamp) -> case ValidatedCachedSolutionDiff of not_found -> pre_validate_nonce_limiter_global_step_number(B, BDS, PrevB, false, Peer, - Timestamp); + QueryBlockTime); invalid -> post_block_reject_warn(B, check_resigned_solution_hash, Peer), ar_events:send(block, {rejected, invalid_resigned_solution_hash, @@ -319,7 +325,7 @@ pre_validate_existing_solution_hash(B, BDS, PrevB, Peer, Timestamp) -> invalid; {valid, B3} -> pre_validate_nonce_limiter_global_step_number(B3, BDS, PrevB, true, Peer, - Timestamp) + QueryBlockTime) end end. @@ -353,7 +359,8 @@ get_last_step_prev_output(B) -> PrevOutput end. -pre_validate_nonce_limiter_global_step_number(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> +pre_validate_nonce_limiter_global_step_number(B, BDS, PrevB, SolutionResigned, Peer, + QueryBlockTime) -> BlockInfo = B#block.nonce_limiter_info, StepNumber = BlockInfo#nonce_limiter_info.global_step_number, PrevBlockInfo = PrevB#block.nonce_limiter_info, @@ -387,10 +394,10 @@ pre_validate_nonce_limiter_global_step_number(B, BDS, PrevB, SolutionResigned, P true -> prometheus_gauge:set(block_vdf_advance, StepNumber - CurrentStepNumber), pre_validate_previous_solution_hash(B, BDS, PrevB, SolutionResigned, Peer, - Timestamp) + QueryBlockTime) end. 
-pre_validate_previous_solution_hash(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> +pre_validate_previous_solution_hash(B, BDS, PrevB, SolutionResigned, Peer, QueryBlockTime) -> case B#block.previous_solution_hash == PrevB#block.hash of false -> post_block_reject_warn(B, check_previous_solution_hash, Peer), @@ -399,17 +406,17 @@ pre_validate_previous_solution_hash(B, BDS, PrevB, SolutionResigned, Peer, Times B#block.indep_hash, Peer}), invalid; true -> - pre_validate_last_retarget(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) + pre_validate_last_retarget(B, BDS, PrevB, SolutionResigned, Peer, QueryBlockTime) end. -pre_validate_last_retarget(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> +pre_validate_last_retarget(B, BDS, PrevB, SolutionResigned, Peer, QueryBlockTime) -> case B#block.height >= ar_fork:height_2_6() of false -> - pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp); + pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, QueryBlockTime); true -> case ar_block:verify_last_retarget(B, PrevB) of true -> - pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp); + pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, QueryBlockTime); false -> post_block_reject_warn(B, check_last_retarget, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), @@ -419,7 +426,7 @@ pre_validate_last_retarget(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> end end. 
-pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> +pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, QueryBlockTime) -> DiffValid = case B#block.height >= ar_fork:height_2_6() of true -> @@ -430,7 +437,7 @@ pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> case DiffValid of true -> pre_validate_cumulative_difficulty(B, BDS, PrevB, SolutionResigned, Peer, - Timestamp); + QueryBlockTime); _ -> post_block_reject_warn(B, check_difficulty, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), @@ -438,7 +445,7 @@ pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> invalid end. -pre_validate_cumulative_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timestamp) -> +pre_validate_cumulative_difficulty(B, BDS, PrevB, SolutionResigned, Peer, QueryBlockTime) -> case B#block.height >= ar_fork:height_2_6() of true -> case ar_block:verify_cumulative_diff(B, PrevB) of @@ -452,17 +459,17 @@ pre_validate_cumulative_difficulty(B, BDS, PrevB, SolutionResigned, Peer, Timest case SolutionResigned of true -> gen_server:cast(?MODULE, {enqueue, {B, PrevB, true, Peer, - Timestamp}}), + QueryBlockTime}}), ok; false -> - pre_validate_quick_pow(B, PrevB, false, Peer, Timestamp) + pre_validate_quick_pow(B, PrevB, false, Peer, QueryBlockTime) end end; false -> - pre_validate_pow(B, BDS, PrevB, Peer, Timestamp) + pre_validate_pow(B, BDS, PrevB, Peer, QueryBlockTime) end. 
-pre_validate_quick_pow(B, PrevB, SolutionResigned, Peer, Timestamp) -> +pre_validate_quick_pow(B, PrevB, SolutionResigned, Peer, QueryBlockTime) -> #block{ hash_preimage = HashPreimage, diff = Diff, nonce_limiter_info = NonceLimiterInfo, partition_number = PartitionNumber, reward_addr = RewardAddr } = B, PrevNonceLimiterInfo = get_prev_nonce_limiter_info(PrevB), @@ -492,7 +499,7 @@ pre_validate_quick_pow(B, PrevB, SolutionResigned, Peer, Timestamp) -> invalid; true -> gen_server:cast(?MODULE, {enqueue, {B, PrevB, SolutionResigned, Peer, - Timestamp}}), + QueryBlockTime}}), ok end end. @@ -515,7 +522,7 @@ get_prev_nonce_limiter_info(#block{ indep_hash = PrevH, height = PrevHeight } = PrevB#block.nonce_limiter_info end. -pre_validate_nonce_limiter_seed_data(B, PrevB, SolutionResigned, Peer, Timestamp) -> +pre_validate_nonce_limiter_seed_data(B, PrevB, SolutionResigned, Peer, QueryBlockTime) -> Info = B#block.nonce_limiter_info, #nonce_limiter_info{ global_step_number = StepNumber, seed = Seed, next_seed = NextSeed, partition_upper_bound = PartitionUpperBound, @@ -533,7 +540,7 @@ pre_validate_nonce_limiter_seed_data(B, PrevB, SolutionResigned, Peer, Timestamp NextPartitionUpperBound} of true -> pre_validate_partition_number(B, PrevB, PartitionUpperBound, - SolutionResigned, Peer, Timestamp); + SolutionResigned, Peer, QueryBlockTime); false -> post_block_reject_warn(B, check_nonce_limiter_seed_data, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), @@ -543,7 +550,8 @@ pre_validate_nonce_limiter_seed_data(B, PrevB, SolutionResigned, Peer, Timestamp end end. 
-pre_validate_partition_number(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, Timestamp) -> +pre_validate_partition_number(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, + QueryBlockTime) -> Max = max(0, PartitionUpperBound div ?PARTITION_SIZE - 1), case B#block.partition_number > Max of true -> @@ -553,10 +561,11 @@ pre_validate_partition_number(B, PrevB, PartitionUpperBound, SolutionResigned, P Peer}), invalid; false -> - pre_validate_nonce(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, Timestamp) + pre_validate_nonce(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, + QueryBlockTime) end. -pre_validate_nonce(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, Timestamp) -> +pre_validate_nonce(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, QueryBlockTime) -> Max = max(0, (?RECALL_RANGE_SIZE) div ?DATA_CHUNK_SIZE - 1), case B#block.nonce > Max of true -> @@ -567,15 +576,15 @@ pre_validate_nonce(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, Timest false -> case SolutionResigned of true -> - accept_block(B, Peer, Timestamp, false); + accept_block(B, Peer, QueryBlockTime, false); false -> pre_validate_may_be_fetch_first_chunk(B, PrevB, PartitionUpperBound, Peer, - Timestamp) + QueryBlockTime) end end. 
pre_validate_may_be_fetch_first_chunk(#block{ recall_byte = RecallByte, - poa = #poa{ chunk = <<>> } } = B, PrevB, PartitionUpperBound, Peer, Timestamp) + poa = #poa{ chunk = <<>> } } = B, PrevB, PartitionUpperBound, Peer, QueryBlockTime) when RecallByte /= undefined -> case ar_data_sync:get_chunk(RecallByte + 1, #{ pack => true, packing => {spora_2_6, B#block.reward_addr}, bucket_based_offset => true }) of @@ -584,17 +593,17 @@ pre_validate_may_be_fetch_first_chunk(#block{ recall_byte = RecallByte, B2 = B#block{ poa = #poa{ chunk = Chunk, data_path = DataPath, tx_path = TXPath } }, pre_validate_may_be_fetch_second_chunk(B2, PrevB, PartitionUpperBound, - Peer, Timestamp); + Peer, QueryBlockTime); _ -> ar_events:send(block, {rejected, failed_to_fetch_first_chunk, B#block.indep_hash, Peer}), invalid end; -pre_validate_may_be_fetch_first_chunk(B, PrevB, PartitionUpperBound, Peer, Timestamp) -> - pre_validate_may_be_fetch_second_chunk(B, PrevB, PartitionUpperBound, Peer, Timestamp). +pre_validate_may_be_fetch_first_chunk(B, PrevB, PartitionUpperBound, Peer, QueryBlockTime) -> + pre_validate_may_be_fetch_second_chunk(B, PrevB, PartitionUpperBound, Peer, QueryBlockTime). 
pre_validate_may_be_fetch_second_chunk(#block{ recall_byte2 = RecallByte2, - poa2 = #poa{ chunk = <<>> } } = B, PrevB, PartitionUpperBound, Peer, Timestamp) + poa2 = #poa{ chunk = <<>> } } = B, PrevB, PartitionUpperBound, Peer, QueryBlockTime) when RecallByte2 /= undefined -> case ar_data_sync:get_chunk(RecallByte2 + 1, #{ pack => true, packing => {spora_2_6, B#block.reward_addr}, bucket_based_offset => true }) of @@ -602,16 +611,16 @@ pre_validate_may_be_fetch_second_chunk(#block{ recall_byte2 = RecallByte2, prometheus_counter:inc(block2_fetched_chunks), B2 = B#block{ poa2 = #poa{ chunk = Chunk, data_path = DataPath, tx_path = TXPath } }, - pre_validate_pow_2_6(B2, PrevB, PartitionUpperBound, Peer, Timestamp); + pre_validate_pow_2_6(B2, PrevB, PartitionUpperBound, Peer, QueryBlockTime); _ -> ar_events:send(block, {rejected, failed_to_fetch_second_chunk, B#block.indep_hash, Peer}), invalid end; -pre_validate_may_be_fetch_second_chunk(B, PrevB, PartitionUpperBound, Peer, Timestamp) -> - pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, Timestamp). +pre_validate_may_be_fetch_second_chunk(B, PrevB, PartitionUpperBound, Peer, QueryBlockTime) -> + pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, QueryBlockTime). 
-pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, Timestamp) -> +pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, QueryBlockTime) -> NonceLimiterInfo = B#block.nonce_limiter_info, NonceLimiterOutput = NonceLimiterInfo#nonce_limiter_info.output, PrevNonceLimiterInfo = get_prev_nonce_limiter_info(PrevB), @@ -624,14 +633,14 @@ pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, Timestamp) -> andalso Preimage1 == B#block.hash_preimage andalso B#block.recall_byte2 == undefined of true -> - pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp); + pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, QueryBlockTime); false -> Chunk2 = (B#block.poa2)#poa.chunk, {H2, Preimage2} = ar_block:compute_h2(H1, Chunk2, H0), case H2 == B#block.hash andalso binary:decode_unsigned(H2, big) > B#block.diff andalso Preimage2 == B#block.hash_preimage of true -> - pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp); + pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, QueryBlockTime); false -> post_block_reject_warn(B, check_pow, Peer), ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), @@ -640,7 +649,7 @@ pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, Timestamp) -> end end. 
-pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp) -> +pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, QueryBlockTime) -> {RecallRange1Start, RecallRange2Start} = ar_block:get_recall_range(H0, B#block.partition_number, PartitionUpperBound), RecallByte1 = RecallRange1Start + B#block.nonce * ?DATA_CHUNK_SIZE, @@ -661,7 +670,7 @@ pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp) -> true -> case B#block.hash == H1 of true -> - pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp); + pre_validate_nonce_limiter(B, PrevB, Peer, QueryBlockTime); false -> RecallByte2 = RecallRange2Start + B#block.nonce * ?DATA_CHUNK_SIZE, {BlockStart2, BlockEnd2, TXRoot2} = ar_block_index:get_block_bounds( @@ -682,12 +691,12 @@ pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, Timestamp) -> B#block.indep_hash, Peer}), invalid; true -> - pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp) + pre_validate_nonce_limiter(B, PrevB, Peer, QueryBlockTime) end end end. -pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp) -> +pre_validate_nonce_limiter(B, PrevB, Peer, QueryBlockTime) -> PrevOutput = get_last_step_prev_output(B), case ar_nonce_limiter:validate_last_step_checkpoints(B, PrevB, PrevOutput) of {false, cache_mismatch} -> @@ -701,37 +710,36 @@ pre_validate_nonce_limiter(B, PrevB, Peer, Timestamp) -> ar_events:send(block, {rejected, invalid_nonce_limiter, B#block.indep_hash, Peer}), invalid; {true, cache_match} -> - accept_block(B, Peer, Timestamp, true); + accept_block(B, Peer, QueryBlockTime, true); true -> - accept_block(B, Peer, Timestamp, false) + accept_block(B, Peer, QueryBlockTime, false) end. 
-accept_block(B, Peer, Timestamp, Gossip) -> +accept_block(B, Peer, QueryBlockTime, Gossip) -> ar_ignore_registry:add(B#block.indep_hash), - ar_events:send(block, {new, B, #{ source => {peer, Peer}, gossip => Gossip }}), - ar_peers:rate_gossiped_data(Peer, byte_size(term_to_binary(B))), - record_block_pre_validation_time(Timestamp), + ar_events:send(block, {new, B, + #{ source => {peer, Peer}, query_block_time => QueryBlockTime, gossip => Gossip }}), ?LOG_INFO([{event, accepted_block}, {height, B#block.height}, {indep_hash, ar_util:encode(B#block.indep_hash)}]), ok. pre_validate_may_be_fetch_chunk(#block{ recall_byte = RecallByte, - poa = #poa{ chunk = <<>> } } = B, PrevB, Peer, Timestamp) when RecallByte /= undefined -> + poa = #poa{ chunk = <<>> } } = B, PrevB, Peer, QueryBlockTime) when RecallByte /= undefined -> Options = #{ pack => false, packing => spora_2_5, bucket_based_offset => true }, case ar_data_sync:get_chunk(RecallByte + 1, Options) of {ok, #{ chunk := Chunk, data_path := DataPath, tx_path := TXPath }} -> prometheus_counter:inc(block2_fetched_chunks), B2 = B#block{ poa = #poa{ chunk = Chunk, tx_path = TXPath, data_path = DataPath } }, - pre_validate_indep_hash(B2, PrevB, Peer, Timestamp); + pre_validate_indep_hash(B2, PrevB, Peer, QueryBlockTime); _ -> ar_events:send(block, {rejected, failed_to_fetch_chunk, B#block.indep_hash, Peer}), invalid end; -pre_validate_may_be_fetch_chunk(B, PrevB, Peer, Timestamp) -> - pre_validate_indep_hash(B, PrevB, Peer, Timestamp). +pre_validate_may_be_fetch_chunk(B, PrevB, Peer, QueryBlockTime) -> + pre_validate_indep_hash(B, PrevB, Peer, QueryBlockTime). 
-pre_validate_pow(B, BDS, PrevB, Peer, Timestamp) -> +pre_validate_pow(B, BDS, PrevB, Peer, QueryBlockTime) -> #block{ indep_hash = PrevH } = PrevB, MaybeValid = case ar_node:get_recent_partition_upper_bound_by_prev_h(PrevH) of @@ -751,10 +759,9 @@ pre_validate_pow(B, BDS, PrevB, Peer, Timestamp) -> %% corresponding transaction identifiers so that we can gossip them to %% peers who miss them along with the block. B2 = B#block{ txs = include_transactions(B#block.txs) }, - ar_events:send(block, {new, B2, #{ source => {peer, Peer}, - recall_byte => RecallByte }}), - ar_peers:rate_gossiped_data(Peer, byte_size(term_to_binary(B2))), - record_block_pre_validation_time(Timestamp), + ar_events:send(block, {new, B2, #{ + source => {peer, Peer}, query_block_time => QueryBlockTime, + recall_byte => RecallByte }}), prometheus_counter:inc(block2_received_transactions, count_received_transactions(B#block.txs)), ?LOG_INFO([{event, accepted_block}, {indep_hash, ar_util:encode(H)}]), diff --git a/apps/arweave/src/ar_http_iface_middleware.erl b/apps/arweave/src/ar_http_iface_middleware.erl index 0c04e5143..06a1d0801 100644 --- a/apps/arweave/src/ar_http_iface_middleware.erl +++ b/apps/arweave/src/ar_http_iface_middleware.erl @@ -2358,8 +2358,7 @@ post_block(enqueue_block, {B, Peer}, Req, ReceiveTimestamp) -> end end, ?LOG_INFO([{event, received_block}, {block, ar_util:encode(B#block.indep_hash)}]), - ValidationStatus = ar_block_pre_validator:pre_validate(B2, Peer, ReceiveTimestamp), - % ar_peers:rate_gossiped_data(Peer, byte_size(term_to_binary(B2)), ValidationStatus), + ar_block_pre_validator:pre_validate(B2, Peer, undefined, ReceiveTimestamp), {200, #{}, <<"OK">>, Req}. 
encode_txids([]) -> diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index da0458c71..4e4f009de 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -11,7 +11,7 @@ -export([start_link/0, get_peers/0, get_peer_performances/1, get_trusted_peers/0, is_public_peer/1, get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, resolve_and_cache_peer/2, - rate_response/4, rate_fetched_data/4, rate_fetched_data/6, rate_gossiped_data/2 + rate_response/4, rate_fetched_data/4, rate_fetched_data/6, rate_gossiped_data/3 ]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -162,12 +162,12 @@ rate_fetched_data(Peer, DataType, LatencyMicroseconds, DataSize) -> rate_fetched_data(Peer, DataType, ok, LatencyMicroseconds, DataSize, 1). rate_fetched_data(Peer, DataType, ok, LatencyMicroseconds, DataSize, Concurrency) -> gen_server:cast(?MODULE, - {fetched_data, DataType, Peer, LatencyMicroseconds, DataSize, Concurrency}); + {fetched_data, Peer, DataType, LatencyMicroseconds, DataSize, Concurrency}); rate_fetched_data(Peer, DataType, _, _LatencyMicroseconds, _DataSize, _Concurrency) -> - gen_server:cast(?MODULE, {invalid_fetched_data, DataType, Peer}). + gen_server:cast(?MODULE, {invalid_fetched_data, Peer, DataType}). -rate_gossiped_data(Peer, DataSize) -> - gen_server:cast(?MODULE, {gossiped_data, Peer, DataSize}). +rate_gossiped_data(Peer, DataType, DataSize) -> + gen_server:cast(?MODULE, {gossiped_data, Peer, DataType, DataSize}). %% @doc Print statistics about the current peers. 
stats() -> @@ -299,7 +299,7 @@ handle_cast({rate_response, Peer, PathLabel, get, Status}, State) -> ]), {noreply, State}; -handle_cast({fetched_data, DataType, Peer, LatencyMicroseconds, DataSize, Concurrency}, State) -> +handle_cast({fetched_data, Peer, DataType, LatencyMicroseconds, DataSize, Concurrency}, State) -> ?LOG_DEBUG([ {event, update_rating}, {update_type, fetched_data}, @@ -313,7 +313,7 @@ handle_cast({fetched_data, DataType, Peer, LatencyMicroseconds, DataSize, Concur {noreply, State}; -handle_cast({invalid_fetched_data, DataType, Peer}, State) -> +handle_cast({invalid_fetched_data, Peer, DataType}, State) -> ?LOG_DEBUG([ {event, update_rating}, {update_type, invalid_fetched_data}, @@ -323,7 +323,7 @@ handle_cast({invalid_fetched_data, DataType, Peer}, State) -> update_rating(Peer, false), {noreply, State}; -handle_cast({gossiped_data, Peer, DataSize}, State) -> +handle_cast({gossiped_data, Peer, DataType, DataSize}, State) -> case check_peer(Peer) of ok -> %% Since gossiped data is pushed to us we don't know the latency, but we do want @@ -343,11 +343,12 @@ handle_cast({gossiped_data, Peer, DataSize}, State) -> ?LOG_DEBUG([ {event, update_rating}, {update_type, gossiped_data}, + {data_type, DataType}, {peer, ar_util:format_peer(Peer)}, {latency, LatencyMicroseconds / 1000}, {data_size, DataSize} ]), - update_rating(Peer, overall, LatencyMicroseconds, DataSize, 1, true); + update_rating(Peer, LatencyMicroseconds, DataSize, 1, true); _ -> ok end, @@ -362,16 +363,6 @@ handle_info({event, peer, {made_request, Peer, Release}}, State) -> add_peer(Peer, Release), {noreply, State}; -handle_info({event, peer, {fetched_tx, Peer, TimeDelta, Size}}, State) -> - % ?LOG_DEBUG([{event, update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - % update_rating(Peer, TimeDelta, Size), - {noreply, State}; - -handle_info({event, peer, {fetched_block, Peer, TimeDelta, Size}}, State) -> - % ?LOG_DEBUG([{event, 
update_rating}, {type, fetched_tx}, {peer, ar_util:format_peer(Peer)}, {time_delta, TimeDelta}, {size, Size}]), - % update_rating(Peer, TimeDelta, Size), - {noreply, State}; - handle_info({event, peer, {bad_response, {Peer, _Type, _Reason}}}, State) -> issue_warning(Peer), {noreply, State}; @@ -392,6 +383,15 @@ handle_info({event, block, {rejected, failed_to_fetch_chunk, _H, Peer}}, State) issue_warning(Peer), {noreply, State}; +handle_info({event, block, {new, B, + #{ source := {peer, Peer}, query_block_tie := QueryBlockTime }}}, State) -> + DataSize = byte_size(term_to_binary(B)), + case QueryBlockTime of + undefined -> ar_peers:rate_gossiped_data(Peer, block, DataSize); + _ -> ar_peers:rate_fetched_data(Peer, block, QueryBlockTime, DataSize) + end, + {noreply, State}; + handle_info({event, block, _}, State) -> {noreply, State}; @@ -727,12 +727,14 @@ calculate_ema(OldEMA, Value, Alpha) -> add_peer(Peer, Release) -> may_be_rotate_peer_ports(Peer), case ets:lookup(?MODULE, {peer, Peer}) of - [{_, Release}] -> + [{_, #performance{ release = Release }}] -> ok; - _ -> + [{_, Performance}] -> + set_performance(Peer, Performance#performance{ release = Release }); + [] -> case check_peer(Peer) of ok -> - ets:insert(?MODULE, [{{peer, Peer}, Release}]); + set_performance(Peer, #performance{ release = Release }); _ -> ok end diff --git a/apps/arweave/src/ar_poller.erl b/apps/arweave/src/ar_poller.erl index ba927a042..1416b388c 100644 --- a/apps/arweave/src/ar_poller.erl +++ b/apps/arweave/src/ar_poller.erl @@ -133,7 +133,7 @@ handle_cast(Msg, State) -> ?LOG_ERROR([{event, unhandled_cast}, {module, ?MODULE}, {message, Msg}]), {noreply, State}. 
-handle_info({event, block, {discovered, Peer, B, ElapsedMicroseconds, Size}}, State) -> +handle_info({event, block, {discovered, Peer, B, QueryBlockTime}}, State) -> case ar_ignore_registry:member(B#block.indep_hash) of false -> ?LOG_INFO([{event, fetched_block_for_validation}, @@ -142,8 +142,7 @@ handle_info({event, block, {discovered, Peer, B, ElapsedMicroseconds, Size}}, St true -> ok end, - ValidationStatus = ar_block_pre_validator:pre_validate(B, Peer, erlang:timestamp()), - ar_peers:rate_fetched_data(Peer, block, ValidationStatus, ElapsedMicroseconds, Size, 1), + ar_block_pre_validator:pre_validate(B, Peer, QueryBlockTime, erlang:timestamp()), {noreply, State}; handle_info({event, block, _}, State) -> {noreply, State}; diff --git a/apps/arweave/src/ar_poller_worker.erl b/apps/arweave/src/ar_poller_worker.erl index 92138ff06..c893f406a 100644 --- a/apps/arweave/src/ar_poller_worker.erl +++ b/apps/arweave/src/ar_poller_worker.erl @@ -97,7 +97,7 @@ handle_cast({poll, Ref}, #state{ ref = Ref, peer = Peer, {ok, TXs} -> B2 = B#block{ txs = TXs }, ar_ignore_registry:remove_temporary(H), - ar_events:send(block, {discovered, Peer, B2, Time, Size}), + ar_events:send(block, {discovered, Peer, B2, Time}), ok; failed -> ?LOG_WARNING([{event, failed_to_get_block_txs_from_peer}, From 9a4185d203112e453730bb1a90359b0220d48475 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Fri, 7 Jul 2023 16:15:35 +0000 Subject: [PATCH 28/30] some last Timestamp -> QueryBlockTime changes --- apps/arweave/src/ar_block_pre_validator.erl | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/apps/arweave/src/ar_block_pre_validator.erl b/apps/arweave/src/ar_block_pre_validator.erl index 41c088d55..b4cbfd5d0 100644 --- a/apps/arweave/src/ar_block_pre_validator.erl +++ b/apps/arweave/src/ar_block_pre_validator.erl @@ -78,7 +78,7 @@ handle_cast(pre_validate, #state{ pqueue = Q, size = Size, ip_timestamps = IPTim ar_util:cast_after(50, ?MODULE, pre_validate), {noreply, State}; 
false -> - {{_, {B, PrevB, SolutionResigned, Peer, Timestamp}}, + {{_, {B, PrevB, SolutionResigned, Peer, QueryBlockTime}}, Q2} = gb_sets:take_largest(Q), BlockSize = byte_size(term_to_binary(B)), Size2 = Size - BlockSize, @@ -110,7 +110,7 @@ handle_cast(pre_validate, #state{ pqueue = Q, size = Size, ip_timestamps = IPTim {previous_block, ar_util:encode(PrevB#block.indep_hash)}]), pre_validate_nonce_limiter_seed_data(B, PrevB, - SolutionResigned, Peer, Timestamp), + SolutionResigned, Peer, QueryBlockTime), {IPTimestamps2, HashTimestamps2}; false -> {IPTimestamps2, HashTimestamps} @@ -122,13 +122,13 @@ handle_cast(pre_validate, #state{ pqueue = Q, size = Size, ip_timestamps = IPTim end end; -handle_cast({enqueue, {B, PrevB, SolutionResigned, Peer, Timestamp}}, +handle_cast({enqueue, {B, PrevB, SolutionResigned, Peer, QueryBlockTime}}, State) -> #state{ pqueue = Q, size = Size } = State, Priority = priority(B, Peer), BlockSize = byte_size(term_to_binary(B)), Size2 = Size + BlockSize, - Q2 = gb_sets:add_element({Priority, {B, PrevB, SolutionResigned, Peer, Timestamp}}, Q), + Q2 = gb_sets:add_element({Priority, {B, PrevB, SolutionResigned, Peer, QueryBlockTime}}, Q), {Q3, Size3} = case Size2 > ?MAX_PRE_VALIDATION_QUEUE_SIZE of true -> From 0425e8802d3500eecd058b49060019ad8e720fb6 Mon Sep 17 00:00:00 2001 From: James Piechota Date: Fri, 7 Jul 2023 16:43:29 +0000 Subject: [PATCH 29/30] replace bad_response with issue_warning --- .../src/ar_block_propagation_worker.erl | 6 +-- apps/arweave/src/ar_data_discovery.erl | 2 +- apps/arweave/src/ar_data_sync.erl | 2 - apps/arweave/src/ar_http_iface_client.erl | 18 ++++---- apps/arweave/src/ar_peers.erl | 43 +++++++++---------- 5 files changed, 33 insertions(+), 38 deletions(-) diff --git a/apps/arweave/src/ar_block_propagation_worker.erl b/apps/arweave/src/ar_block_propagation_worker.erl index 12faac972..eab815901 100644 --- a/apps/arweave/src/ar_block_propagation_worker.erl +++ b/apps/arweave/src/ar_block_propagation_worker.erl 
@@ -49,12 +49,10 @@ handle_cast({send_block2, Peer, SendAnnouncementFun, SendFun, RetryCount, From}, {ok, {{<<"200">>, _}, _, Body, _, _}} -> case catch ar_serialize:binary_to_block_announcement_response(Body) of {'EXIT', Reason} -> - ar_events:send(peer, {bad_response, - {Peer, block_announcement, Reason}}), + ar_peers:issue_warning(Peer, block_announcement, Reason), From ! {worker_sent_block, self()}; {error, Reason} -> - ar_events:send(peer, {bad_response, - {Peer, block_announcement, Reason}}), + ar_peers:issue_warning(Peer, block_announcement, Reason), From ! {worker_sent_block, self()}; {ok, #block_announcement_response{ missing_tx_indices = L, missing_chunk = MissingChunk, missing_chunk2 = MissingChunk2 }} -> diff --git a/apps/arweave/src/ar_data_discovery.erl b/apps/arweave/src/ar_data_discovery.erl index 8ed48be30..5dde7510d 100644 --- a/apps/arweave/src/ar_data_discovery.erl +++ b/apps/arweave/src/ar_data_discovery.erl @@ -163,7 +163,7 @@ handle_info({'EXIT', _, normal}, State) -> handle_info({'DOWN', _, process, _, _}, #state{ peers_pending = N } = State) -> {noreply, State#state{ peers_pending = N - 1 }}; -handle_info({event, peer, {bad_response, {Peer, _Resource, _Reason}}}, State) -> +handle_info({event, peer, {removed, Peer}}, State) -> gen_server:cast(?MODULE, {remove_peer, Peer}), {noreply, State}; diff --git a/apps/arweave/src/ar_data_sync.erl b/apps/arweave/src/ar_data_sync.erl index b7dd36cad..79d721776 100644 --- a/apps/arweave/src/ar_data_sync.erl +++ b/apps/arweave/src/ar_data_sync.erl @@ -944,7 +944,6 @@ handle_cast({store_fetched_chunk, Peer, Byte, Proof} = Cast, State) -> ar_util:cast_after(1000, self(), Cast), {noreply, State}; false -> - % HANDLED ar_events:send(peer, {fetched_chunk, Peer, Time, TransferSize}), ar_packing_server:request_unpack(AbsoluteOffset, ChunkArgs), ?LOG_DEBUG([{event, requested_fetched_chunk_unpacking}, {data_path_hash, ar_util:encode(crypto:hash(sha256, @@ -964,7 +963,6 @@ handle_cast({store_fetched_chunk, Peer, 
Byte, Proof} = Cast, State) -> decrement_chunk_cache_size(), process_invalid_fetched_chunk(Peer, Byte, State); {true, DataRoot, TXStartOffset, ChunkEndOffset, TXSize, ChunkSize, ChunkID} -> - % HANDLED ar_events:send(peer, {fetched_chunk, Peer, Time, TransferSize}), AbsoluteTXStartOffset = BlockStartOffset + TXStartOffset, AbsoluteEndOffset = AbsoluteTXStartOffset + ChunkEndOffset, ChunkArgs = {unpacked, Chunk, AbsoluteEndOffset, TXRoot, ChunkSize}, diff --git a/apps/arweave/src/ar_http_iface_client.erl b/apps/arweave/src/ar_http_iface_client.erl index 0b64f1615..7310bb4c0 100644 --- a/apps/arweave/src/ar_http_iface_client.erl +++ b/apps/arweave/src/ar_http_iface_client.erl @@ -642,10 +642,10 @@ handle_get_recent_hash_list_response(Response) -> handle_get_recent_hash_list_diff_response({ok, {{<<"200">>, _}, _, Body, _, _}}, HL, Peer) -> case parse_recent_hash_list_diff(Body, HL) of {error, invalid_input} -> - ar_events:send(peer, {bad_response, {Peer, recent_hash_list_diff, invalid_input}}), + ar_peers:issue_warning(Peer, recent_hash_list_diff, invalid_input), {error, invalid_input}; {error, unknown_base} -> - ar_events:send(peer, {bad_response, {Peer, recent_hash_list_diff, unknown_base}}), + ar_peers:issue_warning(Peer, recent_hash_list_diff, unknown_base), {error, unknown_base}; {ok, Reply} -> {ok, Reply} @@ -810,7 +810,7 @@ get_tx_from_remote_peer(Peer, TXID) -> {peer, ar_util:format_peer(Peer)}, {tx, ar_util:encode(TXID)} ]), - ar_events:send(peer, {bad_response, {Peer, tx, invalid}}), + ar_peers:issue_warning(Peer, tx, invalid), {error, invalid_tx}; true -> ar_peers:rate_fetched_data(Peer, tx, Time, Size), @@ -965,7 +965,7 @@ handle_block_response(Peer, Encoding, {ok, {{<<"200">>, _}, _, Body, Start, End} ?LOG_INFO( "event: failed_to_parse_block_response, peer: ~s, reason: ~p", [ar_util:format_peer(Peer), Reason]), - ar_events:send(peer, {bad_response, {Peer, block, Reason}}), + ar_peers:issue_warning(Peer, block, Reason), not_found; {ok, B} -> {ok, B, End - 
Start, byte_size(term_to_binary(B))}; @@ -975,11 +975,11 @@ handle_block_response(Peer, Encoding, {ok, {{<<"200">>, _}, _, Body, Start, End} ?LOG_INFO( "event: failed_to_parse_block_response, peer: ~s, error: ~p", [ar_util:format_peer(Peer), Error]), - ar_events:send(peer, {bad_response, {Peer, block, Error}}), + ar_peers:issue_warning(Peer, block, Error), not_found end; handle_block_response(Peer, _Encoding, Response) -> - ar_events:send(peer, {bad_response, {Peer, block, Response}}), + ar_peers:issue_warning(Peer, block, Response), not_found. %% @doc Process the response of a GET /unconfirmed_tx call. @@ -1010,14 +1010,14 @@ handle_tx_response(Peer, Encoding, {ok, {{<<"200">>, _}, _, Body, Start, End}}) {ok, TX#tx{ data = <<>> }, End - Start, Size - DataSize} end; {'EXIT', Reason} -> - ar_events:send(peer, {bad_response, {Peer, tx, Reason}}), + ar_peers:issue_warning(Peer, tx, Reason), {error, Reason}; Reply -> - ar_events:send(peer, {bad_response, {Peer, tx, Reply}}), + ar_peers:issue_warning(Peer, tx, Reply), Reply end; handle_tx_response(Peer, _Encoding, Response) -> - ar_events:send(peer, {bad_response, {Peer, tx, Response}}), + ar_peers:issue_warning(Peer, tx, Response), {error, Response}. p2p_headers() -> diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index 4e4f009de..d7c31187b 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -169,6 +169,9 @@ rate_fetched_data(Peer, DataType, _, _LatencyMicroseconds, _DataSize, _Concurren rate_gossiped_data(Peer, DataType, DataSize) -> gen_server:cast(?MODULE, {gossiped_data, Peer, DataType, DataSize}). +issue_warning(Peer, _Type, _Reason) -> + gen_server:cast(?MODULE, {warning, Peer}). + %% @doc Print statistics about the current peers. 
stats() -> Connected = get_peers(), @@ -355,6 +358,16 @@ handle_cast({gossiped_data, Peer, DataType, DataSize}, State) -> {noreply, State}; +handle_cast({warning, Peer}, State) -> + Performance = update_rating(Peer, false), + case Performance#performance.average_success < ?MINIMUM_SUCCESS of + true -> + remove_peer(Peer); + false -> + ok + end, + {noreply, State}; + handle_cast(Cast, State) -> ?LOG_WARNING("event: unhandled_cast, cast: ~p", [Cast]), {noreply, State}. @@ -363,28 +376,24 @@ handle_info({event, peer, {made_request, Peer, Release}}, State) -> add_peer(Peer, Release), {noreply, State}; -handle_info({event, peer, {bad_response, {Peer, _Type, _Reason}}}, State) -> - issue_warning(Peer), - {noreply, State}; - handle_info({event, peer, {banned, BannedPeer}}, State) -> remove_peer(BannedPeer), {noreply, State}; handle_info({event, block, {rejected, failed_to_fetch_first_chunk, _H, Peer}}, State) -> - issue_warning(Peer), + issue_warning(Peer, block_rejected, failed_to_fetch_first_chunk), {noreply, State}; handle_info({event, block, {rejected, failed_to_fetch_second_chunk, _H, Peer}}, State) -> - issue_warning(Peer), + issue_warning(Peer, block_rejected, failed_to_fetch_second_chunk), {noreply, State}; handle_info({event, block, {rejected, failed_to_fetch_chunk, _H, Peer}}, State) -> - issue_warning(Peer), + issue_warning(Peer, block_rejected, failed_to_fetch_chunk), {noreply, State}; handle_info({event, block, {new, B, - #{ source := {peer, Peer}, query_block_tie := QueryBlockTime }}}, State) -> + #{ source := {peer, Peer}, query_block_time := QueryBlockTime }}}, State) -> DataSize = byte_size(term_to_binary(B)), case QueryBlockTime of undefined -> ar_peers:rate_gossiped_data(Peer, block, DataSize); @@ -719,7 +728,8 @@ update_rating(Peer, LatencyMicroseconds, DataSize, Concurrency, IsSuccess) -> Total2 = Total - Rating + Rating2, may_be_rotate_peer_ports(Peer), set_performance(Peer, Performance2), - set_total_rating(Total2). 
+ set_total_rating(Total2), + Performance2. calculate_ema(OldEMA, Value, Alpha) -> Alpha * Value + (1 - Alpha) * OldEMA. @@ -749,7 +759,8 @@ remove_peer(RemovedPeer) -> Total = get_total_rating(), set_total_rating(Total - Performance#performance.rating), ets:delete(?MODULE, {peer, RemovedPeer}), - remove_peer_port(RemovedPeer). + remove_peer_port(RemovedPeer), + ar_events:send(peer, {removed, RemovedPeer}). remove_peer_port(Peer) -> {IP, Port} = get_ip_port(Peer), @@ -803,18 +814,6 @@ store_peers() -> ar_storage:write_term(peers, Records) end. -issue_warning(Peer) -> - Performance = get_or_init_performance(Peer), - Success = calculate_ema(Performance#performance.average_success, 0, ?SUCCESS_ALPHA), - case Success < ?MINIMUM_SUCCESS of - true -> - remove_peer(Peer); - false -> - Performance2 = Performance#performance{average_success = Success}, - may_be_rotate_peer_ports(Peer), - set_performance(Peer, Performance2) - end. - %%%=================================================================== %%% Tests. %%%=================================================================== From 4e98f13975e4f0b5e5a6577f61b3b441a5145c5b Mon Sep 17 00:00:00 2001 From: James Piechota Date: Fri, 7 Jul 2023 18:36:09 +0000 Subject: [PATCH 30/30] move ban logic into ar_peers --- apps/arweave/src/ar_blacklist_middleware.erl | 1 - apps/arweave/src/ar_block_pre_validator.erl | 15 +--- apps/arweave/src/ar_network_middleware.erl | 2 +- apps/arweave/src/ar_peers.erl | 78 ++++++++++++++------ 4 files changed, 57 insertions(+), 39 deletions(-) diff --git a/apps/arweave/src/ar_blacklist_middleware.erl b/apps/arweave/src/ar_blacklist_middleware.erl index 86a93220f..84d86273f 100644 --- a/apps/arweave/src/ar_blacklist_middleware.erl +++ b/apps/arweave/src/ar_blacklist_middleware.erl @@ -45,7 +45,6 @@ start() -> ban_peer(Peer, TTLSeconds) -> Key = {ban, peer_to_ip_addr(Peer)}, Expires = os:system_time(seconds) + TTLSeconds, - ar_events:send(peer, {banned, Peer}), ets:insert(?MODULE, {Key, Expires}). 
is_peer_banned(Peer) -> diff --git a/apps/arweave/src/ar_block_pre_validator.erl b/apps/arweave/src/ar_block_pre_validator.erl index b4cbfd5d0..a8d3b806a 100644 --- a/apps/arweave/src/ar_block_pre_validator.erl +++ b/apps/arweave/src/ar_block_pre_validator.erl @@ -401,7 +401,6 @@ pre_validate_previous_solution_hash(B, BDS, PrevB, SolutionResigned, Peer, Query case B#block.previous_solution_hash == PrevB#block.hash of false -> post_block_reject_warn(B, check_previous_solution_hash, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_previous_solution_hash, B#block.indep_hash, Peer}), invalid; @@ -419,7 +418,6 @@ pre_validate_last_retarget(B, BDS, PrevB, SolutionResigned, Peer, QueryBlockTime pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, QueryBlockTime); false -> post_block_reject_warn(B, check_last_retarget, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_last_retarget, B#block.indep_hash, Peer}), invalid @@ -440,7 +438,6 @@ pre_validate_difficulty(B, BDS, PrevB, SolutionResigned, Peer, QueryBlockTime) - QueryBlockTime); _ -> post_block_reject_warn(B, check_difficulty, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_difficulty, B#block.indep_hash, Peer}), invalid end. 
@@ -451,7 +448,6 @@ pre_validate_cumulative_difficulty(B, BDS, PrevB, SolutionResigned, Peer, QueryB case ar_block:verify_cumulative_diff(B, PrevB) of false -> post_block_reject_warn(B, check_cumulative_difficulty, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_cumulative_difficulty, B#block.indep_hash, Peer}), invalid; @@ -493,7 +489,6 @@ pre_validate_quick_pow(B, PrevB, SolutionResigned, Peer, QueryBlockTime) -> case binary:decode_unsigned(SolutionHash, big) > Diff of false -> post_block_reject_warn(B, check_hash_preimage, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_hash_preimage, B#block.indep_hash, Peer}), invalid; @@ -543,7 +538,6 @@ pre_validate_nonce_limiter_seed_data(B, PrevB, SolutionResigned, Peer, QueryBloc SolutionResigned, Peer, QueryBlockTime); false -> post_block_reject_warn(B, check_nonce_limiter_seed_data, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_nonce_limiter_seed_data, B#block.indep_hash, Peer}), invalid @@ -556,7 +550,6 @@ pre_validate_partition_number(B, PrevB, PartitionUpperBound, SolutionResigned, P case B#block.partition_number > Max of true -> post_block_reject_warn(B, check_partition_number, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_partition_number, B#block.indep_hash, Peer}), invalid; @@ -570,7 +563,6 @@ pre_validate_nonce(B, PrevB, PartitionUpperBound, SolutionResigned, Peer, QueryB case B#block.nonce > Max of true -> post_block_reject_warn(B, check_nonce, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_nonce, B#block.indep_hash, Peer}), invalid; false -> @@ -643,7 +635,6 @@ pre_validate_pow_2_6(B, PrevB, PartitionUpperBound, Peer, QueryBlockTime) -> pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, 
Peer, QueryBlockTime); false -> post_block_reject_warn(B, check_pow, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_pow, B#block.indep_hash, Peer}), invalid end @@ -664,7 +655,6 @@ pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, QueryBlockTime) -> invalid; false -> post_block_reject_warn(B, check_poa, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_poa, B#block.indep_hash, Peer}), invalid; true -> @@ -686,7 +676,6 @@ pre_validate_poa(B, PrevB, PartitionUpperBound, H0, H1, Peer, QueryBlockTime) -> invalid; false -> post_block_reject_warn(B, check_poa2, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_poa2, B#block.indep_hash, Peer}), invalid; @@ -702,11 +691,10 @@ pre_validate_nonce_limiter(B, PrevB, Peer, QueryBlockTime) -> {false, cache_mismatch} -> ar_ignore_registry:add(B#block.indep_hash), post_block_reject_warn(B, check_nonce_limiter, Peer), - ar_events:send(block, {rejected, invalid_nonce_limiter, B#block.indep_hash, Peer}), + ar_events:send(block, {rejected, invalid_nonce_limiter_cache_mismatch, B#block.indep_hash, Peer}), invalid; false -> post_block_reject_warn(B, check_nonce_limiter, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_nonce_limiter, B#block.indep_hash, Peer}), invalid; {true, cache_match} -> @@ -768,7 +756,6 @@ pre_validate_pow(B, BDS, PrevB, Peer, QueryBlockTime) -> ok; false -> post_block_reject_warn(B, check_pow, Peer), - ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME), ar_events:send(block, {rejected, invalid_pow, B#block.indep_hash, Peer}), invalid end. 
diff --git a/apps/arweave/src/ar_network_middleware.erl b/apps/arweave/src/ar_network_middleware.erl index 34ab96416..7b0402b65 100644 --- a/apps/arweave/src/ar_network_middleware.erl +++ b/apps/arweave/src/ar_network_middleware.erl @@ -31,7 +31,7 @@ maybe_add_peer(Peer, Req) -> not_set -> ok; _ -> - ar_events:send(peer, {made_request, Peer, get_release(Req)}) + ar_peers:add_peer(Peer, get_release(Req)) end. wrong_network(Req) -> diff --git a/apps/arweave/src/ar_peers.erl b/apps/arweave/src/ar_peers.erl index d7c31187b..4ffbe2e2d 100644 --- a/apps/arweave/src/ar_peers.erl +++ b/apps/arweave/src/ar_peers.erl @@ -10,8 +10,9 @@ -include_lib("eunit/include/eunit.hrl"). -export([start_link/0, get_peers/0, get_peer_performances/1, get_trusted_peers/0, is_public_peer/1, - get_peer_release/1, stats/0, discover_peers/0, rank_peers/1, resolve_and_cache_peer/2, - rate_response/4, rate_fetched_data/4, rate_fetched_data/6, rate_gossiped_data/3 + get_peer_release/1, stats/0, discover_peers/0, add_peer/2, rank_peers/1, + resolve_and_cache_peer/2, rate_response/4, rate_fetched_data/4, rate_fetched_data/6, + rate_gossiped_data/3 ]). -export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2]). @@ -42,6 +43,34 @@ -define(THROUGHPUT_ALPHA, 0.1). -define(SUCCESS_ALPHA, 0.01). +%% When processing block rejected events for blocks received from a peer, we handle rejections +%% differently based on the rejection reason. +-define(BLOCK_REJECTION_WARNING, [ + failed_to_fetch_first_chunk, + failed_to_fetch_second_chunk, + failed_to_fetch_chunk +]). +-define(BLOCK_REJECTION_BAN, [ + invalid_previous_solution_hash, + invalid_last_retarget, + invalid_difficulty, + invalid_cumulative_difficulty, + invalid_hash_preimage, + invalid_nonce_limiter_seed_data, + invalid_partition_number, + invalid_nonce, + invalid_pow, + invalid_poa, + invalid_poa2, + invalid_nonce_limiter +]). 
+-define(BLOCK_REJECTION_IGNORE, [ + invalid_signature, + invalid_hash, + invalid_timestamp, + invalid_resigned_solution_hash, + invalid_nonce_limiter_cache_mismatch +]). %% We only do scoring of this many TCP ports per IP address. When there are not enough slots, %% we remove the peer from the first slot. @@ -172,6 +201,9 @@ rate_gossiped_data(Peer, DataType, DataSize) -> issue_warning(Peer, _Type, _Reason) -> gen_server:cast(?MODULE, {warning, Peer}). +add_peer(Peer, Release) -> + gen_server:cast(?MODULE, {add_peer, Peer, Release}). + %% @doc Print statistics about the current peers. stats() -> Connected = get_peers(), @@ -233,7 +265,7 @@ resolve_and_cache_peer(RawPeer, Type) -> init([]) -> process_flag(trap_exit, true), - [ok, ok] = ar_events:subscribe([peer, block]), + [ok, ok] = ar_events:subscribe(block), load_peers(), gen_server:cast(?MODULE, rank_peers), gen_server:cast(?MODULE, ping_peers), @@ -245,7 +277,7 @@ handle_call(Request, _From, State) -> {reply, ok, State}. handle_cast({add_peer, Peer, Release}, State) -> - add_peer(Peer, Release), + maybe_add_peer(Peer, Release), {noreply, State}; handle_cast(rank_peers, State) -> @@ -372,24 +404,24 @@ handle_cast(Cast, State) -> ?LOG_WARNING("event: unhandled_cast, cast: ~p", [Cast]), {noreply, State}. 
-handle_info({event, peer, {made_request, Peer, Release}}, State) ->
-	add_peer(Peer, Release),
-	{noreply, State};
-
-handle_info({event, peer, {banned, BannedPeer}}, State) ->
-	remove_peer(BannedPeer),
-	{noreply, State};
+handle_info({event, block, {rejected, Reason, _H, Peer}}, State) when Peer /= no_peer ->
+	IssueBan = lists:member(Reason, ?BLOCK_REJECTION_BAN),
+	IssueWarning = lists:member(Reason, ?BLOCK_REJECTION_WARNING),
+	Ignore = lists:member(Reason, ?BLOCK_REJECTION_IGNORE),
 
-handle_info({event, block, {rejected, failed_to_fetch_first_chunk, _H, Peer}}, State) ->
-	issue_warning(Peer, block_rejected, failed_to_fetch_first_chunk),
-	{noreply, State};
-
-handle_info({event, block, {rejected, failed_to_fetch_second_chunk, _H, Peer}}, State) ->
-	issue_warning(Peer, block_rejected, failed_to_fetch_second_chunk),
-	{noreply, State};
-
-handle_info({event, block, {rejected, failed_to_fetch_chunk, _H, Peer}}, State) ->
-	issue_warning(Peer, block_rejected, failed_to_fetch_chunk),
+	case {IssueBan, IssueWarning, Ignore} of
+		{true, false, false} ->
+			ar_blacklist_middleware:ban_peer(Peer, ?BAD_BLOCK_BAN_TIME),
+			remove_peer(Peer);
+		{false, true, false} ->
+			issue_warning(Peer, block_rejected, Reason);
+		{false, false, true} ->
+			%% ignore
+			ok;
+		_ ->
+			%% Every reason should be in exactly one list.
+			error("invalid block rejection reason")
+	end,
 	{noreply, State};
 
 handle_info({event, block, {new, B,
@@ -457,7 +489,7 @@ discover_peers([Peer | Peers]) ->
 			ok ->
 				case ar_http_iface_client:get_info(Peer, release) of
 					{<<"release">>, Release} when is_integer(Release) ->
-						gen_server:cast(?MODULE, {add_peer, Peer, Release});
+						maybe_add_peer(Peer, Release);
 					_ ->
 						ok
 				end;
@@ -734,7 +766,7 @@ update_rating(Peer, LatencyMicroseconds, DataSize, Concurrency, IsSuccess) ->
 calculate_ema(OldEMA, Value, Alpha) ->
 	Alpha * Value + (1 - Alpha) * OldEMA.
 
-add_peer(Peer, Release) -> +maybe_add_peer(Peer, Release) -> may_be_rotate_peer_ports(Peer), case ets:lookup(?MODULE, {peer, Peer}) of [{_, #performance{ release = Release }}] ->