Skip to content

Commit

Permalink
Merge pull request #9079 from MinaProtocol/compatible
Browse files Browse the repository at this point in the history
Merge back to develop
  • Loading branch information
mrmr1993 authored Jun 17, 2021
2 parents c720870 + 7901aa6 commit b137fbd
Show file tree
Hide file tree
Showing 716 changed files with 29,097 additions and 24,241 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -205,7 +205,7 @@ reformat: git_hooks
$(WRAPAPP) dune exec --profile=$(DUNE_PROFILE) src/app/reformat/reformat.exe -- -path .

# Run ocamlformat in place on the .ml/.mli paths listed by `git status -s`
# that still exist on disk (the stat check drops paths that are gone).
# The trailing `|| true` keeps the target from failing when the command
# exits non-zero.
reformat-diff:
ocamlformat --doc-comments=before --inplace $(shell git status -s | cut -c 4- | grep '\.mli\?$$' | while IFS= read -r f; do stat "$$f" >/dev/null 2>&1 && echo "$$f"; done) || true
@ocamlformat --doc-comments=before --inplace $(shell git status -s | cut -c 4- | grep '\.mli\?$$' | while IFS= read -r f; do stat "$$f" >/dev/null 2>&1 && echo "$$f"; done) || true

check-format:
$(WRAPAPP) dune exec --profile=$(DUNE_PROFILE) src/app/reformat/reformat.exe -- -path . -check
Expand Down
7 changes: 7 additions & 0 deletions automation/services/watchdog/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ def check_google_storage_bucket(v1, namespace, recent_google_bucket_blocks):

newest_age = min([ age for age in blob_ages ])

end = time.time()

print("Checking google storage bucket took {} seconds".format(end-now))

recent_google_bucket_blocks.set(newest_age)

# ========================================================================
Expand Down Expand Up @@ -125,6 +129,7 @@ def get_chain_id(v1, namespace):

def check_seed_list_up(v1, namespace, seeds_reachable):
print('checking seed list up')
start = time.time()

seed_peers_list_url = os.environ.get('SEED_PEERS_URL')

Expand All @@ -150,6 +155,8 @@ def check_seed_list_up(v1, namespace, seeds_reachable):
res = json.loads(val)
#checklibp2p returns whether or not the connection to a peerID errored
fraction_up = sum(res.values())/len(res.values())
end = time.time()
print("checking seed connection took {} seconds".format(end-start))
seeds_reachable.set(fraction_up)

# ========================================================================
12 changes: 12 additions & 0 deletions automation/services/watchdog/node_status_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ def peer_to_multiaddr(peer):
def collect_node_status_metrics(v1, namespace, nodes_synced_near_best_tip, nodes_synced, nodes_queried, nodes_responded, seed_nodes_queried, seed_nodes_responded, nodes_errored, context_deadline_exceeded, failed_security_protocol_negotiation, connection_refused_errors, size_limit_exceeded_errors, timed_out_errors, stream_reset_errors, other_connection_errors, prover_errors):
print('collecting node status metrics')

start = time.time()

pods = v1.list_namespaced_pod(namespace, watch=False)

pod_names = [ p['metadata']['name'] for p in pods.to_dict()['items'] if p['status']['phase'] == 'Running' ]
Expand Down Expand Up @@ -82,6 +84,9 @@ def collect_node_status_metrics(v1, namespace, nodes_synced_near_best_tip, nodes
other_connection_errors.set(err_others)
nodes_synced.set(synced_fraction)

end = time.time()
print("Updating Coda_watchdog_nodes_synced took {} seconds".format(end-start))

# -------------------------------------------------

# TODO: prover_errors
Expand Down Expand Up @@ -145,6 +150,9 @@ def get_deepest_child(p):

print("Number of peers with 'Synced' status: {}\nPeers not synced near the best tip: {}".format(sum(all_synced_peers), peers_out_of_sync))

end2 = time.time()
print("Updating Coda_watchdog_nodes_synced_near_best_tip took {} seconds".format(end2-end))

nodes_synced_near_best_tip.set(synced_near_best_tip_fraction)

# ========================================================================
Expand All @@ -155,6 +163,8 @@ def collect_node_status(v1, namespace, seeds, pods, seed_nodes_responded, seed_n
all_resps = []
peer_set = set()

start = time.time()

def contains_error(resp):
try:
resp['error']
Expand Down Expand Up @@ -203,6 +213,8 @@ def add_resp(raw, peers, seed, seed_node_responded, seed_node_queried):


valid_resps = peer_table.values()
end = time.time()
print("Node status collection took {} seconds".format(end-start))

return (len(peer_set), valid_resps, error_resps)

Expand Down
2 changes: 1 addition & 1 deletion automation/services/watchdog/version.txt
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.4.11
0.4.12
1 change: 1 addition & 0 deletions buildkite/src/Jobs/Release/ArchiveNodeArtifact.dhall
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ Pipeline.build
S.strictlyStart (S.contains "src"),
S.strictlyStart (S.contains "scripts/archive"),
S.strictlyStart (S.contains "automation"),
S.strictlyStart (S.contains "dockerfiles"),
S.strictlyStart (S.contains "buildkite/src/Jobs/Release/ArchiveNodeArtifact")
],
path = "Release",
Expand Down
2 changes: 1 addition & 1 deletion dockerfiles/Dockerfile-mina-archive
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ RUN echo "Building image with version $deb_version"
COPY scripts/archive-entrypoint.sh /entrypoint.sh
RUN chmod +x /entrypoint.sh

COPY --chown=${UID} scripts/puppeteer/* /
COPY --chown=${UID} puppeteer-context/* /
RUN chmod +x /mina_daemon_puppeteer.py /find_puppeteer.sh /start.sh /stop.sh

# Workaround terrible postgresql package requirements with man
Expand Down
2 changes: 1 addition & 1 deletion dockerfiles/Dockerfile-mina-daemon
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ COPY --chown=${UID} scripts/daemon-entrypoint.sh /entrypoint.sh
# Solve this by marking scripts executable in git
COPY --chown=${UID} ./auxiliary_entrypoints /entrypoint.d

COPY --chown=${UID} scripts/puppeteer/* /
COPY --chown=${UID} puppeteer-context/* /
RUN chmod +x /mina_daemon_puppeteer.py /find_puppeteer.sh /start.sh /stop.sh

ENV CODA_TIME_OFFSET 0
Expand Down
36 changes: 29 additions & 7 deletions dockerfiles/puppeteer-context/mina_daemon_puppeteer.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
#!/usr/bin/env python3

# This is a temporary hack for the integration test framework to be able to stop
# and start nodes dynamically in a Kubernetes environment. This script takes
# mina arguments and will start and monitor a mina process with those arguments.
Expand Down Expand Up @@ -35,16 +37,17 @@ def do_GET(s):
s.end_headers()
s.wfile.write(b'<html><body>The daemon is currently offline.<br/><i>This broadcast was brought to you by the puppeteer mock server</i></body></html>')

# Trapping SIGCHLD at all is what lets `signal.pause()` resume; the handler
# additionally reaps a terminated child so it does not linger as a zombie.
def handle_child_termination(signum, frame):
    """SIGCHLD handler: log the signal and reap one exited child, if any.

    Args:
        signum: number of the delivered signal (unused).
        frame: interrupted stack frame (unused).
    """
    print("puppeteer script: SIGCHLD received " )
    try:
        # WNOHANG: never block inside a signal handler.
        # NOTE(review): waiting on pid -1 can also collect the mina child's
        # exit status before Popen.poll() sees it -- confirm this is intended.
        os.waitpid(-1, os.WNOHANG)
    except ChildProcessError:
        # Nothing left to wait for (e.g. the child was already reaped by a
        # Popen.wait() call). An exception escaping a signal handler would
        # surface at an arbitrary point in the main loop, so swallow it here.
        pass

def handle_start_request(signum, frame):
    """Signal handler that records a request to start the daemon.

    This handler is registered for SIGUSR2 in the `__main__` block (and
    start.sh sends SIGUSR2), so the log line names that signal. It only sets
    a flag; the daemon is actually started later by `inactive_loop`.

    Args:
        signum: number of the delivered signal (unused).
        frame: interrupted stack frame (unused).
    """
    global active_daemon_request
    print("puppeteer script: SIGUSR2 handle_start_request received, setting active_daemon_request to True" )
    active_daemon_request = True

def handle_stop_request(signum, frame):
    """Signal handler that records a request to stop the daemon.

    This handler is registered for SIGUSR1 in the `__main__` block (and
    stop.sh sends SIGUSR1), so the log line names that signal. It only sets
    a flag; the daemon is actually stopped later by `active_loop`.

    Args:
        signum: number of the delivered signal (unused).
        frame: interrupted stack frame (unused).
    """
    global inactive_daemon_request
    print("puppeteer script: SIGUSR1 handle_stop_request received, setting inactive_daemon_request to True" )
    inactive_daemon_request = True

Expand All @@ -68,55 +71,73 @@ def wait_for_pid(pid):
time.sleep(0.25)

def start_daemon():
    """Spawn `mina` with `daemon_args` and create the `daemon-active` marker.

    The child's stdout and stderr are both appended to `mina.log`. The
    resulting process handle is stored in the module-level `mina_process`.
    """
    global mina_process
    print("puppeteer script: start_daemon called" )
    with open('mina.log', 'a') as log_file:
        command = ['mina'] + daemon_args
        mina_process = subprocess.Popen(
            command,
            stdout=log_file,
            stderr=subprocess.STDOUT,
        )
    print("puppeteer script: touching /root/daemon-active" )
    # NOTE(review): the marker path is relative while the log message names
    # /root/daemon-active -- presumably the working directory is /root; confirm.
    marker = Path('daemon-active')
    marker.touch()

def stop_daemon():
    """Terminate the running mina process and remove the `daemon-active` marker.

    Sends SIGTERM to `mina_process`, looks up its child PIDs, waits for the
    process itself and then for each of those PIDs, and finally clears the
    marker file and resets `mina_process` to None.
    """
    global mina_process
    print("puppeteer script: stop_daemon called" )
    mina_process.send_signal(signal.SIGTERM)

    # The child list is gathered after the signal is sent, as in the original.
    pids = get_child_processes(mina_process.pid)
    print("stop_daemon, child_pids: " )
    print(*pids)
    mina_process.wait()
    for pid in pids:
        pid_text = str(pid)
        print("waiting for child_pid: " + pid_text )
        wait_for_pid(pid)
        print("done waiting for: " + pid_text )
    print("puppeteer script: removing /root/daemon-active" )
    marker = Path('daemon-active')
    marker.unlink()
    mina_process = None

# technically, doing the loops like this will eventually result in a stack overflow
# however, you would need to do a lot of starts and stops to hit this condition

def inactive_loop():
    """Serve the mock "daemon offline" endpoint until a start request arrives.

    Binds an HTTP server with `MockRequestHandler` on 0.0.0.0:3085 and, per
    iteration, handles one request and then calls
    `signal.sigtimedwait(ALL_SIGNALS, 0)` (zero timeout). Once
    `active_daemon_request` has been set, the daemon is started, the flag is
    reset and the loop ends. Errors are logged rather than raised. In every
    case the mock server socket is closed and control passes to
    `active_loop()`.
    """
    global active_daemon_request
    print("puppeteer script: inactive_loop beginning" )

    server = None
    try:
        server = HTTPServer(('0.0.0.0', 3085), MockRequestHandler)
        while True:
            server.handle_request()
            signal.sigtimedwait(ALL_SIGNALS, 0)
            if active_daemon_request:
                print("inactive_loop: active_daemon_request received, starting daemon" )
                start_daemon()
                active_daemon_request = False
                break
    except Exception as err:
        print("puppeteer script: inactive_loop experienced an error: ")
        print(err)
    finally:
        # Only server_close() is called. shutdown() is meant to stop a
        # serve_forever() loop running in another thread; this function only
        # uses handle_request(), so shutdown() would block, and it would fail
        # outright if the constructor raised and `server` is still None.
        if server is not None:
            server.server_close()
        print("puppeteer script: mock server closed. inactive_loop terminating" )

    active_loop()

def active_loop():
print("puppeteer script: active_loop beginning" )
global mina_process, inactive_daemon_request

while True:
signal.pause()
status = mina_process.poll()
if status != None:
print("active_loop: status not None, cleaning up and exiting")
cleanup_and_exit(status)
elif inactive_daemon_request:
print("active_loop: inactive daemon request detected, stopping daemon")
stop_daemon()
inactive_daemon_request = False
break
Expand All @@ -130,6 +151,7 @@ def cleanup_and_exit(status):
sys.exit(status)

if __name__ == '__main__':
print("puppeteer script: starting...")
signal.signal(signal.SIGCHLD, handle_child_termination)
signal.signal(signal.SIGUSR1, handle_stop_request)
signal.signal(signal.SIGUSR2, handle_start_request)
Expand All @@ -145,4 +167,4 @@ def cleanup_and_exit(status):
['tail', '-q', '-f', 'mina.log', '-f', '.mina-config/mina-prover.log', '-f', '.mina-config/mina-verifier.log', '-f' , '.mina-config/mina-best-tip.log']
)

inactive_loop()
inactive_loop()
4 changes: 4 additions & 0 deletions dockerfiles/puppeteer-context/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
#!/bin/bash

# Ask the puppeteer to start the mina daemon, then block until the
# /root/daemon-active marker file exists.

# SIGUSR2 is the signal the puppeteer handles as a start request. If it cannot
# be delivered (for example, no puppeteer PID was found), exit non-zero
# instead of waiting forever for a marker that will never appear.
kill -s SIGUSR2 $(/find_puppeteer.sh) || exit 1
while [ ! -f /root/daemon-active ]; do sleep 1; done
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/bin/bash

# Ask the puppeteer to stop the mina daemon, then block until the
# /root/daemon-active marker file has been removed.

# SIGUSR1 is the signal the puppeteer handles as a stop request. If it cannot
# be delivered (for example, no puppeteer PID was found), exit non-zero
# instead of waiting forever for the marker to disappear.
kill -s SIGUSR1 $(/find_puppeteer.sh) || exit 1
# Check the absolute path only: a relative `daemon-active` test would depend
# on the caller's working directory.
while [ -f /root/daemon-active ]; do sleep 1; done
8 changes: 4 additions & 4 deletions dockerfiles/scripts/cron_job_dump_ledger.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,23 +38,23 @@ done
mina ledger export next-epoch-ledger > next_epoch_ledger.json
echo "next epoch ledger dumped!"

# DATE="$(date +%F_%H%M)"
DATE="$(date +%F_%H%M)"
# Extract the epoch number from the output of `mina client status`. NOTE: this will break if the output format of that command changes.
EPOCHNUM="$(mina client status | grep "Best tip consensus time" | grep -o "epoch=[0-9]*" | sed "s/[^0-9]*//g" )"

# rename the file in the required file name format
STAKING_HASH="$(mina ledger hash --ledger-file staking_epoch_ledger.json)"
STAKING_MD5="$(md5sum staking_epoch_ledger.json | cut -d " " -f 1 )"
LEDGER_FILENAME=staking-"$EPOCHNUM"-"$STAKING_HASH"-"$STAKING_MD5".json
LEDGER_FILENAME=staking-"$EPOCHNUM"-"$STAKING_HASH"-"$STAKING_MD5"-"$DATE".json
mv ./staking_epoch_ledger.json ./$LEDGER_FILENAME

NEXT_STAKING_HASH="$(mina ledger hash --ledger-file next_epoch_ledger.json)"
NEXT_STAKING_MD5="$(md5sum next_epoch_ledger.json | cut -d " " -f 1 )"
NEXT_FILENAME=next-staking-"$EPOCHNUM"-"$NEXT_STAKING_HASH"-"$NEXT_STAKING_MD5".json
NEXT_FILENAME=next-staking-"$EPOCHNUM"-"$NEXT_STAKING_HASH"-"$NEXT_STAKING_MD5"-"$DATE".json
mv ./next_epoch_ledger.json ./$NEXT_FILENAME

EXPORTED_LOGS="local-logs"
LOGS_FILENAME="daemon-logs-epoch-$EPOCHNUM.tgz"
LOGS_FILENAME="daemon-logs-epoch-$EPOCHNUM-"$DATE".tgz"
mina client export-local-logs --tarfile $EXPORTED_LOGS
mv /root/.mina-config/exported_logs/$EXPORTED_LOGS.tar.gz $LOGS_FILENAME

Expand Down
Loading

0 comments on commit b137fbd

Please sign in to comment.