Skip to content

Commit 2340926

Browse files
committed
Fix: Restarting the NDP Proxy sometimes crashed
This is likely because it was restarted too quickly. Solution: wrap the error in a custom class and retry after 5 seconds. This is still a work in progress, as a better approach would be to limit the frequency of restarts instead, for example by using an event or a queue.
1 parent c7a2bf9 commit 2340926

File tree

1 file changed

+27
-2
lines changed

1 file changed

+27
-2
lines changed

src/aleph/vm/network/ndp_proxy.py

+27-2
Original file line numberDiff line numberDiff line change
@@ -10,16 +10,24 @@
1010
and restart the service.
1111
"""
1212

13+
import asyncio
1314
import logging
1415
from dataclasses import dataclass
1516
from ipaddress import IPv6Network
1617
from pathlib import Path
18+
from subprocess import CalledProcessError
1719

1820
from aleph.vm.utils import run_in_subprocess
1921

2022
logger = logging.getLogger(__name__)
2123

2224

25+
class NdpProxyTerminatedError(Exception):
    """Raised when restarting the NDP Proxy fails due to a SIGTERM signal.

    This is a dedicated exception type so that callers can tell this failure
    apart from other restart errors and handle it separately (for example by
    retrying the restart).
    """
29+
30+
2331
@dataclass
2432
class NdpRule:
2533
address_range: IPv6Network
@@ -33,15 +41,32 @@ def __init__(self, host_network_interface: str):
3341
@staticmethod
async def _restart_ndppd():
    """Restart the ``ndppd`` service through ``systemctl``.

    Raises:
        NdpProxyTerminatedError: if the ``systemctl restart`` command was
            killed by SIGTERM. The original ``CalledProcessError`` is
            chained as the cause.
        CalledProcessError: for any other failure of the command
            (re-raised unchanged).
    """
    # Local import so this block does not depend on a new file-level import.
    import signal

    logger.debug("Restarting ndppd")
    try:
        await run_in_subprocess(["systemctl", "restart", "ndppd"])
    except CalledProcessError as error:
        # A negative return code -N means the child was killed by signal N.
        # Compare the code itself instead of matching the exception's
        # string representation, which depends on message formatting.
        if error.returncode == -signal.SIGTERM:
            raise NdpProxyTerminatedError("ndppd was terminated by a SIGTERM signal") from error
        raise
3751

3852
async def _update_ndppd_conf(self):
    """Write ``/etc/ndppd.conf`` from the current mapping and restart ndppd.

    The configuration contains one ``proxy`` section for
    ``self.host_network_interface`` with one ``rule`` per entry of
    ``self.interface_address_range_mapping``.

    The restart is attempted up to three times. When an attempt fails with
    ``NdpProxyTerminatedError``, the method waits five seconds and retries.

    Raises:
        NdpProxyTerminatedError: if the last attempt is also terminated by
            SIGTERM.
    """
    # Single source of truth for the retry policy, so the loop bound and
    # the "last attempt" check cannot drift apart.
    max_attempts = 3
    retry_delay_seconds = 5

    rules = "".join(
        f" rule {address_range} {{\n iface {interface}\n }}\n"
        for interface, address_range in self.interface_address_range_mapping.items()
    )
    config = f"proxy {self.host_network_interface} {{\n" + rules + "}\n"
    Path("/etc/ndppd.conf").write_text(config)

    for attempt in range(1, max_attempts + 1):
        try:
            await self._restart_ndppd()
        except NdpProxyTerminatedError:
            if attempt == max_attempts:
                # Out of retries: let the caller see the failure.
                raise
            logger.warning(
                "ndppd was terminated by a SIGTERM signal while restarting. Waiting %d seconds and retrying.",
                retry_delay_seconds,
            )
            await asyncio.sleep(retry_delay_seconds)
        else:
            # Restart succeeded; no further attempts needed.
            break
4570

4671
async def add_range(self, interface: str, address_range: IPv6Network):
4772
logger.debug("Proxying range %s -> %s", address_range, interface)

0 commit comments

Comments
 (0)