Skip to content

Commit d05f44a

Browse files
Introduce Clock.add_system_event_trigger(...) to include logcontext by default (#18945)
Introduce `Clock.add_system_event_trigger(...)` to wrap system event callback code in a logcontext, ensuring we can identify which server generated the logs. Background: > Ideally, nothing from the Synapse homeserver would be logged against the `sentinel` > logcontext as we want to know which server the logs came from. In practice, this is not > always the case yet especially outside of request handling. > > Global things outside of Synapse (e.g. Twisted reactor code) should run in the > `sentinel` logcontext. It's only when it calls into application code that a logcontext > gets activated. This means the reactor should be started in the `sentinel` logcontext, > and any time an awaitable yields control back to the reactor, it should reset the > logcontext to be the `sentinel` logcontext. This is important to avoid leaking the > current logcontext to the reactor (which would then get picked up and associated with > the next thing the reactor does). > > *-- `docs/log_contexts.md` Also adds a lint to prefer `Clock.add_system_event_trigger(...)` over `reactor.addSystemEventTrigger(...)` Part of #18905
1 parent 8d5d87f commit d05f44a

File tree

8 files changed

+107
-8
lines changed

8 files changed

+107
-8
lines changed

changelog.d/18945.misc

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Introduce `Clock.add_system_event_trigger(...)` to wrap system event callback code in a logcontext, ensuring we can identify which server generated the logs.

scripts-dev/mypy_synapse_plugin.py

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,12 @@
7474
category="synapse-reactor-clock",
7575
)
7676

77+
PREFER_SYNAPSE_CLOCK_ADD_SYSTEM_EVENT_TRIGGER = ErrorCode(
78+
"prefer-synapse-clock-add-system-event-trigger",
79+
"`synapse.util.Clock.add_system_event_trigger` should be used instead of `reactor.addSystemEventTrigger`",
80+
category="synapse-reactor-clock",
81+
)
82+
7783

7884
class Sentinel(enum.Enum):
7985
# defining a sentinel in this way allows mypy to correctly handle the
@@ -242,6 +248,13 @@ def get_method_signature_hook(
242248
):
243249
return check_call_when_running
244250

251+
if fullname in (
252+
"twisted.internet.interfaces.IReactorCore.addSystemEventTrigger",
253+
"synapse.types.ISynapseThreadlessReactor.addSystemEventTrigger",
254+
"synapse.types.ISynapseReactor.addSystemEventTrigger",
255+
):
256+
return check_add_system_event_trigger
257+
245258
return None
246259

247260

@@ -272,6 +285,33 @@ def check_call_when_running(ctx: MethodSigContext) -> CallableType:
272285
return signature
273286

274287

288+
def check_add_system_event_trigger(ctx: MethodSigContext) -> CallableType:
289+
"""
290+
Ensure that the `reactor.addSystemEventTrigger` callsites aren't used.
291+
292+
`synapse.util.Clock.add_system_event_trigger` should always be used instead of
293+
`reactor.addSystemEventTrigger`.
294+
295+
Since `reactor.addSystemEventTrigger` is a reactor callback, the callback will start out
296+
with the sentinel logcontext. `synapse.util.Clock` starts a default logcontext as we
297+
want to know which server the logs came from.
298+
299+
Args:
300+
ctx: The `FunctionSigContext` from mypy.
301+
"""
302+
signature: CallableType = ctx.default_signature
303+
ctx.api.fail(
304+
(
305+
"Expected all `reactor.addSystemEventTrigger` calls to use `synapse.util.Clock.add_system_event_trigger` instead. "
306+
"This is so all Synapse code runs with a logcontext as we want to know which server the logs came from."
307+
),
308+
ctx.context,
309+
code=PREFER_SYNAPSE_CLOCK_ADD_SYSTEM_EVENT_TRIGGER,
310+
)
311+
312+
return signature
313+
314+
275315
def analyze_prometheus_metric_classes(ctx: ClassDefContext) -> None:
276316
"""
277317
Cross-check the list of Prometheus metric classes against the

synapse/app/_base.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -518,7 +518,9 @@ async def start(hs: "HomeServer") -> None:
518518
# numbers of DNS requests don't starve out other users of the threadpool.
519519
resolver_threadpool = ThreadPool(name="gai_resolver")
520520
resolver_threadpool.start()
521-
reactor.addSystemEventTrigger("during", "shutdown", resolver_threadpool.stop)
521+
hs.get_clock().add_system_event_trigger(
522+
"during", "shutdown", resolver_threadpool.stop
523+
)
522524
reactor.installNameResolver(
523525
GAIResolver(reactor, getThreadPool=lambda: resolver_threadpool)
524526
)
@@ -605,7 +607,7 @@ def log_shutdown() -> None:
605607
logger.info("Shutting down...")
606608

607609
# Log when we start the shut down process.
608-
hs.get_reactor().addSystemEventTrigger("before", "shutdown", log_shutdown)
610+
hs.get_clock().add_system_event_trigger("before", "shutdown", log_shutdown)
609611

610612
setup_sentry(hs)
611613
setup_sdnotify(hs)
@@ -720,7 +722,7 @@ def setup_sdnotify(hs: "HomeServer") -> None:
720722
# we're not using systemd.
721723
sdnotify(b"READY=1\nMAINPID=%i" % (os.getpid(),))
722724

723-
hs.get_reactor().addSystemEventTrigger(
725+
hs.get_clock().add_system_event_trigger(
724726
"before", "shutdown", sdnotify, b"STOPPING=1"
725727
)
726728

synapse/handlers/presence.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -541,7 +541,7 @@ def __init__(self, hs: "HomeServer"):
541541
self.send_stop_syncing, UPDATE_SYNCING_USERS_MS
542542
)
543543

544-
hs.get_reactor().addSystemEventTrigger(
544+
hs.get_clock().add_system_event_trigger(
545545
"before",
546546
"shutdown",
547547
run_as_background_process,
@@ -842,7 +842,7 @@ def __init__(self, hs: "HomeServer"):
842842
# have not yet been persisted
843843
self.unpersisted_users_changes: Set[str] = set()
844844

845-
hs.get_reactor().addSystemEventTrigger(
845+
hs.get_clock().add_system_event_trigger(
846846
"before",
847847
"shutdown",
848848
run_as_background_process,

synapse/server.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1007,7 +1007,7 @@ def get_media_sender_thread_pool(self) -> ThreadPool:
10071007
)
10081008

10091009
media_threadpool.start()
1010-
self.get_reactor().addSystemEventTrigger(
1010+
self.get_clock().add_system_event_trigger(
10111011
"during", "shutdown", media_threadpool.stop
10121012
)
10131013

synapse/storage/databases/main/client_ips.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -455,7 +455,7 @@ def __init__(
455455
self._client_ip_looper = self._clock.looping_call(
456456
self._update_client_ips_batch, 5 * 1000
457457
)
458-
self.hs.get_reactor().addSystemEventTrigger(
458+
self.hs.get_clock().add_system_event_trigger(
459459
"before", "shutdown", self._update_client_ips_batch
460460
)
461461

synapse/storage/databases/main/lock.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,7 @@ def __init__(
9999
# lead to a race, as we may drop the lock while we are still processing.
100100
# However, a) it should be a small window, b) the lock is best effort
101101
# anyway and c) we want to really avoid leaking locks when we restart.
102-
hs.get_reactor().addSystemEventTrigger(
102+
hs.get_clock().add_system_event_trigger(
103103
"before",
104104
"shutdown",
105105
self._on_shutdown,

synapse/util/clock.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,3 +206,59 @@ def wrapped_callback(*args: Any, **kwargs: Any) -> None:
206206
# We can ignore the lint here since this class is the one location
207207
# callWhenRunning should be called.
208208
self._reactor.callWhenRunning(wrapped_callback, *args, **kwargs) # type: ignore[prefer-synapse-clock-call-when-running]
209+
210+
def add_system_event_trigger(
211+
self,
212+
phase: str,
213+
event_type: str,
214+
callback: Callable[P, object],
215+
*args: P.args,
216+
**kwargs: P.kwargs,
217+
) -> None:
218+
"""
219+
Add a function to be called when a system event occurs.
220+
221+
Equivalent to `reactor.addSystemEventTrigger` (see the that docstring for more
222+
details), but ensures that the callback is run in a logging context.
223+
224+
Args:
225+
phase: a time to call the event -- either the string 'before', 'after', or
226+
'during', describing when to call it relative to the event's execution.
227+
eventType: this is a string describing the type of event.
228+
callback: Function to call
229+
*args: Postional arguments to pass to function.
230+
**kwargs: Key arguments to pass to function.
231+
"""
232+
233+
def wrapped_callback(*args: Any, **kwargs: Any) -> None:
234+
assert context.current_context() is context.SENTINEL_CONTEXT, (
235+
"Expected `add_system_event_trigger` callback from the reactor to start with the sentinel logcontext "
236+
f"but saw {context.current_context()}. In other words, another task shouldn't have "
237+
"leaked their logcontext to us."
238+
)
239+
240+
# Because this is a callback from the reactor, we will be using the
241+
# `sentinel` log context at this point. We want the function to log with
242+
# some logcontext as we want to know which server the logs came from.
243+
#
244+
# We use `PreserveLoggingContext` to prevent our new `system_event`
245+
# logcontext from finishing as soon as we exit this function, in case `f`
246+
# returns an awaitable/deferred which would continue running and may try to
247+
# restore the `loop_call` context when it's done (because it's trying to
248+
# adhere to the Synapse logcontext rules.)
249+
#
250+
# This also ensures that we return to the `sentinel` context when we exit
251+
# this function and yield control back to the reactor to avoid leaking the
252+
# current logcontext to the reactor (which would then get picked up and
253+
# associated with the next thing the reactor does)
254+
with context.PreserveLoggingContext(context.LoggingContext("system_event")):
255+
# We use `run_in_background` to reset the logcontext after `f` (or the
256+
# awaitable returned by `f`) completes to avoid leaking the current
257+
# logcontext to the reactor
258+
context.run_in_background(callback, *args, **kwargs)
259+
260+
# We can ignore the lint here since this class is the one location
261+
# `addSystemEventTrigger` should be called.
262+
self._reactor.addSystemEventTrigger(
263+
phase, event_type, wrapped_callback, *args, **kwargs
264+
) # type: ignore[prefer-synapse-clock-add-system-event-trigger]

0 commit comments

Comments
 (0)