WIP: WTF

ctrueden · ctrueden · commit 9c078a3bf7dc · 2024-07-23T20:00:02.000-05:00
diff --git a/src/appose/types.py b/src/appose/types.py
@@ -30,6 +30,7 @@
 import json
 import re
 from math import ceil, prod
+from multiprocessing import resource_tracker
 from multiprocessing.shared_memory import SharedMemory
 from typing import Any, Dict, Sequence, Union
 
@@ -110,11 +111,35 @@ def default(self, obj):
             }
         return super().default(obj)
 
-
+foo = True
 def _appose_object_hook(obj: Dict):
     atype = obj.get("appose_type")
     if atype == "shm":
-        return SharedMemory(name=(obj["name"]), size=(obj["size"]))
+        # Attach to existing shared memory block.
+        shm = SharedMemory(name=(obj["name"]), size=(obj["size"]))
+
+        # HACK: Work around the Python resource tracker's vigorous effort to
+        # garbage collect all shared memory blocks after all known references
+        # are done using them. Unfortunately, due to how Appose invokes Python
+        # worker processes, the resource tracker does not know about the
+        # reference from the service process, and overeagerly eats the memory
+        # when the worker shuts down. To avoid this issue, we tell the worker
+        # process's associated resource tracker to ignore this particular
+        # shared memory block, instead trusting the process that actually
+        # created it to clean up when finished.
+        #
+        # This logic could go wrong if the worker process creates a
+        # SharedMemory, returns it to the service process as an output, and
+        # then the service process subsequently passes it back to the worker as
+        # an input argument: such a sequence of events would lead to the named
+        # shared memory in question being unregistered with the resource
+        # tracker here, even though it in fact *was* this worker process that
+        # created the shared memory block earlier... but I don't have a clear
+        # idea for how to avoid this difficulty at the moment.
+        name = "/" + shm.name
+        resource_tracker.unregister(name, "shared_memory")
+
+        return shm
     elif atype == "ndarray":
         return NDArray(obj["dtype"], obj["shape"], obj["shm"])
     else:
diff --git a/tests/test_shm.py b/tests/test_shm.py
@@ -31,12 +31,29 @@
 from appose.service import TaskStatus
 
 ndarray_inspect = """
+#from multiprocessing import resource_tracker
+#resource_tracker.unregister(f"/{data.shm.name}", "shared_memory")
 task.outputs["size"] = data.shm.size
 task.outputs["dtype"] = data.dtype
 task.outputs["shape"] = data.shape
 task.outputs["sum"] = sum(v for v in data.shm.buf)
 """
 
+# NB: The disabled unregister above works around the Python resource tracker's
+# vigorous effort to clean up shared memory blocks after all known references
+# are done using them. Unfortunately, due to how Appose invokes Python worker
+# processes, the resource tracker does not know about the reference from the
+# service process, and overeagerly eats the memory when the worker shuts down.
+# To avoid this issue, we tell the worker process's associated resource tracker
+# to ignore this particular shared memory block, instead trusting the process
+# that actually created it to clean up when finished.
+#
+# This logic could go wrong if the worker process creates a SharedMemory,
+# returns it to the service process as an output, and then the service process
+# subsequently passes it back to the worker as an input argument: such a
+# sequence of events would lead to the named shared memory in question being
+# unregistered with the resource tracker in the worker process, even though it
+# in fact *was* that worker process that created the shared memory block...
 
 def test_ndarray():
     env = appose.system()