boxwise · pylipp · Aug 29, 2024 · Aug 29, 2024
diff --git a/back/README.md b/back/README.md
@@ -134,6 +134,7 @@ Mind the following perks of peewee:
 
 1. When creating a model instance referencing another model via a foreign key, use the ID of the FK model instance instead of a model instance, e.g. `Location(base=1)`.
 1. If you want to retrieve only the ID of a foreign key field, access it with the "magic" suffix `_id`, e.g. `location.base_id`. This avoids overhead of an additional select query issued by peewee when using `location.base.id`.
+1. peewee caches the results of a select query, which leads to high memory consumption for large result sets. This can be avoided by appending `.iterator()` to the select call. See the [docs for more info](http://docs.peewee-orm.com/en/latest/peewee/querying.html#iterating-over-large-result-sets) and also this [insightful question](https://stackoverflow.com/questions/77564291/debug-peewee-cache-hits/77571546#77571546).
 1. You can activate peewee's logging to gain insight into the generated SQL queries:
 ```python
 from .utils import activate_logging

diff --git a/back/boxtribute_server/graph_ql/loaders.py b/back/boxtribute_server/graph_ql/loaders.py
@@ -54,7 +54,9 @@ async def batch_load_fn(self, ids):
             permission = f"{resource}:read"
             authorize(permission=permission)
 
-        rows = {r.id: r for r in self.model.select().where(self.model.id << ids)}
+        rows = {
+            r.id: r for r in self.model.select().where(self.model.id << ids).iterator()
+        }
         return [rows.get(i) for i in ids]
 
 
@@ -112,10 +114,12 @@ class ShipmentLoader(DataLoader):
     async def batch_load_fn(self, keys):
         shipments = {
             s.id: s
-            for s in Shipment.select().orwhere(
+            for s in Shipment.select()
+            .orwhere(
                 authorized_bases_filter(Shipment, base_fk_field_name="source_base_id"),
                 authorized_bases_filter(Shipment, base_fk_field_name="target_base_id"),
             )
+            .iterator()
         }
         return [shipments.get(i) for i in keys]
 
@@ -125,11 +129,12 @@ async def batch_load_fn(self, agreement_ids):
         # Select all shipments with given agreement IDs that the user is authorized for,
         # and group them by agreement ID
         shipments = defaultdict(list)
-        for shipment in Shipment.select().where(
+        result = Shipment.select().where(
             Shipment.transfer_agreement << agreement_ids,
             authorized_bases_filter(Shipment, base_fk_field_name="source_base")
             | authorized_bases_filter(Shipment, base_fk_field_name="target_base"),
-        ):
+        )
+        for shipment in result.iterator():
             shipments[shipment.transfer_agreement_id].append(shipment)
         # Return empty list if agreement has no shipments attached
         return [shipments.get(i, []) for i in agreement_ids]
@@ -139,7 +144,7 @@ class TagsForBoxLoader(DataLoader):
     async def batch_load_fn(self, keys):
         tags = defaultdict(list)
         # maybe need different join type
-        for relation in TagsRelation.select(
+        result = TagsRelation.select(
             TagsRelation.object_type, TagsRelation.object_id, Tag
         ).join(
             Tag,
@@ -149,7 +154,8 @@ async def batch_load_fn(self, keys):
                 & (TagsRelation.object_id << keys)
                 & (authorized_bases_filter(Tag))
             ),
-        ):
+        )
+        for relation in result.iterator():
             tags[relation.object_id].append(relation.tag)
 
         # Keys are in fact box IDs. Return empty list if box has no tags assigned
@@ -330,6 +336,7 @@ async def batch_load_fn(self, box_ids):
             .where(History.table_name == "stock", History.record_id << box_ids)
             .group_by(History.record_id)
             .dicts()
+            .iterator()
         )
 
         # Construct mapping of box IDs and their history information
@@ -360,6 +367,7 @@ async def batch_load_fn(self, shipment_ids):
             ShipmentDetail.select(ShipmentDetail, Shipment)
             .join(Shipment)
             .where(ShipmentDetail.shipment << shipment_ids)
+            .iterator()
         ):
             details[detail.shipment_id].append(detail)
         # Return empty list if shipment has no details attached
@@ -370,12 +378,14 @@ class ShipmentDetailForBoxLoader(DataLoader):
     async def batch_load_fn(self, keys):
         details = {
             detail.box_id: detail
-            for detail in ShipmentDetail.select().where(
+            for detail in ShipmentDetail.select()
+            .where(
                 ShipmentDetail.box << keys,
                 ShipmentDetail.removed_on.is_null(),
                 ShipmentDetail.lost_on.is_null(),
                 ShipmentDetail.received_on.is_null(),
             )
+            .iterator()
         }
         # Keys are in fact box IDs. Return None if box has no shipment detail associated
         return [details.get(i) for i in keys]
@@ -386,7 +396,7 @@ async def batch_load_fn(self, keys):
         authorize(permission="size:read")
         # Mapping of size range ID to list of sizes
         sizes = defaultdict(list)
-        for size in Size.select():
+        for size in Size.select().iterator():
             sizes[size.size_range_id].append(size)
         # Keys are in fact size range IDs. Return empty list if size range has no sizes
         return [sizes.get(i, []) for i in keys]
@@ -404,6 +414,6 @@ async def batch_load_fn(self, standard_product_ids):
             ),
         )
         standard_products = defaultdict(list)
-        for row in result:
+        for row in result.iterator():
             standard_products[row.standard_product_id].append(row.base)
         return [standard_products.get(i, []) for i in standard_product_ids]