diff --git a/back/README.md b/back/README.md index a403d8f73..d75086be1 100755 --- a/back/README.md +++ b/back/README.md @@ -134,6 +134,7 @@ Mind the following perks of peewee: 1. When creating a model instance referencing another model via a foreign key, use the ID of the FK model instance instead of a model instance, e.g. `Location(base=1)`. 1. If you want to retrieve only the ID of a foreign key field, access it with the "magic" suffix `_id`, e.g. `location.base_id`. This avoids overhead of an additional select query issued by peewee when using `location.base.id`. +1. peewee will cache the results of a select operation, leading to large memory consumption for large result sets. This can be avoided by attaching `.iterator()` to the select call. See [docs for more info](http://docs.peewee-orm.com/en/latest/peewee/querying.html#iterating-over-large-result-sets) and also this [insightful question](https://stackoverflow.com/questions/77564291/debug-peewee-cache-hits/77571546#77571546) 1. You can activate peewee's logging to gain insight into the generated SQL queries: ```python from .utils import activate_logging diff --git a/back/boxtribute_server/graph_ql/loaders.py b/back/boxtribute_server/graph_ql/loaders.py index bcc406fe2..467ebfb5f 100644 --- a/back/boxtribute_server/graph_ql/loaders.py +++ b/back/boxtribute_server/graph_ql/loaders.py @@ -54,7 +54,9 @@ async def batch_load_fn(self, ids): permission = f"{resource}:read" authorize(permission=permission) - rows = {r.id: r for r in self.model.select().where(self.model.id << ids)} + rows = { + r.id: r for r in self.model.select().where(self.model.id << ids).iterator() + } return [rows.get(i) for i in ids] @@ -112,10 +114,12 @@ class ShipmentLoader(DataLoader): async def batch_load_fn(self, keys): shipments = { s.id: s - for s in Shipment.select().orwhere( + for s in Shipment.select() + .orwhere( authorized_bases_filter(Shipment, base_fk_field_name="source_base_id"), authorized_bases_filter(Shipment, base_fk_field_name="target_base_id"), ) + .iterator() } return [shipments.get(i) for i in keys] @@ -125,11 +129,12 @@ async def batch_load_fn(self, agreement_ids): # Select all shipments with given agreement IDs that the user is authorized for, # and group them by agreement ID shipments = defaultdict(list) - for shipment in Shipment.select().where( + result = Shipment.select().where( Shipment.transfer_agreement << agreement_ids, authorized_bases_filter(Shipment, base_fk_field_name="source_base") | authorized_bases_filter(Shipment, base_fk_field_name="target_base"), - ): + ) + for shipment in result.iterator(): shipments[shipment.transfer_agreement_id].append(shipment) # Return empty list if agreement has no shipments attached return [shipments.get(i, []) for i in agreement_ids] @@ -139,7 +144,7 @@ class TagsForBoxLoader(DataLoader): async def batch_load_fn(self, keys): tags = defaultdict(list) # maybe need different join type - for relation in TagsRelation.select( + result = TagsRelation.select( TagsRelation.object_type, TagsRelation.object_id, Tag ).join( Tag, @@ -149,7 +154,8 @@ async def batch_load_fn(self, keys): & (TagsRelation.object_id << keys) & (authorized_bases_filter(Tag)) ), - ): + ) + for relation in result.iterator(): tags[relation.object_id].append(relation.tag) # Keys are in fact box IDs. Return empty list if box has no tags assigned @@ -330,6 +336,7 @@ async def batch_load_fn(self, box_ids): .where(History.table_name == "stock", History.record_id << box_ids) .group_by(History.record_id) .dicts() + .iterator() ) # Construct mapping of box IDs and their history information @@ -360,6 +367,7 @@ async def batch_load_fn(self, shipment_ids): ShipmentDetail.select(ShipmentDetail, Shipment) .join(Shipment) .where(ShipmentDetail.shipment << shipment_ids) + .iterator() ): details[detail.shipment_id].append(detail) # Return empty list if shipment has no details attached @@ -370,12 +378,14 @@ class ShipmentDetailForBoxLoader(DataLoader): async def batch_load_fn(self, keys): details = { detail.box_id: detail - for detail in ShipmentDetail.select().where( + for detail in ShipmentDetail.select() + .where( ShipmentDetail.box << keys, ShipmentDetail.removed_on.is_null(), ShipmentDetail.lost_on.is_null(), ShipmentDetail.received_on.is_null(), ) + .iterator() } # Keys are in fact box IDs. Return None if box has no shipment detail associated return [details.get(i) for i in keys] @@ -386,7 +396,7 @@ async def batch_load_fn(self, keys): authorize(permission="size:read") # Mapping of size range ID to list of sizes sizes = defaultdict(list) - for size in Size.select(): + for size in Size.select().iterator(): sizes[size.size_range_id].append(size) # Keys are in fact size range IDs. Return empty list if size range has no sizes return [sizes.get(i, []) for i in keys] @@ -404,6 +414,6 @@ async def batch_load_fn(self, standard_product_ids): ), ) standard_products = defaultdict(list) - for row in result: + for row in result.iterator(): standard_products[row.standard_product_id].append(row.base) return [standard_products.get(i, []) for i in standard_product_ids]