Skip to content

Commit a4d16c2

Browse files
committed
Chunked search reindexing
1 parent 37b1ba6 commit a4d16c2

File tree

2 files changed

+41
-12
lines changed

2 files changed

+41
-12
lines changed

search/management/commands/rebuild_search_index.py

+22-12
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
from posts.models.post import Post
77
from search.models import SearchIndex
88
from users.models.user import User
9+
from utils.queryset import chunked_queryset
910

1011
log = logging.getLogger(__name__)
1112

@@ -16,17 +17,26 @@ class Command(BaseCommand):
1617
def handle(self, *args, **options):
    """Rebuild the search index from scratch.

    Deletes every existing ``SearchIndex`` row, then re-indexes comments,
    posts and approved users, walking each queryset in chunks through
    ``chunked_queryset`` and logging every indexed object to stdout.
    """
    SearchIndex.objects.all().delete()

    # NOTE: chunked_queryset re-orders its input by pk, so the ordering
    # given below does not decide the order in which rows are processed.
    comments = (
        Comment.visible_objects()
        .filter(is_deleted=False, post__is_visible=True)
        .order_by("-created_at")
    )
    for batch in chunked_queryset(comments):
        for comment in batch:
            self.stdout.write(f"Indexing comment: {comment.id}")
            SearchIndex.update_comment_index(comment)

    posts = (
        Post.visible_objects()
        .filter(is_shadow_banned=False)
        .order_by("-created_at")
    )
    for batch in chunked_queryset(posts):
        for post in batch:
            self.stdout.write(f"Indexing post: {post.slug}")
            SearchIndex.update_post_index(post)

    approved_users = (
        User.objects
        .filter(moderation_status=User.MODERATION_STATUS_APPROVED)
        .order_by("-created_at")
    )
    for batch in chunked_queryset(approved_users):
        for user in batch:
            self.stdout.write(f"Indexing user: {user.slug}")
            # Each user gets both a profile entry and a tags entry.
            SearchIndex.update_user_index(user)
            SearchIndex.update_user_tags(user)

    self.stdout.write("Done 🥙")

utils/queryset.py

+19
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
def chunked_queryset(queryset, chunk_size=1000):
    """Yield successive pk-ordered slices of *queryset*.

    The queryset is re-ordered by primary key (any ordering already set on
    it is replaced) and split into consecutive primary-key ranges, each
    covering at most ``chunk_size`` rows at the time its boundary is
    looked up.

    Args:
        queryset: An unsliced queryset-like object supporting ``order_by``,
            ``filter(pk__gt=..., pk__lte=...)``, ``values_list("pk",
            flat=True)``, integer indexing and ``last()``.
        chunk_size: Maximum number of rows per chunk; must be >= 1.

    Yields:
        A lazily evaluated queryset for each chunk, in ascending pk order.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1.
    """
    if chunk_size < 1:
        raise ValueError("chunk_size must be a positive integer")

    queryset = queryset.order_by("pk")

    # ``None`` means "no lower bound yet". A numeric sentinel such as 0
    # would skip rows whose pk is <= 0 and cannot be compared against
    # non-integer primary keys (UUIDs, strings).
    last_pk = None

    while True:
        remaining = queryset if last_pk is None else queryset.filter(pk__gt=last_pk)
        pks = remaining.values_list("pk", flat=True)

        try:
            # pk of the chunk_size-th remaining row closes a full chunk
            end_pk = pks[chunk_size - 1]
        except IndexError:
            # fewer than chunk_size rows left: take them all, or stop when
            # nothing is left (``last()`` returns None on an empty set)
            end_pk = pks.last()
            if end_pk is None:
                return

        yield remaining.filter(pk__lte=end_pk)

        last_pk = end_pk

0 commit comments

Comments
 (0)