Add an option to limit the size of query results for efficiency

samhaswon · May 16, 2024 · 1966c2a · 1966c2a
1 parent f574aae
commit 1966c2a
Show file tree

Hide file tree

Showing 5 changed files with 15 additions and 8 deletions.
diff --git a/README.md b/README.md
@@ -31,6 +31,8 @@ The results of each query are ranked by the number of keyword occurrences.
 
 If a query is made with no matches, say "notawordinthebible," the result of the query will be a list of length 0. 
 
+The number of results returned may be capped with the optional `max_results` parameter; when it is omitted, all matches are returned.
+
 ### Preloading an Index
 
 Versions are automatically loaded as needed, but you may wish to preload a version for the sake of speed. 

diff --git a/makefile b/makefile
@@ -1,8 +1,8 @@
 build:
 	py -m build
 
-install: dist/multi_bible_search-2.0.0.tar.gz
-	pip install --force-reinstall ./dist/multi_bible_search-2.0.0.tar.gz
+install: dist/multi_bible_search-2.0.1.tar.gz
+	pip install --force-reinstall ./dist/multi_bible_search-2.0.1.tar.gz
 	copy venv\\Lib\\site-packages\\multi_bible_search\\*.pyd src\\multi_bible_search\\
 
 full: build install
diff --git a/pyproject.toml b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "multi_bible_search"
-version = "2.0.0"
+version = "2.0.1"
 authors = [
   { name="Samuel Howard" },
 ]

diff --git a/src/multi_bible_search/bible_search_adapter.py b/src/multi_bible_search/bible_search_adapter.py
@@ -1,6 +1,7 @@
 import bz2
 import json
 import os
+import sys
 from typing import List, Union
 from .multi_bible_search import BibleSearch as cBibleSearch
 
@@ -78,17 +79,18 @@ def unload_version(self, version: str) -> None:
         else:
             raise Exception(f"Invalid version {version}")
 
-    def search(self, query: str, version="KJV") -> List[str]:
+    def search(self, query: str, version: str = "KJV", max_results: int = sys.maxsize) -> List[str]:
         """
         Search for a passage in the Bible.
         :param query: The search query string.
         :param version: The version to search.
+        :param max_results: The maximum number of results to return (defaults to `sys.maxsize`, i.e., effectively no limit).
         :return: List of match references (e.g., `["John 11:35", "Matthew 1:7", ...]`).
         """
         # Load the version if it is not already loaded
         if version not in self.__loaded:
             self.load(version)
-        return self.__c_search.search(query, version)
+        return self.__c_search.search(query, version, max_results)
 
     def internal_index_size(self) -> int:
         """

diff --git a/src/multi_bible_search/multi_bible_search.c b/src/multi_bible_search/multi_bible_search.c
@@ -1,3 +1,4 @@
+#include <limits.h>
 #include <Python.h>
 #include <stdint.h>
 #include <string.h>
@@ -264,7 +265,7 @@ static PyObject* rtranslate(long reference) {
 // Tokenizes a given string based on spaces
 char **tokenize(const char *input_string, int *num_tokens, int *len_tokens) {
     // Allocate memory for token array
-    char **tokens = calloc(strlen(input_string), sizeof(char *));
+    char **tokens = calloc(strlen(input_string) + 1, sizeof(char *));
     if (tokens == NULL) {
         // Handle memory allocation failure
         return NULL;
@@ -596,8 +597,10 @@ PyObject *SearchObject_search(SearchObject *self, PyObject *args) {
     char *query1,     // The query string
          *version,    // The version to query
          **tokens;    // The tokenized form of the query
+    // Maximum number of results to return to Python
+    Py_ssize_t max_results = PY_SSIZE_T_MAX;
 
-    if (!PyArg_ParseTuple(args, "ss", &query1, &version)) {
+    if (!PyArg_ParseTuple(args, "ss|n", &query1, &version, &max_results)) {
         PyObject *exception_type = PyExc_RuntimeError;
         PyObject *exception_value = PyUnicode_FromString("Bad search arguments!\n");
         PyObject *exception_traceback = NULL;
@@ -716,7 +719,7 @@ PyObject *SearchObject_search(SearchObject *self, PyObject *args) {
     // Rank the results, storing the length of the deduplicated portion of the array
     result_count = rank(token_result_list, token_result_list_len, num_tokens);
 
-    for (size_t i = 0; i < result_count && i < token_result_list_len; i++) {
+    for (size_t i = 0; i < result_count && i < token_result_list_len && i < max_results; i++) {
         // Translate the reference and add it to the Python list
         str_ref = rtranslate(token_result_list[i]);
         // Make sure the result isn't None; this is a second check on top of the one done on the Python side.