Skip to content

Commit

Permalink
Add an option to limit the size of query results for efficiency
Browse files Browse the repository at this point in the history
  • Loading branch information
samhaswon committed May 16, 2024
1 parent f574aae commit 1966c2a
Show file tree
Hide file tree
Showing 5 changed files with 15 additions and 8 deletions.
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ The results of each query are ranked by the number of keyword occurrences.

If a query has no matches (for example, "notawordinthebible"), the result of the query will be an empty list.

Use the optional `max_results` parameter to cap the number of results returned; by default, all matches are returned.

### Preloading an Index

Versions are automatically loaded as needed, but you may wish to preload a version for the sake of speed.
Expand Down
4 changes: 2 additions & 2 deletions makefile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
build:
py -m build

install: dist/multi_bible_search-2.0.0.tar.gz
pip install --force-reinstall ./dist/multi_bible_search-2.0.0.tar.gz
install: dist/multi_bible_search-2.0.1.tar.gz
pip install --force-reinstall ./dist/multi_bible_search-2.0.1.tar.gz
copy venv\\Lib\\site-packages\\multi_bible_search\\*.pyd src\\multi_bible_search\\

full: build install
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "multi_bible_search"
version = "2.0.0"
version = "2.0.1"
authors = [
{ name="Samuel Howard" },
]
Expand Down
6 changes: 4 additions & 2 deletions src/multi_bible_search/bible_search_adapter.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import bz2
import json
import os
import sys
from typing import List, Union
from .multi_bible_search import BibleSearch as cBibleSearch

Expand Down Expand Up @@ -78,17 +79,18 @@ def unload_version(self, version: str) -> None:
else:
raise Exception(f"Invalid version {version}")

def search(self, query: str, version="KJV") -> List[str]:
def search(self, query: str, version: str = "KJV", max_results: int = sys.maxsize) -> List[str]:
"""
Search for a passage in the Bible.
:param query: The search query string.
:param version: The version to search.
:param max_results: The maximum number of results to retrieve.
:return: List of match references (e.g., `["John 11:35", "Matthew 1:7", ...]`).
"""
# Load the version if it is not already loaded
if version not in self.__loaded:
self.load(version)
return self.__c_search.search(query, version)
return self.__c_search.search(query, version, max_results)

def internal_index_size(self) -> int:
"""
Expand Down
9 changes: 6 additions & 3 deletions src/multi_bible_search/multi_bible_search.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
#include <limits.h>
#include <Python.h>
#include <stdint.h>
#include <string.h>
Expand Down Expand Up @@ -264,7 +265,7 @@ static PyObject* rtranslate(long reference) {
// Tokenizes a given string based on spaces
char **tokenize(const char *input_string, int *num_tokens, int *len_tokens) {
// Allocate memory for token array
char **tokens = calloc(strlen(input_string), sizeof(char *));
char **tokens = calloc(strlen(input_string) + 1, sizeof(char *));
if (tokens == NULL) {
// Handle memory allocation failure
return NULL;
Expand Down Expand Up @@ -596,8 +597,10 @@ PyObject *SearchObject_search(SearchObject *self, PyObject *args) {
char *query1, // The query string
*version, // The version to query
**tokens; // The tokenized form of the query
// Maximum number of results to return to Python
Py_ssize_t max_results = PY_SSIZE_T_MAX;

if (!PyArg_ParseTuple(args, "ss", &query1, &version)) {
if (!PyArg_ParseTuple(args, "ss|n", &query1, &version, &max_results)) {
PyObject *exception_type = PyExc_RuntimeError;
PyObject *exception_value = PyUnicode_FromString("Bad search arguments!\n");
PyObject *exception_traceback = NULL;
Expand Down Expand Up @@ -716,7 +719,7 @@ PyObject *SearchObject_search(SearchObject *self, PyObject *args) {
// Rank the results, storing the length of the deduplicated portion of the array
result_count = rank(token_result_list, token_result_list_len, num_tokens);

for (size_t i = 0; i < result_count && i < token_result_list_len; i++) {
for (size_t i = 0; i < result_count && i < token_result_list_len && i < max_results; i++) {
// Translate the reference and add it to the Python list
str_ref = rtranslate(token_result_list[i]);
// Make sure the result isn't None. Basically another double check of the Python side of things.
Expand Down

0 comments on commit 1966c2a

Please sign in to comment.