Skip to content

Commit

Permalink
Merge pull request #1579 from dchiller/optimize-suggested-chants
Browse files Browse the repository at this point in the history
Chant create: optimize suggested chants feature
  • Loading branch information
dchiller authored Aug 7, 2024
2 parents f0e5ce6 + f0c1e4d commit 3e8f70c
Show file tree
Hide file tree
Showing 12 changed files with 326 additions and 215 deletions.
6 changes: 3 additions & 3 deletions .github/workflows/django_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,6 @@ jobs:
envkey_AWS_EMAIL_HOST_PASSWORD: test_password
directory: config/envs
file_name: dev_env
- run: docker compose -f docker-compose-development.yml build
- run: docker compose -f docker-compose-development.yml up -d
- run: docker compose -f docker-compose-development.yml exec -T django python manage.py test main_app.tests
- run: docker compose -f docker-compose-test-runner.yml build
- run: docker compose -f docker-compose-test-runner.yml up -d
- run: docker compose -f docker-compose-test-runner.yml exec -T django python manage.py test main_app.tests
1 change: 1 addition & 0 deletions django/cantusdb_project/cantusdb/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,5 +208,6 @@

if DEBUG:
INSTALLED_APPS.append("debug_toolbar")
INSTALLED_APPS.append("django_extensions")
# debug toolbar must be inserted as early in the middleware as possible
MIDDLEWARE.insert(0, "debug_toolbar.middleware.DebugToolbarMiddleware")
173 changes: 81 additions & 92 deletions django/cantusdb_project/cantusindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
"""

import json
from typing import Optional, Union, Callable
from typing import Optional, Union, Callable, TypedDict, Any

import requests
from requests.exceptions import SSLError, Timeout, HTTPError
Expand All @@ -14,122 +14,106 @@
CANTUS_INDEX_DOMAIN: str = "https://cantusindex.uwaterloo.ca"
OLD_CANTUS_INDEX_DOMAIN: str = "https://cantusindex.org"
DEFAULT_TIMEOUT: float = 2 # seconds
NUMBER_OF_SUGGESTED_CHANTS: int = 3 # this number can't be too large,
# since for each suggested chant, we make a request to Cantus Index.
# We haven't yet parallelized this process, so setting this number
# too high will cause the Chant Create page to take a very long time
# to load. If/when we parallelize this process, we want to limit
# the size of the burst of requests sent to CantusIndex.
NUMBER_OF_SUGGESTED_CHANTS: int = 5 # default number of suggested chants to return
# with the get_suggested_chants function


class SuggestedChant(TypedDict):
    """
    Dictionary containing information required for
    the suggested chants feature on the Chant Create form.
    """

    # The suggested Cantus ID, taken from the "cid" value of a
    # Cantus Index /json-nextchants suggestion.
    cantus_id: str
    # Number of times the suggested Cantus ID follows the queried Cantus ID
    # in existing manuscripts (the suggestion's "count" value, as an int).
    occurrences: int
    # Full text of the suggested chant ("field_full_text" of the suggestion's
    # "info" entry); None when that information is unavailable.
    fulltext: Optional[str]
    # Genre name of the suggested chant ("field_genre" of the suggestion's
    # "info" entry); None when that information is unavailable.
    genre_name: Optional[str]
    # ID of the Genre object whose name equals genre_name; None when no
    # such Genre exists.
    genre_id: Optional[int]


def get_suggested_chants(
cantus_id: str, number_of_suggestions: int = NUMBER_OF_SUGGESTED_CHANTS
) -> Optional[list[dict]]:
) -> Optional[list[SuggestedChant]]:
"""
Given a Cantus ID, query Cantus Index's /nextchants API for a list of
Cantus IDs that follow the given Cantus ID in existing manuscripts.
Sort the list by the number of occurrences of each Cantus ID, and return
a list of dictionaries containing information about the suggested Cantus IDs
with the highest number of occurrences.
Args:
cantus_id (str): a Cantus ID
number_of_suggestions (int): the number of suggested Cantus IDs to return
Returns:
Optional[list[dict]]: A list of dictionaries, each containing information
about a suggested Cantus ID:
- "cantus_id": the suggested Cantus ID
- "occurrences": the number of times the suggested Cantus ID follows
the given Cantus ID in existing manuscripts
- "fulltext": the full text of the suggested Cantus ID
- "genre_name": the genre of the suggested Cantus ID
- "genre_id": the ID of the genre of the suggested Cantus ID
If no suggestions are available, returns None.
"""
endpoint_path: str = f"/json-nextchants/{cantus_id}"
all_suggestions: Union[list, dict, None] = get_json_from_ci_api(endpoint_path)
all_suggestions = get_json_from_ci_api(endpoint_path)

if not isinstance(all_suggestions, list):
# get_json_from_ci_api timed out
# or CI returned a response with no suggestions.
if all_suggestions is None:
return None

# when Cantus ID doesn't exist within CI, CI's api returns a 200 response with `['Cantus ID is not valid']`
# when Cantus ID doesn't exist within CI, CI's api returns a
# 200 response with `['Cantus ID is not valid']`
first_suggestion = all_suggestions[0]
if not isinstance(first_suggestion, dict):
return None

sort_by_occurrences: Callable[[dict], int] = lambda suggestion: int(
sort_by_occurrences: Callable[[dict[Any, Any]], int] = lambda suggestion: int(
suggestion["count"]
)
sorted_suggestions: list = sorted(
sorted_suggestions: list[dict[Any, Any]] = sorted(
all_suggestions, key=sort_by_occurrences, reverse=True
)
trimmed_suggestions: list = sorted_suggestions[:number_of_suggestions]
trimmed_suggestions = sorted_suggestions[:number_of_suggestions]

suggested_chants: list[Optional[dict]] = []
suggested_chants: list[SuggestedChant] = []
for suggestion in trimmed_suggestions:
cantus_id: str = suggestion["cid"]
occurrences: int = int(suggestion["count"])
suggested_chants.append(get_suggested_chant(cantus_id, occurrences))

# filter out Cantus IDs where get_suggested_chant timed out
filtered_suggestions: list[dict] = [
sugg for sugg in suggested_chants if sugg is not None
]

return filtered_suggestions


def get_suggested_chant(
cantus_id: str, occurrences: int, timeout: float = DEFAULT_TIMEOUT
) -> Optional[dict]:
"""Given a Cantus ID and a number of occurrences, query one of Cantus Index's
APIs for information on that Cantus ID and return a dictionary
containing a full text, an incipit, the ID of that Cantus ID's genre, and
the number of occurrences for that Cantus ID
(Number of occurrences: this function is used on the Chant Create page,
to suggest Cantus IDs of chants that might follow a chant with the Cantus ID
of the most recently created chant within the current source. Number of occurrences
is provided by Cantus Index's /nextchants API, based on which chants follow which
other chants in existing manuscripts)
Args:
cantus_id (str): a Cantus ID
occurrences (int): the number of times chants with this Cantus ID follow chants
with the Cantus ID of the most recently created chant.
Returns:
Optional[dict]: A dictionary with the following keys:
- "cantus_id"
- "occurrences"
- "fulltext"
- "incipit"
- "genre_id"
...but if get_json_from_ci_api timed out, returns None instead
"""
endpoint_path: str = f"/json-cid/{cantus_id}"
json: Union[dict, list, None] = get_json_from_ci_api(endpoint_path, timeout=timeout)

if not isinstance(json, dict):
# mostly, in case of a timeout within get_json_from_ci_api
return None
sugg_cantus_id = suggestion["cid"]
occurences = int(suggestion["count"])
suggestion_info = suggestion.get("info")
if suggestion_info:
fulltext = suggestion_info.get("field_full_text")
genre_name = suggestion_info.get("field_genre")
else:
fulltext = None
genre_name = None
try:
genre_id = Genre.objects.get(name=genre_name).id
except Genre.DoesNotExist:
genre_id = None
suggested_chants.append(
{
"cantus_id": sugg_cantus_id,
"occurrences": occurences,
"fulltext": fulltext,
"genre_name": genre_name,
"genre_id": genre_id,
}
)

try:
fulltext: str = json["info"]["field_full_text"]
incipit: str = " ".join(fulltext.split(" ")[:5])
genre_name: str = json["info"]["field_genre"]
except TypeError:
return None
genre_id: Optional[int] = None
try:
genre_id = Genre.objects.get(name=genre_name).id
except Genre.DoesNotExist:
pass

clean_cantus_id = cantus_id.replace(".", "d").replace(":", "c")
# "d"ot "c"olon
return {
"cantus_id": cantus_id,
"occurrences": occurrences,
"fulltext": fulltext,
"incipit": incipit,
"genre_name": genre_name,
"genre_id": genre_id,
"clean_cantus_id": clean_cantus_id,
}
return suggested_chants


def get_suggested_fulltext(cantus_id: str) -> Optional[str]:
endpoint_path: str = f"/json-cid/{cantus_id}"
json: Union[dict, list, None] = get_json_from_ci_api(endpoint_path)
json_response: Union[dict, list, None] = get_json_from_ci_api(endpoint_path)

if not isinstance(json, dict):
if not isinstance(json_response, dict):
# mostly, in case of a timeout within get_json_from_ci_api
return None

try:
suggested_fulltext = json["info"]["field_full_text"]
suggested_fulltext = json_response["info"]["field_full_text"]
except KeyError:
return None

Expand Down Expand Up @@ -207,7 +191,7 @@ def get_ci_text_search(search_term: str) -> Optional[list[Optional[dict]]]:

def get_json_from_ci_api(
path: str, timeout: float = DEFAULT_TIMEOUT
) -> Union[dict, list, None]:
) -> Union[dict[Any, Any], list[Any], None]:
"""Given a path, send a request to Cantus Index at that path,
decode the response to remove its Byte Order Marker, parse it,
and return it as a dictionary or list.
Expand All @@ -221,7 +205,7 @@ def get_json_from_ci_api(
Union[dict, list, None]:
If the JSON returned from Cantus Index is a JSON object, returns a dict.
If the JSON returned is a JSON array, returns a list.
In case the request times out, returns None.
If the request times out, or other types are returned, returns None.
"""

if not path.startswith("/"):
Expand All @@ -243,4 +227,9 @@ def get_json_from_ci_api(
# there are no suggested chants
return None

return response.json()
parsed_response = response.json()

if not isinstance(parsed_response, (dict, list)):
return None

return parsed_response
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ <h5><a id="source" href="{% url 'source-detail' source.id %}">{{ source.siglum }
"{{ suggestion.fulltext | escapejs }}"
)'
>
<strong>{{ suggestion.genre_name }}</strong> - <span title="{{ suggestion.fulltext }}">{{ suggestion.incipit }}</span> (<strong>{{ suggestion.occurrences }}x</strong>)<br>
<strong>{{ suggestion.genre_name }}</strong> - <span title="{{ suggestion.fulltext }}">{{ suggestion.fulltext | truncatechars_html:25 }}</span> (<strong>{{ suggestion.occurrences }}x</strong>)<br>
{% endfor %}
{% else %}
Sorry! No suggestions found. Please use the search form below.<br>
Expand Down
14 changes: 7 additions & 7 deletions django/cantusdb_project/main_app/tests/mock_cantusindex_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,15 @@

mock_json_nextchants_001010_text: str = """
[
{"cid":"008349", "count": "12"},
{"cid":"006928", "count": "17"},
{"cid":"008411c","count":"4"},
{"cid":"008390","count":"3"},
{"cid":"007713","count":"2"},
{"cid":"909030","count":"1"}
{"cid":"008349", "count": "12","info":{"field_genre":"H", "field_full_text": "Nocte surgentes vigilemus omnes semper in psalmis meditemur atque viribus totis domino canamus dulciter hymnos | Ut pio regi pariter canentes cum suis sanctis mereamur aulam ingredi caeli simul et beatam ducere vitam | Praestet hoc nobis deitas beata patris ac nati pariterque sancti spiritus cujus resonat per omnem gloria mundum | Amen"}},
{"cid":"006928", "count": "17","info":{"field_genre": "R", "field_full_text": "In principio fecit deus caelum et terram et creavit in ea hominem ad imaginem et similitudinem suam"}},
{"cid":"008411c","count":"4","info":{"field_genre": "HV", "field_full_text": "Hujus obtentu deus alme nostris parce jam culpis vitiis revulsis quo tibi puri resonet per aevum pectoris hymnus"}},
{"cid":"008390","count":"3","info":{"field_genre": "H", "field_full_text": "Sanctorum meritis inclyta gaudia pangamus socii gestaque fortia gliscit animus promere cantibus victorum genus optimum"}},
{"cid":"007713","count":"2","info":{"field_genre": "R", "field_full_text": "Sub altare dei audivi voces occisorum dicentium quare non defendis sanguinem nostrum et acceperunt divinum responsum adhuc sustinete modicum tempus donec impleatur numerus fratrum vestrorum"}},
{"cid":"909030","count":"1","info":{"field_genre": "IP", "field_full_text": "Venite exsultemus domino jubilemus deo salutari nostro praeoccupemus faciem ejus in confessione et in psalmis jubilemus ei | Quoniam deus magnus dominus et rex magnus super omnes deos quoniam non repellet dominus plebem suam quia in manu ejus sunt omnes fines terrae et altitudines montium ipse conspicit | Quoniam ipsius est mare et ipse fecit illud et aridam fundaverunt manus ejus venite adoremus et procidamus ante deum ploremus coram domino qui fecit nos quia ipse est dominus deus noster nos autem populus ejus et oves pascuae ejus | Hodie si vocem ejus audieritis nolite obdurare corda vestra sicut in exacerbatione secundum diem tentationis in deserto ubi tentaverunt me patres vestri probaverunt et viderunt opera mea | Quadraginta annis proximus fui generationi huic et dixi semper hi errant corde ipsi vero non cognoverunt vias meas quibus juravi in ira mea si introibunt in requiem meam | Gloria patri et filio et spiritui sancto sicut erat in principio et nunc et semper et in saecula saeculorum amen"}}
]
"""
# should be equivalent to:
# should be contained in:
# >>> requests.get("https://cantusindex.uwaterloo.ca/json-nextchants/001010").text
# this doesn't include the BOM which we expect to see at the beginning of response.text
# CI seems to present these, sorted by "count", in descending order.
Expand Down
Loading

0 comments on commit 3e8f70c

Please sign in to comment.