Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: upgrade to python 3.13 #322

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
4 changes: 2 additions & 2 deletions .github/workflows/checks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ jobs:
- uses: actions/checkout@v4
- uses: actions/setup-python@v5
with:
python-version: "3.12.6"
python-version: "3.13.0"
cache: "pip"
- uses: pre-commit/[email protected]
unittests:
Expand All @@ -29,7 +29,7 @@ jobs:
cache-dependency-glob: |
**/uv.lock
**/pyproject.toml
- run: uv python install 3.12.6
- run: uv python install 3.13.0
- run: uv sync --frozen --all-extras --no-install-project
- run: uv run pyright
- run: uv run xargs -I{} python -c "import nltk; nltk.download('{}')" < ./nltk.txt
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Install Python dependencies
FROM python:3.12.6-bookworm AS python-base
FROM python:3.13.0-bookworm AS python-base

ENV LC_CTYPE=C.utf8 \
PYTHONUNBUFFERED=1 \
Expand Down
6 changes: 4 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ Radiofeed requires the following basic dependencies to get started:
* Python 3.13
* [uv](https://docs.astral.sh)

**Note:** if you don't have the right version of Python you can use `uv python install 3.12.x`.
**Note:** if you don't have the right version of Python you can use `uv python install 3.13.x`.

For ease of local development a `docker-compose.yml` file is provided which includes Docker images:

Expand All @@ -28,7 +28,9 @@ The [justfile](https://github.com/casey/just) has some convenient shortcuts for
* `just update`: update dependencies to latest available versions
* `just clean`: remove all non-committed files and other artifacts
* `just serve`: run the development server and Tailwind JIT compiler
* `just test`: run the test suite
* `just shell`: open a shell in the development environment
* `just test`: run unit tests
* `just check`: run unit tests and linters

The install command will also create a `.env` file with default settings for local development, if one does not already exist.

Expand Down
9 changes: 6 additions & 3 deletions justfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,15 +18,15 @@ check:
serve:
./manage.py tailwind runserver_plus

shell:
./manage.py shell_plus

clean:
git clean -Xdf

test *args:
pytest {{ args }}

precommitall:
pre-commit run -a

typecheck:
pyright

Expand All @@ -48,5 +48,8 @@ precommmitinstall:
precommitupdate:
pre-commit autoupdate

precommitall:
pre-commit run -a

nltkdownload:
uv run xargs -I{} python -c "import nltk; nltk.download('{}')" < ./nltk.txt
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ description = "Podcast aggregator app"
authors = [
{name = "Dan Jacob", email = "[email protected]"},
]
requires-python = "==3.12.*"
requires-python = "==3.13.*"
readme = "README.md"
license = {text = "MIT"}
dependencies = [
Expand Down Expand Up @@ -93,6 +93,7 @@ testpaths = ["radiofeed", "templates"]
env = [
"COVERAGE_CORE=sysmon",
"SECURE_SSL_REDIRECT=false",
"USE_CONNECTION_POOL=false",
"USE_COLLECTSTATIC=false",
"USE_X_FORWARDED_HOST=false",
]
Expand Down
149 changes: 76 additions & 73 deletions radiofeed/feedparser/feed_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,55 +88,39 @@ def parse(self, client: Client) -> None:
response: httpx.Response | None = None
try:
response = self._get_response(client)

content_hash = self._make_content_hash(response)
self._check_duplicates(response, content_hash)
self._handle_update(

self._parse_ok(
response=response,
content_hash=content_hash,
feed=rss_parser.parse_rss(response.content),
)
except FeedParserError as exc:
self._handle_error(exc, response or exc.response)

def _make_content_hash(self, response: httpx.Response) -> str:
content_hash = make_content_hash(response.content)

# check content hash has changed
if content_hash == self._podcast.content_hash:
raise NotModifiedError

return content_hash

def _check_duplicates(self, response: httpx.Response, content_hash: str) -> None:
# check no other podcast with this RSS URL or identical content
if (
Podcast.objects.exclude(pk=self._podcast.pk)
.filter(Q(rss=response.url) | Q(content_hash=content_hash))
.exists()
):
raise DuplicateError
self._parse_error(exc, response or exc.response)

def _handle_update(
def _parse_ok(
self,
*,
response: httpx.Response,
content_hash: str,
feed: Feed,
) -> None:
categories, keywords = self._parse_taxonomy(feed)
categories_dct = get_categories()

try:
with transaction.atomic():
self._podcast_update(
num_retries=0,
parser_error="",
content_hash=content_hash,
keywords=keywords,
rss=response.url,
active=not (feed.complete),
etag=self._parse_etag(response),
modified=self._parse_modified(response),
extracted_text=self._extract_text(feed),
keywords=self._parse_keywords(feed, categories_dct),
extracted_text=self._tokenize_content(feed),
frequency=scheduler.schedule(feed),
**feed.model_dump(
exclude={
Expand All @@ -147,41 +131,20 @@ def _handle_update(
),
)

self._podcast.categories.set(categories)
self._podcast.categories.set(
self._parse_categories(feed, categories_dct)
)

self._episode_updates(feed)

self._logger.success("Feed updated")
except DataError as exc:
raise InvalidDataError from exc

def _get_response(self, client: Client) -> httpx.Response:
try:
try:
return client.get(self._podcast.rss, headers=self._get_headers())
except httpx.HTTPStatusError as exc:
if exc.response.is_redirect:
raise NotModifiedError(response=exc.response) from exc
if exc.response.is_client_error:
raise InaccessibleError(response=exc.response) from exc
raise
except httpx.HTTPError as exc:
raise UnavailableError from exc

def _parse_etag(self, response: httpx.Response) -> str:
return response.headers.get("ETag", "")

def _parse_modified(self, response: httpx.Response) -> datetime | None:
return parse_date(response.headers.get("Last-Modified"))

def _get_headers(self) -> dict[str, str]:
headers = {"Accept": self._accept_header}
if self._podcast.etag:
headers["If-None-Match"] = quote_etag(self._podcast.etag)
if self._podcast.modified:
headers["If-Modified-Since"] = http_date(self._podcast.modified.timestamp())
return headers

def _handle_error(
self, exc: FeedParserError, response: httpx.Response | None = None
def _parse_error(
self,
exc: FeedParserError,
response: httpx.Response | None = None,
) -> None:
active: bool = True
num_retries: int = self._podcast.num_retries
Expand Down Expand Up @@ -229,6 +192,51 @@ def _handle_error(
# re-raise original exception
raise exc

def _get_response(self, client: Client) -> httpx.Response:
    """Fetch the podcast RSS feed, mapping HTTP failures to parser errors.

    Raises:
        NotModifiedError: the server answered with a redirect status.
        InaccessibleError: the server answered with a 4xx client error.
        UnavailableError: any other transport or server-side HTTP failure.
    """
    try:
        return client.get(self._podcast.rss, headers=self._get_headers())
    except httpx.HTTPStatusError as exc:
        failed = exc.response
        if failed.is_redirect:
            raise NotModifiedError(response=failed) from exc
        if failed.is_client_error:
            raise InaccessibleError(response=failed) from exc
        # Server errors fall through to the generic "unavailable" case.
        raise UnavailableError from exc
    except httpx.HTTPError as exc:
        raise UnavailableError from exc

def _make_content_hash(self, response: httpx.Response) -> str:
    """Hash the response body and return it if it differs from the stored hash.

    Raises:
        NotModifiedError: the body hash matches the podcast's stored hash,
            meaning the feed content is unchanged and parsing can stop early.
    """
    new_hash = make_content_hash(response.content)
    if new_hash != self._podcast.content_hash:
        return new_hash
    raise NotModifiedError

def _check_duplicates(self, response: httpx.Response, content_hash: str) -> None:
    """Guard against another podcast sharing this RSS URL or identical content.

    Raises:
        DuplicateError: some other podcast row already has this final
            response URL or the same content hash.
    """
    others = Podcast.objects.exclude(pk=self._podcast.pk)
    is_duplicate = others.filter(
        Q(rss=response.url) | Q(content_hash=content_hash)
    ).exists()
    if is_duplicate:
        raise DuplicateError

def _parse_etag(self, response: httpx.Response) -> str:
return response.headers.get("ETag", "")

def _parse_modified(self, response: httpx.Response) -> datetime | None:
    """Parse the Last-Modified header into a datetime, or None when missing/invalid."""
    last_modified = response.headers.get("Last-Modified")
    return parse_date(last_modified)

def _get_headers(self) -> dict[str, str]:
    """Build request headers, adding conditional-fetch headers when available.

    If the podcast has a stored ETag and/or Last-Modified timestamp, send
    If-None-Match / If-Modified-Since so the server can reply 304.
    """
    headers: dict[str, str] = {"Accept": self._accept_header}
    etag = self._podcast.etag
    if etag:
        headers["If-None-Match"] = quote_etag(etag)
    modified = self._podcast.modified
    if modified:
        headers["If-Modified-Since"] = http_date(modified.timestamp())
    return headers

def _podcast_update(self, **fields) -> None:
now = timezone.now()

Expand All @@ -238,34 +246,29 @@ def _podcast_update(self, **fields) -> None:
**fields,
)

def _parse_taxonomy(self, feed: Feed) -> tuple[list[Category], str]:
categories: list[Category] = []
keywords: str = ""

if category_names := {c.casefold() for c in feed.categories}:
categories_dct = get_categories()

categories = [
categories_dct[name]
for name in category_names
if name in categories_dct
]

keywords = " ".join(
[name for name in category_names if name not in categories_dct]
)
def _parse_keywords(self, feed: Feed, categories_dct: dict[str, Category]) -> str:
return " ".join(
[value for value in feed.categories if value not in categories_dct]
)

return categories, keywords
def _parse_categories(
self, feed: Feed, categories_dct: dict[str, Category]
) -> list[Category]:
return [
categories_dct[value]
for value in feed.categories
if value in categories_dct
]

def _extract_text(self, feed: Feed) -> str:
def _tokenize_content(self, feed: Feed) -> str:
text = " ".join(
value
for value in [
feed.title,
feed.description,
feed.owner,
]
+ feed.categories
+ list(feed.categories)
+ [item.title for item in feed.items][:6]
if value
)
Expand Down
16 changes: 14 additions & 2 deletions radiofeed/feedparser/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ class Item(BaseModel):
guid: str = Field(..., min_length=1)
title: str = Field(..., min_length=1)

categories: list[str] = Field(default_factory=list)
categories: set[str] = Field(default_factory=set)

description: EmptyIfNone = ""
keywords: EmptyIfNone = ""
Expand Down Expand Up @@ -158,6 +158,12 @@ class Item(BaseModel):
),
] = DEFAULT_EPISODE_TYPE

@field_validator("categories", mode="after")
@classmethod
def validate_categories(cls, value: Any) -> set[str]:
    """Casefold category names, dropping empty values.

    The set comprehension already de-duplicates, so the original
    intermediate ``set(filter(None, value))`` was redundant; a simple
    truthiness guard in the comprehension is equivalent.
    """
    return {c.casefold() for c in value if c}

@field_validator("pub_date", mode="before")
@classmethod
def validate_pub_date(cls, value: Any) -> datetime:
Expand Down Expand Up @@ -247,7 +253,7 @@ class Feed(BaseModel):

items: list[Item]

categories: list[str] = Field(default_factory=list)
categories: set[str] = Field(default_factory=set)

@field_validator("language", mode="before")
@classmethod
Expand All @@ -257,6 +263,12 @@ def validate_language(cls, value: Any) -> str:
value.casefold()[:2] if value and len(value) > 1 else cls.DEFAULT_LANGUAGE
)

@field_validator("categories", mode="after")
@classmethod
def validate_categories(cls, value: Any) -> set[str]:
    """Casefold category names, dropping empty values.

    The set comprehension already de-duplicates, so the original
    intermediate ``set(filter(None, value))`` was redundant; a simple
    truthiness guard in the comprehension is equivalent.
    """
    return {c.casefold() for c in value if c}

@field_validator("complete", mode="before")
@classmethod
def validate_complete(cls, value: Any) -> bool:
Expand Down
7 changes: 4 additions & 3 deletions radiofeed/feedparser/tests/test_feed_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -158,9 +158,10 @@ def test_parse_ok(self, categories):
assert podcast.description == "Blog and Podcast specializing in offbeat news"
assert podcast.owner == "8th Kind"

assert (
podcast.extracted_text
== "mysterious universe blog specializing offbeat th kind science medicine science social science religion spirituality spirituality society culture philosophy mu tibetan zombie mu saber tooth tiger king mu kgb cop mu joshua cutchin timothy renner mu squid router mu jim bruton"
tokens = set(podcast.extracted_text.split())

assert tokens == set(
"mysterious universe blog specializing offbeat th kind science spirituality science medicine society culture philosophy social science religion spirituality mu tibetan zombie mu saber tooth tiger king mu kgb cop mu joshua cutchin timothy renner mu squid router mu jim bruton".split()
)

assert podcast.modified
Expand Down
6 changes: 3 additions & 3 deletions radiofeed/feedparser/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ def test_length_valid(self):

def test_default_keywords_from_categories(self):
item = Item(**ItemFactory(categories=["Gaming", "Hobbies", "Video Games"]))
assert item.keywords == "Gaming Hobbies Video Games"
assert set(item.keywords.split()) == {"gaming", "hobbies", "video", "games"}

def test_defaults(self):
item = Item(**ItemFactory())
assert item.explicit is False
assert item.episode_type == "full"
assert item.categories == []
assert item.categories == set()
assert item.keywords == ""

@pytest.mark.parametrize(
Expand Down Expand Up @@ -102,5 +102,5 @@ def test_defaults(self, item):
assert feed.explicit is False
assert feed.language == "en"
assert feed.description == ""
assert feed.categories == []
assert feed.categories == set()
assert feed.pub_date == item.pub_date
Loading