Skip to content

Commit

Permalink
Merge pull request #5 from stickman33/main
Browse files Browse the repository at this point in the history
Lower docker image size
Limit video size
  • Loading branch information
aigoncharov authored Sep 27, 2024
2 parents 7ed740e + 9ebd72a commit 78b70f3
Show file tree
Hide file tree
Showing 5 changed files with 112 additions and 86 deletions.
11 changes: 9 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,9 +1,16 @@
FROM python:3.12-bookworm
FROM python:3.12-slim AS builder

WORKDIR /usr/src/app

COPY requirements.txt ./
RUN pip install --no-cache-dir -r requirements.txt

RUN pip install --no-cache-dir --target=/usr/src/app/dependencies -r requirements.txt

FROM python:3.12-slim

WORKDIR /usr/src/app

COPY --from=builder /usr/src/app/dependencies /usr/local/lib/python3.12/site-packages

COPY telegram_to_rss ./telegram_to_rss

Expand Down
2 changes: 2 additions & 0 deletions telegram_to_rss/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
initial_feed_size = int(os.environ.get("INITIAL_FEED_SIZE") or 50)
base_url = os.environ.get("BASE_URL")
bind = os.environ.get("BIND") or "127.0.0.1:3042"
# Upper bound for downloaded videos, in megabytes (default 10). Using `or`
# instead of a .get() default also covers MAX_VIDEO_SIZE_MB being set to an
# empty string, which int() would reject; INITIAL_FEED_SIZE is read the same way.
max_video_size_mb = int(os.environ.get("MAX_VIDEO_SIZE_MB") or 10)
# The same limit expressed in bytes.
max_video_size = max_video_size_mb * 1024 * 1024

loglevel = os.environ.get("LOGLEVEL", "INFO").upper()

Expand Down
6 changes: 6 additions & 0 deletions telegram_to_rss/generate_feed.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,14 @@ def generate_feed(feed_render_dir: Path, feed: Feed):

media_content = ""
media_download_failure = False
media_too_large = False

# processing mediafiles
for media_path in feed_entry.media:
if media_path == "FAIL":
media_download_failure = True
elif media_path == "TOO_LARGE":
media_too_large = True
else:
media_url = "{}/static/{}".format(base_url, media_path)

Expand All @@ -75,6 +78,8 @@ def generate_feed(feed_render_dir: Path, feed: Feed):
content += "<br /><strong>This message has unsupported attachment. Open Telegram to view it.</strong>"
if media_download_failure:
content += "<br /><strong>Downloading some of the media for this message failed. Open Telegram to view it.</strong>"
if media_too_large:
content += "<br /><strong>The video is too large.</strong>"

ET.SubElement(rss_item_el, "description").text = content
ET.SubElement(rss_item_el, "pubDate").text = feed_entry.date.isoformat()
Expand All @@ -89,6 +94,7 @@ def generate_feed(feed_render_dir: Path, feed: Feed):

logging.info("generate_feed -> done %s %s", feed.name, feed.id)


async def update_feeds_cache(feed_render_dir: str):
feeds = await Feed.all().prefetch_related(
Prefetch("entries", queryset=FeedEntry.all().order_by("-date"))
Expand Down
171 changes: 88 additions & 83 deletions telegram_to_rss/poll_telegram.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,18 +11,22 @@ class TelegramPoller:
_message_limit: int
_new_feed_limit: int
_static_path: Path
_max_video_size: int

def __init__(
    self,
    client: TelegramToRssClient,
    message_limit: int,
    new_feed_limit: int,
    static_path: Path,
    max_video_size: int,
) -> None:
    """Remember the client and the limits the poller works with.

    Args:
        client: Telegram client wrapper the poller talks to.
        message_limit: Stored as ``_message_limit``.
        new_feed_limit: Stored as ``_new_feed_limit``.
        static_path: Directory under which downloaded media is placed.
        max_video_size: Largest video size that will still be downloaded;
            compared against the document size when processing messages.
    """
    # Collaborators first, then the numeric limits.
    self._client, self._static_path = client, static_path
    self._message_limit, self._new_feed_limit = message_limit, new_feed_limit
    self._max_video_size = max_video_size


async def fetch_dialogs(self):
tg_dialogs = await self._client.list_dialogs()
Expand Down Expand Up @@ -124,90 +128,64 @@ async def _process_new_dialog_messages(
self, feed: Feed, dialog_messages: list[custom.Message]
):
filtered_dialog_messages: list[custom.Message] = []
for dialog_message in dialog_messages:
logging.debug(
"TelegramPoller._process_new_dialog_messages -> processing message %s %s %s %s",
dialog_message.id,
dialog_message.grouped_id,
dialog_message.photo is not None,
dialog_message.text,
)
logging.info(f"Processing {len(dialog_messages)} messages from {feed.name}")

if dialog_message.text is None:
continue
for dialog_message in dialog_messages:
try:
logging.debug(
"Processing message ID: %s, grouped_id: %s, has photo: %s, has media: %s, text: %s",
dialog_message.id,
dialog_message.grouped_id,
dialog_message.photo is not None,
dialog_message.media is not None,
dialog_message.text,
)

dialog_message.downloaded_media = []

if (
dialog_message.grouped_id is None
or len(filtered_dialog_messages) == 0
or dialog_message.grouped_id != filtered_dialog_messages[-1].grouped_id
):
filtered_dialog_messages.append(dialog_message)

if (
len(filtered_dialog_messages) != 0
and dialog_message.grouped_id == filtered_dialog_messages[-1].grouped_id
and len(dialog_message.text) > len(filtered_dialog_messages[-1].text)
):
filtered_dialog_messages[-1].text = dialog_message.text

last_processed_message = filtered_dialog_messages[-1]
if dialog_message.photo:
try:
feed_entry_media_id = "{}-{}".format(
to_feed_entry_id(feed, dialog_message),
len(last_processed_message.downloaded_media),
)
media_path = self._static_path.joinpath(feed_entry_media_id)

def progress_callback(current, total, media_path=media_path):
logging.debug(
"TelegramPoller._process_new_dialog_messages -> downloading media %s: %s out of %s",
media_path,
current,
total,
)

res_path = await dialog_message.download_media(
file=media_path, progress_callback=progress_callback
)
last_processed_message.downloaded_media.append(Path(res_path).name)
except Exception as e:
logging.warning(
f"TelegramPoller._process_new_dialog_messages -> downloading media failed with {e} for message {dialog_message.id} {dialog_message.date} {dialog_message.text}",
)
last_processed_message.downloaded_media.append("FAIL")
elif isinstance(dialog_message.media, types.MessageMediaDocument):
document = dialog_message.media.document
mime_type = getattr(document, 'mime_type', None)
if mime_type and mime_type.startswith("video/"):
try:
feed_entry_media_id = "{}-{}".format(
to_feed_entry_id(feed, dialog_message),
len(last_processed_message.downloaded_media),
)
media_path = self._static_path.joinpath(feed_entry_media_id)

def progress_callback(current, total, media_path=media_path):
if dialog_message.text is None:
continue

dialog_message.downloaded_media = []

if (
dialog_message.grouped_id is None
or len(filtered_dialog_messages) == 0
or dialog_message.grouped_id != filtered_dialog_messages[-1].grouped_id
):
filtered_dialog_messages.append(dialog_message)
else:
if len(dialog_message.text) > len(filtered_dialog_messages[-1].text):
filtered_dialog_messages[-1].text = dialog_message.text

last_processed_message = filtered_dialog_messages[-1]

if dialog_message.photo:
await self._download_media(dialog_message, last_processed_message, feed, 'photo')

if isinstance(dialog_message.media, types.MessageMediaDocument):
document = dialog_message.media.document
mime_type = getattr(document, 'mime_type', None)
if mime_type:
if mime_type.startswith("video/"):
video_size = document.size
if video_size > self._max_video_size:
logging.info(
f"Video in message {dialog_message.id} is too large ({video_size} bytes). Skipping download."
)
last_processed_message.downloaded_media.append("TOO_LARGE")
continue
await self._download_media(dialog_message, last_processed_message, feed, 'video')
elif mime_type.startswith("image/"):
await self._download_media(dialog_message, last_processed_message, feed, 'image')
else:
logging.debug(
"TelegramPoller._process_new_dialog_messages -> downloading video %s: %s out of %s",
media_path,
current,
total,
f"Unsupported media type '{mime_type}' in message {dialog_message.id}"
)

res_path = await dialog_message.download_media(
file=media_path, progress_callback=progress_callback
)
last_processed_message.downloaded_media.append(Path(res_path).name)
except Exception as e:
logging.warning(
f"TelegramPoller._process_new_dialog_messages -> downloading video failed with {e} for message {dialog_message.id} {dialog_message.date} {dialog_message.text}",
)
last_processed_message.downloaded_media.append("FAIL")

# creating FeedEntry with mediafiles
last_processed_message.has_unsupported_media = True

except Exception as e:
logging.error(f"Error processing message {dialog_message.id}: {e}", exc_info=True)
continue

feed_entries: list[FeedEntry] = []
for dialog_message in filtered_dialog_messages:
feed_entry_id = to_feed_entry_id(feed, dialog_message)
Expand All @@ -218,20 +196,47 @@ def progress_callback(current, total, media_path=media_path):
message=dialog_message.text,
date=dialog_message.date,
media=dialog_message.downloaded_media,
has_unsupported_media=dialog_message.media is not None
and not isinstance(dialog_message.media, (types.MessageMediaPhoto, types.MessageMediaDocument)),
has_unsupported_media=getattr(dialog_message, 'has_unsupported_media', False),
)
)
return feed_entries

async def _download_media(
    self, dialog_message, last_processed_message, feed, media_type: str
) -> None:
    """Download one attachment of ``dialog_message`` into the static directory.

    The outcome is appended to ``last_processed_message.downloaded_media``:
    the downloaded file's name on success, or the marker ``"FAIL"`` when the
    download raises or produces no file. Download errors are logged and
    swallowed; they are never propagated to the caller.

    Args:
        dialog_message: Message whose ``download_media`` coroutine is awaited.
        last_processed_message: Message that collects the results; the current
            length of its ``downloaded_media`` list becomes the numeric suffix
            of the target file name.
        feed: Feed the message belongs to; used to build the entry id.
        media_type: Label used only in log messages.
    """
    try:
        media_index = len(last_processed_message.downloaded_media)
        feed_entry_media_id = "{}-{}".format(
            to_feed_entry_id(feed, dialog_message), media_index
        )
        media_path = self._static_path.joinpath(feed_entry_media_id)

        # media_path is bound as a default so the callback keeps this path.
        def progress_callback(current, total, media_path=media_path):
            logging.debug(
                "Downloading %s %s: %s out of %s",
                media_type,
                media_path,
                current,
                total,
            )

        res_path = await dialog_message.download_media(
            file=media_path, progress_callback=progress_callback
        )

        if res_path is None:
            # NOTE(review): download_media is presumably Telethon's, which
            # returns None when nothing was downloaded. Handle that explicitly
            # instead of relying on Path(None) raising a confusing TypeError.
            logging.warning(
                "Downloading %s produced no file for message %s %s %s",
                media_type,
                dialog_message.id,
                dialog_message.date,
                dialog_message.text,
            )
            last_processed_message.downloaded_media.append("FAIL")
            return

        # Only the file name is stored, not the full path.
        last_processed_message.downloaded_media.append(Path(res_path).name)
        logging.debug("Downloaded %s to %s", media_type, res_path)
    except Exception as e:
        # Best effort: a failed download must not abort message processing.
        logging.warning(
            "Downloading %s failed with %s for message %s %s %s",
            media_type,
            e,
            dialog_message.id,
            dialog_message.date,
            dialog_message.text,
        )
        last_processed_message.downloaded_media.append("FAIL")


def to_feed_entry_id(feed: Feed, dialog_message: custom.Message):
    """Build a feed-entry id: the feed id and the message id joined by ``--``."""
    return f"{feed.id}--{dialog_message.id}"


def parse_feed_entry_id(id: str) -> tuple[int, int]:
    """Split a feed-entry id of the form ``<channel_id>--<message_id>``.

    Args:
        id: Entry id string with the two integer parts separated by ``--``.

    Returns:
        A ``(channel_id, message_id)`` tuple of ints.

    Raises:
        ValueError: If ``id`` does not split into exactly two parts on ``--``
            or either part is not an integer.
    """
    channel_id, message_id = id.split("--")
    return int(channel_id), int(message_id)


async def reset_feeds_in_db(telegram_poller: TelegramPoller):
Expand Down
8 changes: 7 additions & 1 deletion telegram_to_rss/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
update_interval_seconds,
db_path,
loglevel,
max_video_size,
)
from telegram_to_rss.qr_code import get_qr_code_image
from telegram_to_rss.db import init_feeds_db, close_feeds_db
Expand All @@ -24,7 +25,11 @@
from telegram_to_rss.models import Feed
import logging

# Configure the root logger exactly once. basicConfig() does nothing when the
# root logger already has handlers, so an earlier bare basicConfig(level=...)
# call would make the format/datefmt below silently ineffective.
logging.basicConfig(
    level=loglevel,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)

app = Quart(__name__, static_folder=static_path, static_url_path="/static")
client = TelegramToRssClient(
Expand All @@ -35,6 +40,7 @@
message_limit=feed_size_limit,
new_feed_limit=initial_feed_size,
static_path=static_path,
max_video_size=max_video_size,
)
rss_task: asyncio.Task | None = None

Expand Down

0 comments on commit 78b70f3

Please sign in to comment.