diff --git a/functions/appFunctions.py b/functions/appFunctions.py index f2d0d2b..74dafd9 100644 --- a/functions/appFunctions.py +++ b/functions/appFunctions.py @@ -15,6 +15,7 @@ def initializeFolders(): MOUNT_PATH, os.path.join(MOUNT_PATH, "movies"), os.path.join(MOUNT_PATH, "series"), + os.path.join(MOUNT_PATH, "sports"), ] for folder in folders: diff --git a/functions/fuseFilesystemFunctions.py b/functions/fuseFilesystemFunctions.py index ec8f25f..460673b 100644 --- a/functions/fuseFilesystemFunctions.py +++ b/functions/fuseFilesystemFunctions.py @@ -32,9 +32,10 @@ def __init__(self, files_list): def _build_structure(self): structure = { - '/': ['movies', 'series'], + '/': ['movies', 'series', 'sports'], '/movies': set(), - '/series': set() + '/series': set(), + '/sports': set(), } @@ -62,6 +63,13 @@ def _build_structure(self): if season_path not in structure: structure[season_path] = set() structure[season_path].add(f.get('metadata_filename')) + + else: + path = f'/sports' + + if path not in structure: + structure[path] = set() + structure[path].add(f.get('metadata_filename')) # consistent ordering for key in structure: @@ -76,9 +84,12 @@ def _build_file_map(self): if f.get('metadata_mediatype') == 'movie': path = f'/movies/{f.get("metadata_rootfoldername")}/{f.get("metadata_filename")}' file_map[path] = f - else: # series + elif f.get('metadata_mediatype') == 'series': path = f'/series/{f.get("metadata_rootfoldername")}/{f.get("metadata_foldername")}/{f.get("metadata_filename")}' file_map[path] = f + else: + path = f'/sports/{f.get("metadata_filename")}' + file_map[path] = f return file_map diff --git a/functions/mediaFunctions.py b/functions/mediaFunctions.py index 057a578..7a0aca1 100644 --- a/functions/mediaFunctions.py +++ b/functions/mediaFunctions.py @@ -1,4 +1,6 @@ import re +from datetime import datetime +from functions.regexPatterns import DATE_FIND_PATTERN, DATE_FORMAT_PATTERNS, DATE_CLEAN_PATTERN, SPORT_SEPARATOR_PATTERN, WHITESPACE_PATTERN def 
def cleanDate(date_str: str | None) -> str | None:
    """
    Normalizes a date string to the YYYY.MM.DD format.

    The whole (stripped) input has to match one of DATE_FORMAT_PATTERNS; this
    function does not search for a date inside a longer string. Entries are
    tried in list order and the first one that produces a real calendar date
    wins. Because the month-first (US) entries come before the day-first
    (European) ones in that list, an ambiguous value such as 01/02/2024 is
    read as January 2nd, while 25/12/2024 fails as a US date and is then
    accepted by the matching European entry.

    Args:
        date_str: Candidate date text. None and blank strings are allowed.

    Returns:
        The date as 'YYYY.MM.DD', or None when no pattern matches or the
        matched numbers do not form a valid date.
    """
    if not date_str:
        return None

    cleaned = str(date_str).strip()
    if not cleaned:
        return None

    for pattern, format_type in DATE_FORMAT_PATTERNS:
        match = pattern.fullmatch(cleaned)
        if not match:
            continue

        try:
            if format_type in ('DD.MM.YYYY', 'DD-MM-YYYY', 'DD/MM/YYYY'):
                day, month, year = map(int, match.groups())
            elif format_type in ('MM.DD.YYYY', 'MM/DD/YYYY', 'MM-DD-YYYY'):
                month, day, year = map(int, match.groups())
            elif format_type in ('YYYY-MM-DD', 'YYYY/MM/DD', 'YYYY.MM.DD'):
                year, month, day = map(int, match.groups())
            else:
                # Unknown label: the field order cannot be determined.
                continue

            # datetime() validates the calendar date. A ValueError here
            # (e.g. month 25) hands the value over to the next pattern,
            # which is how day-first input ends up being parsed.
            return datetime(year, month, day).strftime('%Y.%m.%d')
        except (ValueError, TypeError):
            continue

    return None


def detectSports(file_name_no_ext: str) -> dict | None:
    """
    Detects a "<team> <separator> <team>" sports file name that carries a date.

    Args:
        file_name_no_ext: File name without its extension.

    Returns:
        A dict with the keys "type" (always "sports"), "date" (YYYY.MM.DD),
        "sport_separator", "team_1" and "team_2", or None when the name has
        no parseable date, no separator, or an empty team on either side.
    """
    if not file_name_no_ext:
        return None
    file_name_no_ext = file_name_no_ext.strip()

    # Step 1: use the first date-like token that is a real date. Trying only
    # the very first token would reject names where something like "5.1"
    # appears before the actual date.
    cleaned_date = None
    for date_match in DATE_FIND_PATTERN.finditer(file_name_no_ext):
        cleaned_date = cleanDate(date_match.group(1))
        if cleaned_date:
            break
    if not cleaned_date:
        return None

    # Step 2: drop every date-like token and collapse the leftover whitespace.
    file_no_date = DATE_FIND_PATTERN.sub('', file_name_no_ext)
    file_no_date = WHITESPACE_PATTERN.sub(' ', file_no_date).strip()

    # Step 3: locate the first separator; the text on each side is a team.
    separator_match = SPORT_SEPARATOR_PATTERN.search(file_no_date)
    if not separator_match:
        return None

    team_1 = file_no_date[:separator_match.start()].strip()
    team_2 = file_no_date[separator_match.end():].strip()
    if not team_1 or not team_2:
        return None

    return {
        "type": "sports",
        "date": cleaned_date,
        # The full match includes the surrounding whitespace; strip it off.
        "sport_separator": separator_match.group(0).strip(),
        "team_1": team_1,
        "team_2": team_2,
    }
# Centralized, pre-compiled regular expressions used for media file-name parsing.

import re

# Any single character that is not a digit, '-', '.' or '/'.
DATE_CLEAN_PATTERN = re.compile(r'[^\d\-\./]')

# Date-like digit groups anywhere in a string: three dot-separated groups,
# two dot-separated groups, three '/'-separated or three '-'-separated groups.
DATE_FIND_PATTERN = re.compile(r'(\d+\.\d+\.\d+|\d+\.\d+|\d+/\d+/\d+|\d+-\d+-\d+)')

# Whitespace-delimited sports separator (vs, vs., v, v., at, @, versus),
# matched case-insensitively; the separator itself is captured.
SPORT_SEPARATOR_PATTERN = re.compile(r'\s+(vs\.?|v\.?|at|@|versus)\s+', re.IGNORECASE)

# One or more whitespace characters, used to collapse runs into one space.
WHITESPACE_PATTERN = re.compile(r'\s+')

# Anchored three-group expressions. Each is compiled once and shared by the
# labels below that use the same separator and field widths.
_YEAR_LAST_SLASH = re.compile(r'^(\d{1,2})/(\d{1,2})/(\d{4})$')
_YEAR_LAST_DOT = re.compile(r'^(\d{1,2})\.(\d{1,2})\.(\d{4})$')
_YEAR_LAST_DASH = re.compile(r'^(\d{1,2})-(\d{1,2})-(\d{4})$')
_YEAR_FIRST_DASH = re.compile(r'^(\d{4})-(\d{1,2})-(\d{1,2})$')
_YEAR_FIRST_SLASH = re.compile(r'^(\d{4})/(\d{1,2})/(\d{1,2})$')
_YEAR_FIRST_DOT = re.compile(r'^(\d{4})\.(\d{1,2})\.(\d{1,2})$')

# (compiled pattern, format label) pairs in the order they should be tried.
# The US and European entries use identical expressions, so their position in
# this list decides which label is seen first for a given string.
DATE_FORMAT_PATTERNS = [
    # US formats
    (_YEAR_LAST_SLASH, 'MM/DD/YYYY'),
    (_YEAR_LAST_DOT, 'MM.DD.YYYY'),
    (_YEAR_LAST_DASH, 'MM-DD-YYYY'),

    # ISO formats
    (_YEAR_FIRST_DASH, 'YYYY-MM-DD'),
    (_YEAR_FIRST_SLASH, 'YYYY/MM/DD'),
    (_YEAR_FIRST_DOT, 'YYYY.MM.DD'),

    # European formats
    (_YEAR_LAST_DOT, 'DD.MM.YYYY'),
    (_YEAR_LAST_DASH, 'DD-MM-YYYY'),
    (_YEAR_LAST_SLASH, 'DD/MM/YYYY'),
]
= os.path.splitext(file_name)[-1] + + file_name_no_ext, extension = os.path.splitext(file_name) + sports_data = detectSports(file_name_no_ext) + + if sports_data and sports_data.get("type") == "sports": + sport_separator = sports_data.get("sport_separator") + base_metadata["metadata_mediatype"] = "sports" + base_metadata["metadata_title"] = cleanTitle(f"{sports_data.get('team_1')} {sport_separator} {sports_data.get('team_2')}") + base_metadata["metadata_years"] = cleanYear(title_data.get("year", None)) + base_metadata["metadata_filename"] = f"{sports_data.get('date')} {sports_data.get('team_1')} {sport_separator} {sports_data.get('team_2')}{extension}" + base_metadata["metadata_rootfoldername"] = f"{base_metadata['metadata_title']} ({base_metadata['metadata_years'] or ''})" + base_metadata["team_1"] = sports_data.get("team_1") + base_metadata["team_2"] = sports_data.get("team_2") + base_metadata["date"] = sports_data.get("date") + + logging.debug(base_metadata) + return base_metadata, True, "Sports metadata detected." + try: response = search_api_http_client.get(f"/meta/search/{full_title}", params={"type": "file"}) except Exception as e: