diff --git a/reflex/app.py b/reflex/app.py
index eb2c4a1b17..d71f76eef4 100644
--- a/reflex/app.py
+++ b/reflex/app.py
@@ -25,13 +25,14 @@
     Callable,
     Coroutine,
     Dict,
+    Mapping,
     MutableMapping,
     Type,
     get_args,
     get_type_hints,
 )
 
-from fastapi import FastAPI, HTTPException, Request
+from fastapi import FastAPI, HTTPException, Request, Response
 from fastapi import UploadFile as FastAPIUploadFile
 from fastapi.middleware import cors
 from fastapi.responses import JSONResponse, StreamingResponse
@@ -87,6 +88,7 @@
     replace_brackets_with_keywords,
     verify_route_validity,
 )
+from reflex.sitemap import PageConfig, generate_sitemaps, read_sitemap_file
 from reflex.state import (
     BaseState,
     RouterData,
@@ -411,6 +413,8 @@ class App(MiddlewareMixin, LifespanMixin):
     # Put the toast provider in the app wrap.
     toaster: Component | None = dataclasses.field(default_factory=toast.provider)
 
+    _sitemap_properties: Dict[str, PageConfig] = dataclasses.field(default_factory=dict)
+
     @property
     def api(self) -> FastAPI | None:
         """Get the backend api.
@@ -605,6 +609,24 @@ def _add_default_endpoints(self):
 
         self.api.get(str(constants.Endpoint.PING))(ping)
         self.api.get(str(constants.Endpoint.HEALTH))(health)
+        self.api.get(str(constants.Endpoint.SITEMAP))(self.serve_sitemap)
+
+    async def serve_sitemap(self) -> Response:
+        """Asynchronously serve the sitemap as an XML response.
+
+        The sitemap.xml file written when the app is compiled is served as a Response. If that file does not exist
+        yet, a new sitemap is generated and saved to sitemap.xml before being served.
+
+        Returns:
+            Response: An HTTP response with the XML sitemap content and the media type set to "application/xml".
+        """
+        try:
+            sitemap = read_sitemap_file()
+        except FileNotFoundError:
+            # No sitemap has been written yet: generate it from the registered pages.
+            generate_sitemaps(self._sitemap_properties)
+            sitemap = read_sitemap_file()
+        return Response(content=sitemap, media_type="application/xml")
 
     def _add_optional_endpoints(self):
         """Add optional api endpoints (_upload)."""
@@ -682,6 +704,8 @@ def add_page(
         image: str = constants.DefaultPage.IMAGE,
         on_load: EventType[()] | None = None,
         meta: list[dict[str, str]] = constants.DefaultPage.META_LIST,
+        sitemap_priority: float = constants.DefaultPage.SITEMAP_PRIORITY,
+        sitemap_changefreq: str = constants.DefaultPage.SITEMAP_CHANGEFREQ,
         context: dict[str, Any] | None = None,
     ):
         """Add a page to the app.
@@ -697,6 +721,8 @@ def add_page(
             image: The image to display on the page.
             on_load: The event handler(s) that will be called each time the page load.
             meta: The metadata of the page.
+            sitemap_priority: The priority of the page in the sitemap. When left at the default, the priority is calculated based on the depth of the route.
+            sitemap_changefreq: The change frequency of the page in the sitemap. Defaults to 'weekly'.
             context: Values passed to page for custom page-specific logic.
 
         Raises:
@@ -748,7 +774,7 @@ def add_page(
             )
 
         # Setup dynamic args for the route.
-        # this state assignment is only required for tests using the deprecated state kwarg for App
+        # This state assignment is only required for tests using the deprecated state kwarg for App
         state = self._state if self._state else State
         state.setup_dynamic_args(get_route_args(route))
 
@@ -757,6 +783,11 @@ def add_page(
                 on_load if isinstance(on_load, list) else [on_load]
             )
 
+        self._sitemap_properties[route] = {
+            "priority": sitemap_priority,
+            "changefreq": sitemap_changefreq,
+        }
+
         self._unevaluated_pages[route] = UnevaluatedPage(
             component=component,
             route=route,
@@ -793,6 +824,16 @@ def _compile_page(self, route: str, save_page: bool = True):
         if save_page:
             self._pages[route] = component
 
+    def get_sitemap_properties(self) -> Mapping[str, PageConfig]:
+        """Get the sitemap properties.
+
+        Returns:
+            The sitemap properties.
+        """
+        return {
+            route: value.copy() for route, value in self._sitemap_properties.items()
+        }
+
     def get_load_events(self, route: str) -> list[IndividualEventType[()]]:
         """Get the load events for a route.
 
@@ -1083,6 +1124,9 @@ def _compile(self, export: bool = False):
 
         self._pages = {}
 
+        # generate sitemaps from sitemap properties
+        generate_sitemaps(self._sitemap_properties)
+
         def get_compilation_time() -> str:
             return str(datetime.now().time()).split(".")[0]
 
diff --git a/reflex/constants/event.py b/reflex/constants/event.py
index 7b58c99cf6..1eb6bf8f42 100644
--- a/reflex/constants/event.py
+++ b/reflex/constants/event.py
@@ -13,6 +13,7 @@ class Endpoint(Enum):
     AUTH_CODESPACE = "auth-codespace"
     HEALTH = "_health"
     ALL_ROUTES = "_all_routes"
+    SITEMAP = "sitemap.xml"
 
     def __str__(self) -> str:
         """Get the string representation of the endpoint.
diff --git a/reflex/constants/route.py b/reflex/constants/route.py
index ab00fab153..510b2c0cd3 100644
--- a/reflex/constants/route.py
+++ b/reflex/constants/route.py
@@ -61,6 +61,10 @@ class DefaultPage(SimpleNamespace):
     IMAGE = "favicon.ico"
     # The default meta list to show for Reflex apps.
     META_LIST = []
+    # The default change frequency for sitemap generation.
+    SITEMAP_CHANGEFREQ = "weekly"
+    # The default priority for sitemap generation (a sentinel: the priority is then derived from the route depth).
+    SITEMAP_PRIORITY = 10.0
 
 
 # 404 variables
diff --git a/reflex/sitemap.py b/reflex/sitemap.py
new file mode 100644
index 0000000000..06a83be064
--- /dev/null
+++ b/reflex/sitemap.py
@@ -0,0 +1,148 @@
+"""This module contains functions to generate and manage the sitemap.xml file."""
+
+from pathlib import Path
+from typing import Any, Dict, List, TypedDict
+from xml.dom import minidom
+from xml.etree.ElementTree import Element, SubElement, tostring
+
+from reflex import constants
+from reflex.config import get_config
+from reflex.utils import prerequisites
+
+# _static folder in the .web directory containing the sitemap.xml file.
+_sitemap_folder_path: Path = (
+    Path.cwd() / prerequisites.get_web_dir() / constants.Dirs.STATIC
+)
+
+# sitemap file path
+_sitemap_file_path: Path = _sitemap_folder_path / "sitemap.xml"
+
+# XML namespace mandated by the sitemap protocol (it is an identifier, not a link).
+_SITEMAP_XMLNS = "http://www.sitemaps.org/schemas/sitemap/0.9"
+
+
+def check_sitemap_file_exists() -> bool:
+    """Check if the sitemap file exists.
+
+    Returns:
+        True if the sitemap file exists in the .web/_static folder.
+    """
+    return _sitemap_file_path.is_file()
+
+
+def read_sitemap_file() -> str:
+    """Read the sitemap file.
+
+    Returns:
+        The contents of the sitemap file.
+    """
+    # The sitemap is written as UTF-8, the default encoding of an XML document.
+    return _sitemap_file_path.read_text(encoding="utf-8")
+
+
+def generate_xml(links: List[Dict[str, Any]]) -> str:
+    """Generate an XML sitemap from a list of links.
+
+    Args:
+        links: A list of dictionaries where each dictionary contains 'loc' (URL of the
+            page), 'changefreq' (frequency of changes), and 'priority' (priority of
+            the page).
+
+    Returns:
+        A pretty-printed XML string representing the sitemap.
+    """
+    urlset = Element("urlset", xmlns=_SITEMAP_XMLNS)
+    for link in links:
+        url = SubElement(urlset, "url")
+        for tag in ("loc", "changefreq", "priority"):
+            SubElement(url, tag).text = str(link[tag])
+    rough_string = tostring(urlset, "utf-8")
+    reparsed = minidom.parseString(rough_string)
+    return reparsed.toprettyxml(indent=" ")
+
+
+class PageConfig(TypedDict):
+    """TypedDict for page configuration in sitemap."""
+
+    priority: float
+    changefreq: str
+
+
+def generate_sitemaps(sitemap_config: Dict[str, PageConfig]) -> None:
+    """Generate the sitemap.xml file.
+
+    This function builds a list of links from the sitemap properties of the pages
+    (location, change frequency and priority) and writes them to the sitemap.xml
+    file. Dynamic routes and the 404 page are excluded from the sitemap.
+
+    Args:
+        sitemap_config: A dictionary containing the sitemap properties for each route.
+    """
+    links = generate_links_for_sitemap(sitemap_config)
+    generate_static_sitemap(links)
+
+
+def generate_links_for_sitemap(
+    sitemap_config: Dict[str, PageConfig],
+) -> List[Dict[str, Any]]:
+    """Generate a list of links for which sitemaps are generated.
+
+    This function loops through sitemap_config and generates a list of links with
+    their respective sitemap properties such as location (URL), change frequency,
+    and priority. Dynamic routes and the 404 page are excluded from the sitemap.
+
+    Args:
+        sitemap_config: A dictionary containing the sitemap properties for each route.
+
+    Returns:
+        A list of dictionaries where each dictionary contains the 'loc' (URL of the
+        page), 'priority' and 'changefreq' of each route.
+    """
+    # The domain url is the same for every page, so read it from the config once.
+    # A trailing slash is dropped to avoid "//" when the route is appended.
+    deploy_url = (get_config().deploy_url or "").rstrip("/")
+
+    links = []
+    for route, page_config in sitemap_config.items():
+        # Ignore dynamic routes and 404
+        if ("[" in route and "]" in route) or route == "404":
+            continue
+
+        # The index route is served from the root; other routes get a leading slash.
+        path = "/" if route == "index" else route
+        if not path.startswith("/"):
+            path = f"/{path}"
+
+        priority = page_config["priority"]
+        if priority == constants.DefaultPage.SITEMAP_PRIORITY:
+            # The default indicates that the user didn't set a priority: derive it
+            # from the depth of the route (rounded to hide floating point noise).
+            depth = path.count("/")
+            priority = max(0.5, round(1.0 - depth * 0.1, 1))
+
+        links.append(
+            {
+                "loc": f"{deploy_url}{path}",
+                "changefreq": page_config["changefreq"],
+                "priority": priority,
+            }
+        )
+    return links
+
+
+def generate_static_sitemap(links: List[Dict[str, Any]]) -> None:
+    """Generate the sitemap for the given links and store it in sitemap.xml.
+
+    This function is called in two situations:
+    1. Every time the app is compiled.
+    2. When the sitemap.xml file is requested and it does not exist yet.
+
+    Args:
+        links: The list of urls for which the sitemap is to be generated.
+    """
+    sitemap = generate_xml(links)
+    _sitemap_folder_path.mkdir(parents=True, exist_ok=True)
+
+    # An already existing sitemap.xml is stale at this point, so it is safe to
+    # overwrite it.
+    _sitemap_file_path.write_text(sitemap, encoding="utf-8")
diff --git a/reflex/utils/build.py b/reflex/utils/build.py
index fdac26500d..64a5e5e9fd 100644
--- a/reflex/utils/build.py
+++ b/reflex/utils/build.py
@@ -203,13 +203,6 @@ def build(
         "Collecting build traces",
     ]
 
-    # Generate a sitemap if a deploy URL is provided.
-    if deploy_url is not None:
-        generate_sitemap_config(deploy_url, export=for_export)
-        command = "export-sitemap"
-
-        checkpoints.extend(["Loading next-sitemap", "Generation completed"])
-
     # Start the subprocess with the progress bar.
     process = processes.new_process(
         [*prerequisites.get_js_package_executor(raise_on_none=True)[0], "run", command],
diff --git a/tests/units/test_app.py b/tests/units/test_app.py
index 2df12b2518..de0b4cc0cd 100644
--- a/tests/units/test_app.py
+++ b/tests/units/test_app.py
@@ -352,6 +352,26 @@ def test_add_duplicate_page_route_error(app, first_page, second_page, route):
         app.add_page(second_page, route="/" + route.strip("/") if route else None)
 
 
+def test_add_page_with_sitemap_properties(app: App):
+    """Test if the sitemap properties of the app instance is set properly or not.
+
+    Args:
+        app: The app to test.
+    """
+    # check with given values.
+    app.add_page(
+        page1, route="/page1", sitemap_priority=0.9, sitemap_changefreq="daily"
+    )
+    assert app._sitemap_properties["page1"] == {"priority": 0.9, "changefreq": "daily"}
+
+    # check default values added.
+    app.add_page(page2, route="/page2")
+    assert app._sitemap_properties["page2"] == {
+        "priority": 10.0,
+        "changefreq": "weekly",
+    }
+
+
 def test_initialize_with_admin_dashboard(test_model):
     """Test setting the admin dashboard of an app.
 
diff --git a/tests/units/test_sitemap.py b/tests/units/test_sitemap.py
new file mode 100644
index 0000000000..39239259e9
--- /dev/null
+++ b/tests/units/test_sitemap.py
@@ -0,0 +1,183 @@
+import unittest.mock
+from pathlib import Path
+
+import pytest
+
+import reflex as rx
+from reflex import constants
+from reflex.app import App
+from reflex.sitemap import (
+    generate_links_for_sitemap,
+    generate_static_sitemap,
+    generate_xml,
+)
+from reflex.utils import prerequisites
+
+sitemap_folder_path: Path = (
+    Path.cwd() / prerequisites.get_web_dir() / constants.Dirs.STATIC
+)
+
+# sitemap file path
+sitemap_file_path: Path = sitemap_folder_path / "sitemap.xml"
+
+
+@pytest.fixture
+def app_instance():
+    """Fixture to create an instance of the app.
+
+    Returns:
+        An instance of the App class.
+    """
+    app = App()
+    return app
+
+
+def page(text: str):
+    """A simple page component for testing.
+
+    Args:
+        text: The text to display on the page.
+
+    Returns:
+        A Reflex component with the given text.
+    """
+    return rx.box(text)
+
+
+@pytest.fixture
+def index_page():
+    """Fixture that returns an IndexPage instance.
+
+    Returns:
+        An instance of IndexPage.
+    """
+    return page(text="Index")
+
+
+@pytest.fixture
+def about_page():
+    """Fixture that returns an AboutPage instance.
+
+    Returns:
+        An instance of AboutPage.
+    """
+    return page(text="About")
+
+
+mock_xml = """<?xml version="1.0" ?>
+<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
+ <url>
+  <loc>http://localhost:3000/</loc>
+  <changefreq>weekly</changefreq>
+  <priority>0.9</priority>
+ </url>
+ <url>
+  <loc>http://localhost:3000/about</loc>
+  <changefreq>weekly</changefreq>
+  <priority>0.9</priority>
+ </url>
+</urlset>
+"""
+
+mock_links = [
+    {"loc": "http://localhost:3000/", "changefreq": "weekly", "priority": 0.9},
+    {"loc": "http://localhost:3000/about", "changefreq": "weekly", "priority": 0.9},
+]
+
+
+def test_generate_xml():
+    """Test the generate_xml function."""
+    result = generate_xml(mock_links)
+    assert result.strip() == mock_xml.strip()
+
+
+def test_generate_static_sitemaps(app_instance, index_page, about_page):
+    """Test if the generated sitemap file is correctly stored in static website or not.
+
+    Args:
+        app_instance: The app instance.
+        index_page: The index page fixture.
+        about_page: The about page fixture.
+    """
+    pages = {"index": index_page, "about": about_page}
+    # remove the sitemap.xml file if it exists.
+    sitemap_file_path.unlink(missing_ok=True)
+    assert (
+        not sitemap_file_path.exists()
+    )  # check if the sitemap.xml file does not exist.
+
+    with unittest.mock.patch.object(app_instance, "_pages", pages):
+        generate_static_sitemap(mock_links)
+
+    assert sitemap_file_path.exists()  # check if the sitemap.xml file exists.
+
+
+def test_generate_links_for_sitemap():
+    """Test if the links are generated correctly for the sitemap from the sitemap config file when no deploy url is
+    given.
+    """
+    links = generate_links_for_sitemap(
+        {
+            "index": {"priority": 0.9, "changefreq": "weekly"},
+            "about": {"priority": 0.9, "changefreq": "weekly"},
+        }
+    )
+
+    # Assert that the links are generated correctly
+    assert links == [
+        {"loc": "http://localhost:3000/", "changefreq": "weekly", "priority": 0.9},
+        {
+            "loc": "http://localhost:3000/about",
+            "changefreq": "weekly",
+            "priority": 0.9,
+        },
+    ]
+
+
+def test_generate_links_for_sitemap_deploy_url():
+    """Test if the links are generated correctly for the sitemap from the sitemap config file when a deploy url is
+    given.
+    """
+    with unittest.mock.patch("reflex.sitemap.get_config") as mock_get_config:
+        mock_get_config().deploy_url = "http://www.google.com"
+
+        links = generate_links_for_sitemap(
+            {
+                "index": {"priority": 0.9, "changefreq": "weekly"},
+                "about": {"priority": 0.9, "changefreq": "weekly"},
+            }
+        )
+
+    # Assert that the links are generated correctly
+    assert links == [
+        {"loc": "http://www.google.com/", "changefreq": "weekly", "priority": 0.9},
+        {
+            "loc": "http://www.google.com/about",
+            "changefreq": "weekly",
+            "priority": 0.9,
+        },
+    ]
+
+
+def test_generate_links_for_sitemap_default_priority():
+    """Test if the priority is derived from the route depth when it is not set and if dynamic routes and the 404 page
+    are skipped.
+    """
+    links = generate_links_for_sitemap(
+        {
+            "docs/api/reference": {
+                "priority": constants.DefaultPage.SITEMAP_PRIORITY,
+                "changefreq": "weekly",
+            },
+            "posts/[slug]": {"priority": 0.9, "changefreq": "weekly"},
+            "404": {"priority": 0.9, "changefreq": "weekly"},
+        }
+    )
+
+    assert links == [
+        {
+            "loc": "http://localhost:3000/docs/api/reference",
+            "changefreq": "weekly",
+            "priority": 0.7,
+        },
+    ]