Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Generate sitemap for dynamic deploy url #4923

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 35 additions & 2 deletions reflex/app.py
Original file line number Diff line number Diff line change
@@ -31,7 +31,7 @@
get_type_hints,
)

from fastapi import FastAPI, HTTPException, Request
from fastapi import FastAPI, HTTPException, Request, Response
from fastapi import UploadFile as FastAPIUploadFile
from fastapi.middleware import cors
from fastapi.responses import JSONResponse, StreamingResponse
@@ -89,6 +89,7 @@
replace_brackets_with_keywords,
verify_route_validity,
)
from reflex.sitemap import generate_sitemaps, read_sitemap_file
from reflex.state import (
BaseState,
RouterData,
@@ -410,6 +411,8 @@ class App(MiddlewareMixin, LifespanMixin):
# Put the toast provider in the app wrap.
toaster: Component | None = dataclasses.field(default_factory=toast.provider)

_sitemap_properties: Dict[str, Dict] = dataclasses.field(default_factory=dict)

@property
def api(self) -> FastAPI | None:
"""Get the backend api.
@@ -602,6 +605,19 @@ def _add_default_endpoints(self):

self.api.get(str(constants.Endpoint.PING))(ping)
self.api.get(str(constants.Endpoint.HEALTH))(health)
self.api.get(str(constants.Endpoint.SITEMAP))(self.serve_sitemap)

async def serve_sitemap(self) -> Response:
    """Serve the sitemap as an XML response.

    The content is read from the generated sitemap.xml file. If that file
    does not exist, the sitemap is regenerated from the sitemap properties
    registered on this app and then read again.

    Returns:
        An HTTP response with the XML sitemap content and the media type set to "application/xml".
    """
    try:
        sitemaps = read_sitemap_file()
    except FileNotFoundError:
        # No sitemap.xml on disk yet (or it was removed): build it now
        # rather than failing the request with an unhandled error.
        generate_sitemaps(self._sitemap_properties)
        sitemaps = read_sitemap_file()
    return Response(content=sitemaps, media_type="application/xml")

def _add_optional_endpoints(self):
"""Add optional api endpoints (_upload)."""
@@ -679,6 +695,8 @@ def add_page(
image: str = constants.DefaultPage.IMAGE,
on_load: EventType[()] | None = None,
meta: list[dict[str, str]] = constants.DefaultPage.META_LIST,
sitemap_priority: float = constants.DefaultPage.SITEMAP_PRIORITY,
sitemap_changefreq: str = constants.DefaultPage.SITEMAP_CHANGEFREQ,
):
"""Add a page to the app.

@@ -693,6 +711,9 @@ def add_page(
image: The image to display on the page.
on_load: The event handler(s) that will be called each time the page load.
meta: The metadata of the page.
sitemap_priority: The priority of the page in the sitemap. If left at the default value, the priority is
calculated based on the depth of the route.
sitemap_changefreq: The change frequency of the page in the sitemap. Defaults to 'weekly'.

Raises:
PageValueError: When the component is not set for a non-404 page.
@@ -743,7 +764,7 @@ def add_page(
)

# Setup dynamic args for the route.
# this state assignment is only required for tests using the deprecated state kwarg for App
# This state assignment is only required for tests using the deprecated state kwarg for App
state = self._state if self._state else State
state.setup_dynamic_args(get_route_args(route))

@@ -752,6 +773,11 @@ def add_page(
on_load if isinstance(on_load, list) else [on_load]
)

self._sitemap_properties[route] = {
"priority": sitemap_priority,
"changefreq": sitemap_changefreq,
}

self._unevaluated_pages[route] = UnevaluatedPage(
component=component,
route=route,
@@ -787,6 +813,10 @@ def _compile_page(self, route: str, save_page: bool = True):
if save_page:
self._pages[route] = component

def get_sitemap_properties(self) -> Dict[str, Dict]:
    """Return the sitemap properties stored on the app.

    Returns:
        The ``_sitemap_properties`` mapping itself (not a copy).
    """
    properties = self._sitemap_properties
    return properties

def get_load_events(self, route: str) -> list[IndividualEventType[()]]:
"""Get the load events for a route.

@@ -1068,6 +1098,9 @@ def _compile(self, export: bool = False):

self._pages = {}

# generate sitemaps from sitemap properties
generate_sitemaps(self._sitemap_properties)

def get_compilation_time() -> str:
return str(datetime.now().time()).split(".")[0]

1 change: 1 addition & 0 deletions reflex/constants/event.py
Original file line number Diff line number Diff line change
@@ -13,6 +13,7 @@ class Endpoint(Enum):
AUTH_CODESPACE = "auth-codespace"
HEALTH = "_health"
ALL_ROUTES = "_all_routes"
SITEMAP = "sitemap.xml"

def __str__(self) -> str:
"""Get the string representation of the endpoint.
4 changes: 4 additions & 0 deletions reflex/constants/route.py
Original file line number Diff line number Diff line change
@@ -61,6 +61,10 @@ class DefaultPage(SimpleNamespace):
IMAGE = "favicon.ico"
# The default meta list to show for Reflex apps.
META_LIST = []
# The default change frequency for sitemap generation.
SITEMAP_CHANGEFREQ = "weekly"
# The default priority for sitemap generation.
SITEMAP_PRIORITY = 10.0


# 404 variables
146 changes: 146 additions & 0 deletions reflex/sitemap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
"""This module contains functions to generate and manage the sitemap.xml file."""

from pathlib import Path
from typing import Dict, List
from xml.dom import minidom
from xml.etree.ElementTree import Element, SubElement, tostring

from reflex import constants
from reflex.config import get_config
from reflex.utils import prerequisites

# Folder that holds the generated sitemap: the static directory inside the
# web directory, resolved against the current working directory at import time.
_sitemap_folder_path: Path = Path.cwd().joinpath(
    prerequisites.get_web_dir(), constants.Dirs.STATIC
)

# Full path of the generated sitemap.xml file.
_sitemap_file_path: Path = _sitemap_folder_path.joinpath("sitemap.xml")


def check_sitemap_file_exists() -> bool:
    """Check whether the sitemap file exists.

    Returns:
        True if sitemap.xml is present as a regular file at the sitemap file path.
    """
    # A file can only exist when its parent folder does, so a single check
    # is sufficient (and avoids combining booleans with the bitwise ``&``).
    return _sitemap_file_path.is_file()


def read_sitemap_file() -> str:
    """Return the text stored in the sitemap file.

    Returns:
        The contents of the sitemap file.
    """
    # read_text opens the file in text mode, reads everything and closes it.
    # NOTE(review): no explicit encoding is passed, so the platform default is used.
    return _sitemap_file_path.read_text()


def generate_xml(links: List[Dict[str, str]]) -> str:
    """Generate an XML sitemap from a list of links.

    Args:
        links: A list of dictionaries where each dictionary contains 'loc' (URL of the page),
            'changefreq' (frequency of changes), and 'priority' (priority of the page). Values are
            converted with ``str`` before being written.

    Returns:
        A pretty-printed XML string representing the sitemap.
    """
    # The sitemap protocol namespace is the literal "http://" URI. Namespace
    # names are compared as plain strings, so an "https://" variant would
    # declare a different namespace.
    urlset = Element("urlset", xmlns="http://www.sitemaps.org/schemas/sitemap/0.9")
    for link in links:
        url = SubElement(urlset, "url")
        # One child element per property, in a fixed order.
        for tag in ("loc", "changefreq", "priority"):
            SubElement(url, tag).text = str(link[tag])
    # Serialize compactly first, then re-parse with minidom to pretty-print.
    rough_string = tostring(urlset, "utf-8")
    return minidom.parseString(rough_string).toprettyxml(indent=" ")


def generate_sitemaps(sitemap_config: Dict[str, Dict[str, str]]) -> None:
    """Generate the sitemap.xml file from the given sitemap properties.

    The links are built with generate_links_for_sitemap and then written out
    with generate_static_sitemap.

    Args:
        sitemap_config: A dictionary containing the sitemap properties for each route.
    """
    generate_static_sitemap(generate_links_for_sitemap(sitemap_config))


def generate_links_for_sitemap(
    sitemap_config: Dict[str, Dict[str, str]],
) -> List[dict[str, str]]:
    """Generate a list of links for which sitemaps are generated.

    Routes containing both "[" and "]" (dynamic routes) and the "404" route
    are skipped. The "index" route maps to "/", and every other route gets a
    leading "/" if it lacks one. A priority equal to
    ``constants.DefaultPage.SITEMAP_PRIORITY`` is treated as "not set" and is
    replaced by 1.0 minus 0.1 per "/" in the path, with a floor of 0.5.

    Args:
        sitemap_config: A dictionary containing the sitemap properties for each route.

    Returns:
        A list of dictionaries where each dictionary contains the 'loc' (URL of the page), 'changefreq'
        and 'priority' of each route.
    """
    # The deploy URL is identical for every route, so read the config once.
    # A missing URL becomes "" rather than the text "None", and a trailing
    # slash is dropped so joining with the route never produces "//".
    deploy_url = (get_config().deploy_url or "").rstrip("/")

    links = []
    for route, properties in sitemap_config.items():
        # Ignore dynamic routes and 404
        if ("[" in route and "]" in route) or route == "404":
            continue

        sitemap_changefreq = properties["changefreq"]
        sitemap_priority = properties["priority"]

        # Handle the index route, then make sure the path is absolute.
        path = "/" if route == "index" else route
        if not path.startswith("/"):
            path = f"/{path}"

        if sitemap_priority == constants.DefaultPage.SITEMAP_PRIORITY:
            # The default value means no priority was given for this route:
            # derive one from how deep the path is.
            depth = path.count("/")
            sitemap_priority = max(0.5, 1.0 - (depth * 0.1))

        links.append(
            {
                "loc": f"{deploy_url}{path}",
                "changefreq": sitemap_changefreq,
                "priority": sitemap_priority,
            }
        )
    return links


def generate_static_sitemap(links: List[Dict[str, str]]) -> None:
    """Render the given links as XML and store the result in sitemap.xml.

    The sitemap folder is created when missing, and an existing sitemap.xml
    is overwritten.

    Args:
        links: The list of urls for which the sitemap is to be generated.
    """
    xml_text = generate_xml(links)

    # Make sure the target folder exists before writing into it.
    _sitemap_folder_path.mkdir(parents=True, exist_ok=True)

    # Any previous sitemap.xml is replaced with the freshly generated content.
    # NOTE(review): no explicit encoding is passed, so the platform default is used.
    _sitemap_file_path.write_text(xml_text)
7 changes: 0 additions & 7 deletions reflex/utils/build.py
Original file line number Diff line number Diff line change
@@ -203,13 +203,6 @@ def build(
"Collecting build traces",
]

# Generate a sitemap if a deploy URL is provided.
if deploy_url is not None:
generate_sitemap_config(deploy_url, export=for_export)
command = "export-sitemap"

checkpoints.extend(["Loading next-sitemap", "Generation completed"])

# Start the subprocess with the progress bar.
process = processes.new_process(
[*prerequisites.get_js_package_executor(raise_on_none=True)[0], "run", command],
17 changes: 17 additions & 0 deletions tests/units/test_app.py
Original file line number Diff line number Diff line change
@@ -351,6 +351,23 @@ def test_add_duplicate_page_route_error(app, first_page, second_page, route):
app.add_page(second_page, route="/" + route.strip("/") if route else None)


def test_add_page_with_sitemap_properties(app):
    """Test that add_page records the sitemap properties for each route.

    Args:
        app: The app fixture the pages are added to.
    """
    # Explicitly passed values are stored as given.
    app.add_page(
        page1, route="/page1", sitemap_priority=0.9, sitemap_changefreq="daily"
    )
    assert app._sitemap_properties["page1"] == {"priority": 0.9, "changefreq": "daily"}

    # Omitted values fall back to the defaults.
    app.add_page(page2, route="/page2")
    assert app._sitemap_properties["page2"] == {
        "priority": 10.0,
        "changefreq": "weekly",
    }


def test_initialize_with_admin_dashboard(test_model):
"""Test setting the admin dashboard of an app.

150 changes: 150 additions & 0 deletions tests/units/test_sitemap.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
import unittest.mock
from pathlib import Path

import pytest

import reflex as rx
from reflex import Component, constants
from reflex.app import App
from reflex.sitemap import (
generate_links_for_sitemap,
generate_static_sitemap,
generate_xml,
)
from reflex.utils import prerequisites

# Folder where the sitemap is expected: the static directory inside the web directory.
sitemap_folder_path: Path = Path.cwd().joinpath(
    prerequisites.get_web_dir(), constants.Dirs.STATIC
)

# Full path of the sitemap.xml file checked by the tests.
sitemap_file_path: Path = sitemap_folder_path.joinpath("sitemap.xml")


@pytest.fixture
def app_instance():
    """Provide a newly constructed App for a test.

    Returns:
        A new App instance.
    """
    return App()


class Page(Component):
    """A simple Page component."""

    def __init__(self, text, **kwargs):
        """Initialize the Page component.

        Args:
            text: The text stored on the component and passed to rx.box in render.
            **kwargs: Keyword arguments forwarded to the parent initializer.
        """
        super().__init__(**kwargs)
        self.text = text

    def render(self):
        """Render the Page component.

        Returns:
            The result of rx.box called with the stored text.
        """
        return rx.box(self.text)


@pytest.fixture
def index_page() -> Page:
    """Fixture that provides a Page with the text "Index".

    Returns:
        A Page instance created with text="Index".
    """
    page = Page(text="Index")
    return page


@pytest.fixture
def about_page() -> Page:
    """Fixture that provides a Page with the text "About".

    Returns:
        A Page instance created with text="About".
    """
    page = Page(text="About")
    return page


# XML text that test_generate_xml compares (after stripping surrounding
# whitespace) with the output of generate_xml(mock_links).
mock_xml = """<?xml version="1.0" ?>
<urlset xmlns="https://www.sitemaps.org/schemas/sitemap/0.9">
<url>
<loc>http://localhost:3000/</loc>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
<url>
<loc>http://localhost:3000/about</loc>
<changefreq>weekly</changefreq>
<priority>0.9</priority>
</url>
</urlset>
"""

# Link entries passed to generate_xml and generate_static_sitemap in the tests.
mock_links = [
    {"loc": "http://localhost:3000/", "changefreq": "weekly", "priority": 0.9},
    {"loc": "http://localhost:3000/about", "changefreq": "weekly", "priority": 0.9},
]


def test_generate_xml():
    """Check that generate_xml renders the mock links as the expected XML."""
    expected = mock_xml.strip()
    actual = generate_xml(mock_links).strip()
    assert actual == expected


def test_generate_static_sitemaps(app_instance, index_page, about_page):
    """Check that generate_static_sitemap creates the sitemap.xml file."""
    # Start clean: drop any sitemap.xml that already exists.
    sitemap_file_path.unlink(missing_ok=True)
    assert not sitemap_file_path.exists()

    # NOTE(review): generate_static_sitemap only receives the links, so the
    # patched _pages does not appear to influence it -- confirm it is needed.
    registered_pages = {"index": index_page, "about": about_page}
    with unittest.mock.patch.object(app_instance, "_pages", registered_pages):
        generate_static_sitemap(mock_links)

    # The call above must have written the file.
    assert sitemap_file_path.exists()


def test_generate_links_for_sitemap():
    """Check the links generated from the sitemap properties without patching the config.

    The expected URLs use http://localhost:3000 -- presumably the deploy url
    of the default config; verify against the config defaults.
    """
    route_properties = {
        route: {"priority": 0.9, "changefreq": "weekly"}
        for route in ("index", "about")
    }
    expected = [
        {"loc": "http://localhost:3000/", "changefreq": "weekly", "priority": 0.9},
        {
            "loc": "http://localhost:3000/about",
            "changefreq": "weekly",
            "priority": 0.9,
        },
    ]

    assert generate_links_for_sitemap(route_properties) == expected


def test_generate_links_for_sitemap_deploy_url():
    """Check the links generated from the sitemap properties when the config provides a deploy url."""
    route_properties = {
        route: {"priority": 0.9, "changefreq": "weekly"}
        for route in ("index", "about")
    }
    expected = [
        {"loc": "http://www.google.com/", "changefreq": "weekly", "priority": 0.9},
        {
            "loc": "http://www.google.com/about",
            "changefreq": "weekly",
            "priority": 0.9,
        },
    ]

    # Replace the config lookup used by reflex.sitemap so it reports the deploy url under test.
    with unittest.mock.patch("reflex.sitemap.get_config") as mock_get_config:
        mock_get_config.return_value.deploy_url = "http://www.google.com"
        links = generate_links_for_sitemap(route_properties)

    assert links == expected