apache · inamdarzaid · Jun 4, 2025 · Aug 22, 2025 · Sep 13, 2025 · eschutho
diff --git a/superset/commands/report/execute.py b/superset/commands/report/execute.py
@@ -506,9 +506,13 @@ def _get_notification_content(self) -> NotificationContent:  # noqa: C901
                 if not screenshot_data:
                     error_text = "Unexpected missing screenshot"
             elif self._report_schedule.report_format == ReportDataFormat.PDF:
-                pdf_data = self._get_pdf()
-                if not pdf_data:
-                    error_text = "Unexpected missing pdf"
+                # For dashboards, we generate a PDF from a screenshot.
+                # For charts, we will generate a tabular PDF in the notification
+                # from embedded_data.
+                if self._report_schedule.dashboard:
+                    pdf_data = self._get_pdf()
+                    if not pdf_data:
+                        error_text = "Unexpected missing pdf"
             elif (
                 self._report_schedule.chart
                 and self._report_schedule.report_format == ReportDataFormat.CSV
@@ -522,11 +526,12 @@ def _get_notification_content(self) -> NotificationContent:  # noqa: C901
                     text=error_text,
                     header_data=header_data,
                     url=url,
+                    report_format=self._report_schedule.report_format,
                 )
 
-        if (
-            self._report_schedule.chart
-            and self._report_schedule.report_format == ReportDataFormat.TEXT
+        if self._report_schedule.chart and self._report_schedule.report_format in (
+            ReportDataFormat.TEXT,
+            ReportDataFormat.PDF,
         ):
             embedded_data = self._get_embedded_data()
 
@@ -553,6 +558,7 @@ def _get_notification_content(self) -> NotificationContent:  # noqa: C901
             csv=csv_data,
             embedded_data=embedded_data,
             header_data=header_data,
+            report_format=self._report_schedule.report_format,
         )
 
     def _send(

diff --git a/superset/config.py b/superset/config.py
@@ -1501,6 +1501,28 @@ def EMAIL_HEADER_MUTATOR(  # pylint: disable=invalid-name,unused-argument  # noq
 # The text for call-to-action link in Alerts & Reports emails
 EMAIL_REPORTS_CTA = "Explore in Superset"
 
+# ---------------------------------------------------
+# PDF Export Configuration for Reports
+# ---------------------------------------------------
+# Enable or disable headers and footers in PDF exports
+PDF_EXPORT_HEADERS_FOOTERS_ENABLED = True
+
+# Template for the header of the PDF.
+# Available placeholders: {report_name}, {page_number}, {total_pages}
+# Example: "Report: {report_name} - Page {page_number} of {total_pages}"
+PDF_EXPORT_HEADER_TEMPLATE = "Report: {report_name} - Page {page_number} of {total_pages}"
+
+# Template for the footer of the PDF.
+# Available placeholders: {generation_date}, {report_name}
+# Example: "Generated: {generation_date}"
+PDF_EXPORT_FOOTER_TEMPLATE = "Generated: {generation_date}"
+
+# Default page size for PDF exports (e.g., "A4", "Letter", "A3")
+PDF_EXPORT_PAGE_SIZE = "A4"
+
+# Default page orientation for PDF exports (e.g., "portrait", "landscape")
+PDF_EXPORT_ORIENTATION = "portrait"
+
 # Slack API token for the superset reports, either string or callable
 SLACK_API_TOKEN: Callable[[], str] | str | None = None
 SLACK_PROXY = None

diff --git a/superset/reports/notifications/base.py b/superset/reports/notifications/base.py
@@ -34,6 +34,7 @@ class NotificationContent:
     description: Optional[str] = ""
     url: Optional[str] = None  # url to chart/dashboard for this screenshot
     embedded_data: Optional[pd.DataFrame] = None
+    report_format: Optional[str] = None
 
 
 class BaseNotification:  # pylint: disable=too-few-public-methods

diff --git a/superset/reports/notifications/email.py b/superset/reports/notifications/email.py
@@ -17,6 +17,7 @@
 import logging
 import textwrap
 from dataclasses import dataclass
+from weasyprint import HTML, CSS
 from datetime import datetime
 from email.utils import make_msgid, parseaddr
 from typing import Any, Optional
@@ -132,19 +133,124 @@ def _get_content(self) -> EmailContent:
             attributes=ALLOWED_ATTRIBUTES,
         )
 
-        # Strip malicious HTML from embedded data, allowing only table elements
-        if self._content.embedded_data is not None:
+        pdf_data = None
+        html_table = ""
+
+        # Check if the report format is PDF and embedded data is available
+        # NotificationContent.report_format defaults to None; only the exact string "PDF" takes this branch
+        if hasattr(self._content, 'report_format') and \
+           self._content.report_format == "PDF" and \
+           self._content.embedded_data is not None:
             df = self._content.embedded_data
-            # pylint: disable=no-member
-            html_table = nh3.clean(
-                df.to_html(na_rep="", index=True, escape=True),
-                # pandas will escape the HTML in cells already, so passing
-                # more allowed tags here will not work
-                tags=TABLE_TAGS,
-                attributes=ALLOWED_TABLE_ATTRIBUTES,
-            )
-        else:
+            report_name_val = self._name # Renamed to avoid clash with CSS variable name
+            generation_date_val = self.now.strftime('%Y-%m-%d %H:%M:%S UTC')
+
+            # Retrieve PDF export configurations
+            pdf_headers_footers_enabled = app.config.get("PDF_EXPORT_HEADERS_FOOTERS_ENABLED", True)
+            pdf_header_template = app.config.get("PDF_EXPORT_HEADER_TEMPLATE", "Report: {report_name} - Page {page_number} of {total_pages}")
+            pdf_footer_template = app.config.get("PDF_EXPORT_FOOTER_TEMPLATE", "Generated: {generation_date}")
+            pdf_page_size = app.config.get("PDF_EXPORT_PAGE_SIZE", "A4")
+            pdf_orientation = app.config.get("PDF_EXPORT_ORIENTATION", "portrait")
+
+            # Prepare header and footer content based on templates and config
+            header_content_str = ""
+            footer_content_str = ""
+
+            if pdf_headers_footers_enabled:
+                # Escape backslashes, double quotes and newlines so both values are safe inside a CSS string
+                css_report_name = report_name_val.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\A")
+                css_generation_date = generation_date_val.replace("\\", "\\\\").replace("\"", "\\\"").replace("\n", "\\A")
+
+                # For header: fill in {report_name}; replace {page_number}/{total_pages} with counter(page)/counter(pages)
+                header_content_str = pdf_header_template.replace("{report_name}", css_report_name)
+                header_content_str = header_content_str.replace("{page_number}", "counter(page)")
+                header_content_str = header_content_str.replace("{total_pages}", "counter(pages)")
+
+                # For footer: replace {generation_date} and {report_name}
+                footer_content_str = pdf_footer_template.replace("{generation_date}", css_generation_date)
+                footer_content_str = footer_content_str.replace("{report_name}", css_report_name)
+
+
+            pdf_html_content = f"""
+            <html>
+            <head>
+                <meta charset="UTF-8">
+                <style>
+                    :root {{
+                        /* Keeping these for potential use in body styles if needed */
+                        --report-name-var: "{report_name_val.replace('"', '&quot;').replace("'", "&apos;")}";
+                        --generation-date-var: "{generation_date_val}";
+                    }}
+                </style>
+            </head>
+            <body>
+                <div class="report-description">{description}</div>
+                <br>
+                {df.to_html(na_rep="", index=True, escape=True)}
+            </body>
+            </html>
+            """
+
+            # Construct @page CSS string
+            page_css_parts = [f"@page {{ size: {pdf_page_size} {pdf_orientation}; margin: 2.5cm 1.5cm 2cm 1.5cm; }}"]
+            if pdf_headers_footers_enabled:
+                # Header maps to @top-center and footer to @bottom-center; other margin boxes would need extra mapping logic
+                # NOTE(review): margin boxes normally nest inside "@page { ... }" and counter() is not evaluated inside quotes - verify the rendered PDF
+                page_css_parts.append(f"@page @top-center {{ content: \"{header_content_str}\"; font-size: 9pt; color: #333; }}")
+                page_css_parts.append(f"@page @bottom-center {{ content: \"{footer_content_str}\"; font-size: 9pt; color: #333; }}")
+            else:
+                # Ensure no headers/footers if disabled
+                page_css_parts.append("@page @top-left { content: \"\"; }")
+                page_css_parts.append("@page @top-center { content: \"\"; }")
+                page_css_parts.append("@page @top-right { content: \"\"; }")
+                page_css_parts.append("@page @bottom-left { content: \"\"; }")
+                page_css_parts.append("@page @bottom-center { content: \"\"; }")
+                page_css_parts.append("@page @bottom-right { content: \"\"; }")
+
+
+            pdf_css_string = f'''
+                {" ".join(page_css_parts)}
+
+                body {{ font-family: sans-serif; }}
+                table {{
+                    border-collapse: collapse;
+                    width: 100%;
+                    page-break-inside: auto;
+                }}
+                tr {{
+                    page-break-inside: avoid;
+                    page-break-after: auto;
+                }}
+                th, td {{
+                    border: 1px solid black;
+                    padding: 4px;
+                    text-align: left;
+                    font-size: 8pt;
+                }}
+                th {{ background-color: #f0f0f0; }}
+                .report-description {{ margin-bottom: 1em; font-size: 10pt; }}
+            '''
+            pdf_css = CSS(string=pdf_css_string)
+            pdf_bytes = HTML(string=pdf_html_content).write_pdf(stylesheets=[pdf_css])
+            pdf_data = {__("%(name)s.pdf", name=report_name_val): pdf_bytes}
+            # Set html_table to empty as the table is in the PDF
             html_table = ""
+        else:
+            # Existing logic for HTML email
+            if self._content.embedded_data is not None:
+                df = self._content.embedded_data
+                # pylint: disable=no-member
+                html_table = nh3.clean(
+                    df.to_html(na_rep="", index=True, escape=True),
+                    # pandas will escape the HTML in cells already, so passing
+                    # more allowed tags here will not work
+                    tags=TABLE_TAGS,
+                    attributes=ALLOWED_TABLE_ATTRIBUTES,
+                )
+            # Fallback for existing PDF data if not generated by WeasyPrint
+            if self._content.pdf:
+                pdf_data = {__("%(name)s.pdf", name=self._name): self._content.pdf}
+
 
         img_tags = []
         for msgid in images.keys():
@@ -187,14 +293,14 @@ def _get_content(self) -> EmailContent:
         if self._content.csv:
             csv_data = {__("%(name)s.csv", name=self._name): self._content.csv}
 
-        pdf_data = None
-        if self._content.pdf:
-            pdf_data = {__("%(name)s.pdf", name=self._name): self._content.pdf}
+        # pdf_data is already defined above
+        # if self._content.pdf and not pdf_data: # if pdf_data was not set by WeasyPrint
+        #     pdf_data = {__("%(name)s.pdf", name=self._name): self._content.pdf}
 
         return EmailContent(
             body=body,
             images=images,
-            pdf=pdf_data,
+            pdf=pdf_data, # Use the pdf_data populated by WeasyPrint or existing logic
             data=csv_data,
             header_data=self._content.header_data,
         )