Skip to content

Commit

Permalink
added option for disabling HTML Sanitize (#2831)
Browse files Browse the repository at this point in the history
# Description of Changes

Please provide a summary of the changes, including:

- added disableSanitize: false # set to 'true' to disable Sanitize HTML,
set to false to enable Sanitize HTML; (can lead to injections in HTML)
- Some users uses this on local boxes, and uses Google Fonts, and base64
image src.


### General

- [x] I have read the [Contribution
Guidelines](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/CONTRIBUTING.md)
- [x] I have read the [Stirling-PDF Developer
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md)
(if applicable)
- [x] I have read the [How to add new languages to
Stirling-PDF](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md)
(if applicable)
- [x] I have performed a self-review of my own code
- [ ] My changes generate no new warnings

### Documentation

- [x] I have updated relevant docs on [Stirling-PDF's doc
repo](https://github.com/Stirling-Tools/Stirling-Tools.github.io/blob/main/docs/)
(if functionality has heavily changed)
- [ ] I have read the section [Add New Translation
Tags](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/HowToAddNewLanguage.md#add-new-translation-tags)
(for new translation tags only)

### Testing (if applicable)

- [ ] I have tested my changes locally. Refer to the [Testing
Guide](https://github.com/Stirling-Tools/Stirling-PDF/blob/main/DeveloperGuide.md#6-testing)
for more details.

---------

Co-authored-by: blaz.carli <[email protected]>
Co-authored-by: Anthony Stirling <[email protected]>
  • Loading branch information
3 people authored Jan 31, 2025
1 parent c5cffdc commit 6ae2fdd
Show file tree
Hide file tree
Showing 6 changed files with 35 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import io.swagger.v3.oas.annotations.tags.Tag;

import stirling.software.SPDF.model.api.converters.HTMLToPdfRequest;
import stirling.software.SPDF.model.ApplicationProperties;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils;
Expand All @@ -27,12 +28,16 @@ public class ConvertHtmlToPDF {

private final CustomPDDocumentFactory pdfDocumentFactory;

private final ApplicationProperties applicationProperties;

@Autowired
public ConvertHtmlToPDF(
CustomPDDocumentFactory pdfDocumentFactory,
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) {
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled,
ApplicationProperties applicationProperties) {
this.pdfDocumentFactory = pdfDocumentFactory;
this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled;
this.applicationProperties = applicationProperties;
}

@PostMapping(consumes = "multipart/form-data", value = "/html/pdf")
Expand All @@ -54,12 +59,16 @@ public ResponseEntity<byte[]> HtmlToPdf(@ModelAttribute HTMLToPdfRequest request
|| (!originalFilename.endsWith(".html") && !originalFilename.endsWith(".zip"))) {
throw new IllegalArgumentException("File must be either .html or .zip format.");
}

boolean disableSanitize = Boolean.TRUE.equals(applicationProperties.getSystem().getDisableSanitize());

byte[] pdfBytes =
FileToPdf.convertHtmlToPdf(
request,
fileInput.getBytes(),
originalFilename,
bookAndHtmlFormatsInstalled);
bookAndHtmlFormatsInstalled,
disableSanitize);

pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import io.swagger.v3.oas.annotations.tags.Tag;

import stirling.software.SPDF.model.api.GeneralFile;
import stirling.software.SPDF.model.ApplicationProperties;
import stirling.software.SPDF.service.CustomPDDocumentFactory;
import stirling.software.SPDF.utils.FileToPdf;
import stirling.software.SPDF.utils.WebResponseUtils;
Expand All @@ -37,12 +38,16 @@ public class ConvertMarkdownToPdf {

private final CustomPDDocumentFactory pdfDocumentFactory;

private final ApplicationProperties applicationProperties;

@Autowired
public ConvertMarkdownToPdf(
CustomPDDocumentFactory pdfDocumentFactory,
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled) {
@Qualifier("bookAndHtmlFormatsInstalled") boolean bookAndHtmlFormatsInstalled,
ApplicationProperties applicationProperties) {
this.pdfDocumentFactory = pdfDocumentFactory;
this.bookAndHtmlFormatsInstalled = bookAndHtmlFormatsInstalled;
this.applicationProperties = applicationProperties;
}

@PostMapping(consumes = "multipart/form-data", value = "/markdown/pdf")
Expand Down Expand Up @@ -76,12 +81,15 @@ public ResponseEntity<byte[]> markdownToPdf(@ModelAttribute GeneralFile request)

String htmlContent = renderer.render(document);

boolean disableSanitize = Boolean.TRUE.equals(applicationProperties.getSystem().getDisableSanitize());

byte[] pdfBytes =
FileToPdf.convertHtmlToPdf(
null,
htmlContent.getBytes(),
"converted.html",
bookAndHtmlFormatsInstalled);
bookAndHtmlFormatsInstalled,
disableSanitize);
pdfBytes = pdfDocumentFactory.createNewBytesBasedOnOldDocument(pdfBytes);
String outputFilename =
originalFilename.replaceFirst("[.][^.]+$", "")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ public static class System {
private Boolean enableAlphaFunctionality;
private String enableAnalytics;
private Datasource datasource;
private Boolean disableSanitize;
}

@Data
Expand Down
16 changes: 8 additions & 8 deletions src/main/java/stirling/software/SPDF/utils/FileToPdf.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ public static byte[] convertHtmlToPdf(
HTMLToPdfRequest request,
byte[] fileBytes,
String fileName,
boolean htmlFormatsInstalled)
boolean htmlFormatsInstalled,
boolean disableSanitize)
throws IOException, InterruptedException {

Path tempOutputFile = Files.createTempFile("output_", ".pdf");
Expand All @@ -35,13 +36,12 @@ public static byte[] convertHtmlToPdf(
try {
if (fileName.endsWith(".html")) {
tempInputFile = Files.createTempFile("input_", ".html");
String sanitizedHtml =
sanitizeHtmlContent(new String(fileBytes, StandardCharsets.UTF_8));
String sanitizedHtml = sanitizeHtmlContent(new String(fileBytes, StandardCharsets.UTF_8), disableSanitize);
Files.write(tempInputFile, sanitizedHtml.getBytes(StandardCharsets.UTF_8));
} else if (fileName.endsWith(".zip")) {
tempInputFile = Files.createTempFile("input_", ".zip");
Files.write(tempInputFile, fileBytes);
sanitizeHtmlFilesInZip(tempInputFile);
sanitizeHtmlFilesInZip(tempInputFile, disableSanitize);
} else {
throw new IllegalArgumentException("Unsupported file format: " + fileName);
}
Expand Down Expand Up @@ -89,11 +89,11 @@ public static byte[] convertHtmlToPdf(
return pdfBytes;
}

private static String sanitizeHtmlContent(String htmlContent) {
return CustomHtmlSanitizer.sanitize(htmlContent);
private static String sanitizeHtmlContent(String htmlContent, boolean disableSanitize) {
return (!disableSanitize) ? CustomHtmlSanitizer.sanitize(htmlContent) : htmlContent;
}

private static void sanitizeHtmlFilesInZip(Path zipFilePath) throws IOException {
private static void sanitizeHtmlFilesInZip(Path zipFilePath, boolean disableSanitize) throws IOException {
Path tempUnzippedDir = Files.createTempDirectory("unzipped_");
try (ZipInputStream zipIn =
ZipSecurity.createHardenedInputStream(
Expand All @@ -106,7 +106,7 @@ private static void sanitizeHtmlFilesInZip(Path zipFilePath) throws IOException
if (entry.getName().toLowerCase().endsWith(".html")
|| entry.getName().toLowerCase().endsWith(".htm")) {
String content = new String(zipIn.readAllBytes(), StandardCharsets.UTF_8);
String sanitizedContent = sanitizeHtmlContent(content);
String sanitizedContent = sanitizeHtmlContent(content, disableSanitize);
Files.write(filePath, sanitizedContent.getBytes(StandardCharsets.UTF_8));
} else {
Files.copy(zipIn, filePath);
Expand Down
5 changes: 3 additions & 2 deletions src/main/resources/settings.yml.template
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ security:
csrfDisabled: false # set to 'true' to disable CSRF protection (not recommended for production)
loginAttemptCount: 5 # lock user account after 5 tries; when using e.g. Fail2Ban you can deactivate the function with -1
loginResetTimeMinutes: 120 # lock account for 2 hours after x attempts
loginMethod: all # Accepts values like 'all' and 'normal'(only Login with Username/Password), 'oauth2'(only Login with OAuth2) or 'saml2'(only Login with SAML2)
loginMethod: all # Accepts values like 'all' and 'normal'(only Login with Username/Password), 'oauth2'(only Login with OAuth2) or 'saml2'(only Login with SAML2)
initialLogin:
username: '' # initial username for the first login
password: '' # initial password for the first login
Expand Down Expand Up @@ -86,6 +86,7 @@ system:
customHTMLFiles: false # enable to have files placed in /customFiles/templates override the existing template HTML files
tessdataDir: /usr/share/tessdata # path to the directory containing the Tessdata files. This setting is relevant for Windows systems. For Windows users, this path should be adjusted to point to the appropriate directory where the Tessdata files are stored.
enableAnalytics: 'true' # set to 'true' to enable analytics, set to 'false' to disable analytics; for enterprise users, this is set to true
disableSanitize: false # set to true to disable Sanitize HTML; (can lead to injections in HTML)
datasource:
enableCustomDatabase: false # Enterprise users ONLY, set this property to 'true' if you would like to use your own custom database configuration
customDatabaseUrl: '' # eg jdbc:postgresql://localhost:5432/postgres, set the url for your own custom database connection. If provided, the type, hostName, port and name are not necessary and will not be used
Expand Down Expand Up @@ -113,7 +114,7 @@ AutomaticallyGenerated:
key: example
UUID: example
appVersion: 0.35.0

processExecutor:
sessionLimit: # Process executor instances limits
libreOfficeSessionLimit: 1
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,10 +15,11 @@ public void testConvertHtmlToPdf() {
byte[] fileBytes = new byte[0]; // Sample file bytes
String fileName = "test.html"; // Sample file name
boolean htmlFormatsInstalled = true; // Sample boolean value
boolean disableSanitize = false; // Sample boolean value

// Check if the method throws IOException
assertThrows(IOException.class, () -> {
FileToPdf.convertHtmlToPdf(request, fileBytes, fileName, htmlFormatsInstalled);
FileToPdf.convertHtmlToPdf(request, fileBytes, fileName, htmlFormatsInstalled, disableSanitize);
});
}

Expand Down

0 comments on commit 6ae2fdd

Please sign in to comment.