Skip to content

Commit

Permalink
Merge pull request #9 from spassarop/develop
Browse files Browse the repository at this point in the history
Changes for v1.0.4
  • Loading branch information
spassarop authored Jun 1, 2022
2 parents 3185480 + 3a1eb0b commit 550e9b7
Show file tree
Hide file tree
Showing 16 changed files with 178 additions and 90 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@ http://www.w3.org/TR/html401/struct/global.html
xsi:noNamespaceSchemaLocation="antisamy.xsd">

<directives>
<directive name="omitXmlDeclaration" value="true"/>
<directive name="omitDoctypeDeclaration" value="true"/>
<directive name="maxInputSize" value="200000"/>
<directive name="useXHTML" value="true"/>
<directive name="formatOutput" value="true"/>

<!--
Expand Down
2 changes: 0 additions & 2 deletions OWASP.AntiSamy/AntiSamyPolicyExamples/antisamy-ebay.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@ http://www.w3.org/TR/html401/struct/global.html
xsi:noNamespaceSchemaLocation="antisamy.xsd">

<directives>
<directive name="omitXmlDeclaration" value="true"/>
<directive name="omitDoctypeDeclaration" value="true"/>
<directive name="maxInputSize" value="20000"/>
<directive name="useXHTML" value="true"/>
<directive name="formatOutput" value="true"/>

<!--
Expand Down
2 changes: 0 additions & 2 deletions OWASP.AntiSamy/AntiSamyPolicyExamples/antisamy-myspace.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,8 @@ http://www.w3.org/TR/html401/struct/global.html
xsi:noNamespaceSchemaLocation="antisamy.xsd">

<directives>
<directive name="omitXmlDeclaration" value="true"/>
<directive name="omitDoctypeDeclaration" value="true"/>
<directive name="maxInputSize" value="15000"/>
<directive name="useXHTML" value="true"/>
<directive name="formatOutput" value="true"/>

<!--
Expand Down
2 changes: 0 additions & 2 deletions OWASP.AntiSamy/AntiSamyPolicyExamples/antisamy-slashdot.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,8 @@ Slashdot allowed tags taken from "Reply" page:
xsi:noNamespaceSchemaLocation="antisamy.xsd">

<directives>
<directive name="omitXmlDeclaration" value="true"/>
<directive name="omitDoctypeDeclaration" value="true"/>
<directive name="maxInputSize" value="5000"/>
<directive name="useXHTML" value="true"/>
<directive name="formatOutput" value="true"/>

<directive name="embedStyleSheets" value="false"/>
Expand Down
2 changes: 0 additions & 2 deletions OWASP.AntiSamy/AntiSamyPolicyExamples/antisamy-tinymce.xml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,9 @@
xsi:noNamespaceSchemaLocation="antisamy.xsd">

<directives>
<directive name="omitXmlDeclaration" value="true" />
<directive name="omitDoctypeDeclaration" value="false" />
<directive name="maxInputSize" value="100000" />
<directive name="embedStyleSheets" value="false" />
<directive name="useXHTML" value="true" />
<directive name="formatOutput" value="true" />
</directives>

Expand Down
1 change: 0 additions & 1 deletion OWASP.AntiSamy/AntiSamyPolicyExamples/antisamy.xml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@ http://www.w3.org/TR/html401/struct/global.html
<directive name="omitXmlDeclaration" value="true"/>
<directive name="omitDoctypeDeclaration" value="true"/>
<directive name="maxInputSize" value="200000"/>
<directive name="useXHTML" value="true"/>
<directive name="formatOutput" value="true"/>
<directive name="nofollowAnchors" value="true" />
<directive name="validateParamAsEmbed" value="true" />
Expand Down
10 changes: 7 additions & 3 deletions OWASP.AntiSamy/Css/CssScanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,10 @@ public CssScanner(Policy policy)
IBrowsingContext browsingContext = BrowsingContext.New(Configuration.Default
.WithCss()
.With(new DefaultHttpRequester(userAgent: null, setup: SetupHttpRequest))
.WithDefaultLoader(new LoaderOptions { IsResourceLoadingEnabled = true }));
.WithDefaultLoader(new LoaderOptions {
IsResourceLoadingEnabled = policy.EmbedsStyleSheets,
IsNavigationDisabled = true
}));
parser = new CssParser(cssParserOptions, browsingContext);
}

Expand Down Expand Up @@ -188,9 +191,9 @@ private CleanResults DoScan(string taintedCss, bool isInlineCss, string tagName)
}
}

if (isCData && !policy.UsesXhtml)
if (isCData)
{
cleanStylesheet = $"<![CDATA[[{cleanStylesheet}]]>";
cleanStylesheet = $"<![CDATA[{cleanStylesheet}]]>";
}

return new CleanResults(startOfScan, new DateTime(), cleanStylesheet, ErrorMessages);
Expand Down Expand Up @@ -461,6 +464,7 @@ private string GetPropertyErrorMessage(string propertyName, string propertyValue
/// <summary>Configures an <see cref="HttpWebRequest"/>; supplied as the <c>setup</c> callback of the <c>DefaultHttpRequester</c> created in the <c>CssScanner</c> constructor.</summary>
/// <param name="httpWebRequest">The request to configure.</param>
private void SetupHttpRequest(HttpWebRequest httpWebRequest)
{
// Apply the timeout value exposed by the policy.
httpWebRequest.Timeout = policy.ConnectionTimeout;
// Do not follow HTTP redirects automatically.
httpWebRequest.AllowAutoRedirect = false;
}

private void AddError(string errorKey, params object[] arguments)
Expand Down
7 changes: 3 additions & 4 deletions OWASP.AntiSamy/Html/InternalPolicy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
using System.Collections.Generic;
using OWASP.AntiSamy.Html.Scan;
using Tag = OWASP.AntiSamy.Html.Model.Tag;
using Property = OWASP.AntiSamy.Html.Model.Property;

namespace OWASP.AntiSamy.Html
{
Expand All @@ -40,8 +41,8 @@ public InternalPolicy(ParseContext parseContext) : base(parseContext)
SetProperties();
}

public InternalPolicy(Policy old, Dictionary<string, string> directives, Dictionary<string, Tag> tagRules)
: base(old, directives, tagRules)
public InternalPolicy(Policy old, Dictionary<string, string> directives, Dictionary<string, Tag> tagRules, Dictionary<string, Property> cssRules)
: base(old, directives, tagRules, cssRules)
{
SetProperties();
}
Expand All @@ -54,10 +55,8 @@ private void SetProperties()
ValidatesParamAsEmbed = IsTrue(Constants.VALIDATE_PARAM_AS_EMBED);
FormatsOutput = IsTrue(Constants.FORMAT_OUTPUT);
PreservesSpace = IsTrue(Constants.PRESERVE_SPACE);
OmitsXmlDeclaration = IsTrue(Constants.OMIT_XML_DECLARATION);
OmitsDoctypeDeclaration = IsTrue(Constants.OMIT_DOCTYPE_DECLARATION);
EntityEncodesInternationalCharacters = IsTrue(Constants.ENTITY_ENCODE_INERNATIONAL_CHARS);
UsesXhtml = IsTrue(Constants.USE_XHTML);
string onUnknownTagActionValue = GetDirectiveByName(Constants.ON_UNKNOWN_TAG_ACTION);
OnUnknownTagAction = string.IsNullOrEmpty(onUnknownTagActionValue) ? string.Empty : onUnknownTagActionValue.ToLowerInvariant();
PreservesComments = IsTrue(Constants.PRESERVE_COMMENTS);
Expand Down
4 changes: 2 additions & 2 deletions OWASP.AntiSamy/Html/Model/Property.cs
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ namespace OWASP.AntiSamy.Html.Model
/// <summary> A model for CSS properties and the "rules" they must follow (either literals
/// or regular expressions) in order to be considered valid.</summary>
// Author: Jason Li
internal class Property
public class Property
{
public List<string> AllowedRegExp { get; set; } = new List<string>();
public List<string> AllowedValues { get; set; } = new List<string>();
Expand All @@ -38,7 +38,7 @@ internal class Property
public string OnInvalid { get; set; }
public string Description { get; set; }

public Property(string name) => this.Name = name;
public Property(string name) => Name = name;

/// <summary> Add the specified value to the allowed list of valid values.</summary>
/// <param name="safeValue">The new valid value to add to the list.</param>
Expand Down
2 changes: 0 additions & 2 deletions OWASP.AntiSamy/Html/ParseContext.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,10 @@ public class ParseContext
internal Dictionary<string, Attribute> globalAttributes = new Dictionary<string, Attribute>();
internal Dictionary<string, Attribute> dynamicAttributes = new Dictionary<string, Attribute>();
internal List<string> allowedEmptyTags = new List<string>();
internal List<string> requireClosingTags = new List<string>();

/// <summary>Empties the tag lists held by this context (<c>allowedEmptyTags</c> and <c>requireClosingTags</c>).</summary>
/// <remarks>NOTE(review): judging by the name, this is meant to run before a later configuration is parsed so that its lists replace the earlier ones; confirm against the callers.</remarks>
internal void ResetParametersWhereLastConfigurationWins()
{
allowedEmptyTags.Clear();
requireClosingTags.Clear();
}
}
}
51 changes: 9 additions & 42 deletions OWASP.AntiSamy/Html/Policy.cs
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,12 @@ public class Policy

private readonly Dictionary<string, string> commonRegularExpressions;
private readonly Dictionary<string, Attribute> commonAttributes;
private readonly Dictionary<string, Tag> tagRules;
private readonly Dictionary<string, Property> cssRules;
private readonly Dictionary<string, string> directives;
internal readonly Dictionary<string, Tag> tagRules;
internal readonly Dictionary<string, Property> cssRules;
internal readonly Dictionary<string, string> directives;
private readonly Dictionary<string, Attribute> globalAttributes;
private readonly Dictionary<string, Attribute> dynamicAttributes;
private readonly TagMatcher allowedEmptyTagsMatcher;
private readonly TagMatcher requireClosingTagsMatcher;

/// <summary>Maximum input size for the HTML to read.</summary>
/// <remarks> If this value is not specified by the policy, the <c>DEFAULT_MAX_INPUT_SIZE</c> is used.</remarks>
Expand All @@ -71,15 +70,10 @@ public class Policy
internal protected bool FormatsOutput { get; set; }
/// <summary>Determines if HTML output gets trimmed.</summary>
internal protected bool PreservesSpace { get; set; }
/// <summary>Avoids prepending the <c>"&lt;?xml ...&gt;"</c> initial tag when using XHTML.</summary>
internal protected bool OmitsXmlDeclaration { get; set; }
/// <summary>Avoids prepending the <c>"&lt;!DOCTYPE html ...&gt;"</c> initial tag.</summary>
internal protected bool OmitsDoctypeDeclaration { get; set; }
/// <summary>Determines if HTML output gets encoded regarding special characters, like accents.</summary>
internal protected bool EntityEncodesInternationalCharacters { get; set; }
/// <summary>Determines if parser uses XHTML.</summary>
/// <remarks>Explicitly used for CDATA handling when scanning CSS.</remarks>
internal protected bool UsesXhtml { get; set; }
/// <summary>Determines if comments are removed from the HTML.</summary>
internal protected bool PreservesComments { get; set; }
/// <summary>Determines if style sheets can be embedded/imported to be parsed.</summary>
Expand Down Expand Up @@ -107,24 +101,23 @@ protected Policy(ParseContext parseContext)
globalAttributes = parseContext.globalAttributes;
tagRules = parseContext.tagRules;
allowedEmptyTagsMatcher = new TagMatcher(parseContext.allowedEmptyTags);
requireClosingTagsMatcher = new TagMatcher(parseContext.requireClosingTags);
}

/// <summary>Create policy with full parameters.</summary>
/// <param name="old">Old policy to copy from.</param>
/// <param name="directives">Directives to override.</param>
/// <param name="tagRules">Tag rules to override.</param>
protected Policy(Policy old, Dictionary<string, string> directives, Dictionary<string, Tag> tagRules)
/// <param name="cssRules">CSS rules to override.</param>
protected Policy(Policy old, Dictionary<string, string> directives, Dictionary<string, Tag> tagRules, Dictionary<string, Property> cssRules)
{
commonAttributes = old.commonAttributes;
commonRegularExpressions = old.commonRegularExpressions;
cssRules = old.cssRules;
this.cssRules = cssRules;
this.directives = directives;
dynamicAttributes = old.dynamicAttributes;
globalAttributes = old.globalAttributes;
this.tagRules = tagRules;
allowedEmptyTagsMatcher = old.allowedEmptyTagsMatcher;
requireClosingTagsMatcher = old.requireClosingTagsMatcher;
}

/// <summary> This retrieves a policy based on a default location ("AntiSamyPolicyExamples/antisamy.xml") or from the embedded XML.</summary>
Expand Down Expand Up @@ -214,7 +207,7 @@ public Policy CloneWithDirective(string name, string value)
newDirectives.Add(name, value);
}

return new InternalPolicy(this, newDirectives, tagRules);
return new InternalPolicy(this, newDirectives, tagRules, cssRules);
}

/// <summary>A simple method for returning one of the &lt;common-regexp&gt; entries by name.</summary>
Expand Down Expand Up @@ -269,24 +262,7 @@ internal Attribute GetDynamicAttributeByName(string name)
return dynamicAttribute;
}

/// <summary>Builds a new policy whose tag rules contain <paramref name="tag"/>, leaving this policy's own rule dictionary unmodified.</summary>
/// <param name="tag">The tag rule to add, or to substitute for an existing rule stored under the same lower-cased name.</param>
/// <returns>A new <see cref="InternalPolicy"/> created from this policy, its current directives and the updated copy of the tag rules.</returns>
internal Policy MutateTag(Tag tag)
{
// Work on a copy so the dictionary held by this instance is not changed.
var newTagRules = new Dictionary<string, Tag>(tagRules);
// The lookup key is the tag name lower-cased with the invariant culture.
string tagNameToLower = tag.Name.ToLowerInvariant();

if (newTagRules.ContainsKey(tagNameToLower))
{
// A rule is already stored under this key: overwrite it.
newTagRules[tagNameToLower] = tag;
}
else
{
// No rule under this key yet: add it.
newTagRules.Add(tagNameToLower, tag);
}

return new InternalPolicy(this, directives, newTagRules);
}

private static ParseContext GetParseContext(XmlDocument document)
internal static ParseContext GetParseContext(XmlDocument document)
{
var parseContext = new ParseContext();

Expand All @@ -300,7 +276,7 @@ private static ParseContext GetParseContext(XmlDocument document)
/// <param name="filename">The name of the file which contains the policy XML.</param>
/// <returns>The loaded <see cref="XmlDocument"/>.</returns>
/// <exception cref="PolicyException"/>
private static XmlDocument GetXmlDocumentFromFile(string filename)
internal static XmlDocument GetXmlDocumentFromFile(string filename)
{
try
{
Expand Down Expand Up @@ -396,7 +372,6 @@ private static void ParsePolicy(XmlDocument document, ParseContext parseContext)
ParseTagRules(document.GetElementsByTagName("tag-rules").Item(0), parseContext);
ParseCssRules(document.GetElementsByTagName("css-rules").Item(0), parseContext);
ParseAllowedEmptyTags(document.GetElementsByTagName("allowed-empty-tags").Item(0), parseContext);
ParseRequireClosingTags(document.GetElementsByTagName("require-closing-tags").Item(0), parseContext);
}
catch (Exception ex)
{
Expand Down Expand Up @@ -678,14 +653,6 @@ private static void ParseAllowedEmptyTags(XmlNode allowedEmptyTagListNode, Parse
ParseTagListWithLiterals(allowedEmptyTagListNode, parseContext.allowedEmptyTags, Constants.DEFAULT_ALLOWED_EMPTY_TAGS);
}

/// <summary> Go through the &lt;require-closing-tags&gt; section of the policy file.</summary>
/// <remarks>Delegates to <c>ParseTagListWithLiterals</c>, passing <c>parseContext.requireClosingTags</c> as the list to fill and <c>Constants.DEFAULT_REQUIRE_CLOSING_TAGS</c> as the default tag list.</remarks>
/// <param name="requireClosingTagListNode">Top level of &lt;require-closing-tags&gt;.</param>
/// <param name="parseContext">The <see cref="ParseContext"/> containing the require closing tags list to fill.</param>
private static void ParseRequireClosingTags(XmlNode requireClosingTagListNode, ParseContext parseContext)
{
ParseTagListWithLiterals(requireClosingTagListNode, parseContext.requireClosingTags, Constants.DEFAULT_REQUIRE_CLOSING_TAGS);
}

private static void ParseTagListWithLiterals(XmlNode nodeList, List<string> tagListToFill, List<string> defaultTagsList)
{
if (nodeList != null)
Expand Down
9 changes: 2 additions & 7 deletions OWASP.AntiSamy/Html/Scan/AntiSamyDomScanner.cs
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ public CleanResults Scan(string html)
{
OptionAutoCloseOnEnd = true, // Add closing tags
OptionMaxNestedChildNodes = Constants.MAX_NESTED_TAGS, // TODO: Add directive for this like in MaxInputSize?
OptionOutputAsXml = Policy.UsesXhtml, // Enforces XML rules, encodes big 5
OptionOutputAsXml = true, // Enforces XML rules, encodes big 5
OptionXmlForceOriginalComment = true // Fix provided by the library for weird added spaces in HTML comments
};

Expand Down Expand Up @@ -143,17 +143,12 @@ public CleanResults Scan(string html)
finalCleanHTML = SpecialCharactersEncoder.Encode(finalCleanHTML);
}

if (!Policy.UsesXhtml && !Policy.OmitsDoctypeDeclaration)
if (!Policy.OmitsDoctypeDeclaration)
{
finalCleanHTML = "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Strict//EN\" " +
"\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd\">" + finalCleanHTML;
}

if (Policy.UsesXhtml && !Policy.OmitsXmlDeclaration)
{
finalCleanHTML = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>" + finalCleanHTML;
}

// Grab end time (to be put in the result set along with start time)
var end = DateTime.Now;
Results = new CleanResults(start, end, finalCleanHTML, errorMessages);
Expand Down
6 changes: 0 additions & 6 deletions OWASP.AntiSamy/Html/Scan/Constants.cs
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,6 @@ internal static class Constants
"base", "param", "meta", "input", "textarea", "embed", "basefont", "col"
};

public static readonly List<string> DEFAULT_REQUIRE_CLOSING_TAGS = new List<string> {
"iframe", "script", "link"
};

// For Tag regular expression building
public static readonly string REGEXP_CHARACTERS = "\\(){}.*?$^-+";
public static readonly string ANY_NORMAL_WHITESPACES = "(\\s)*";
Expand All @@ -47,9 +43,7 @@ internal static class Constants
public static readonly string CLOSE_TAG_ATTRIBUTES = CLOSE_ATTRIBUTE + "*";

// Policy
public static readonly string OMIT_XML_DECLARATION = "omitXmlDeclaration";
public static readonly string OMIT_DOCTYPE_DECLARATION = "omitDoctypeDeclaration";
public static readonly string USE_XHTML = "useXHTML";
public static readonly string FORMAT_OUTPUT = "formatOutput";
public static readonly string EMBED_STYLESHEETS = "embedStyleSheets";
public static readonly string CONNECTION_TIMEOUT = "connectionTimeout";
Expand Down
4 changes: 2 additions & 2 deletions OWASP.AntiSamy/OWASP.AntiSamy.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ Another way of saying that could be: It's an API that helps you make sure that c
</PropertyGroup>
<ItemGroup>
<PackageReference Include="AngleSharp" Version="0.16.1" />
<PackageReference Include="AngleSharp.Css" Version="0.16.3" />
<PackageReference Include="HtmlAgilityPack" Version="1.11.40" />
<PackageReference Include="AngleSharp.Css" Version="0.16.4" />
<PackageReference Include="HtmlAgilityPack" Version="1.11.42" />
</ItemGroup>
<ItemGroup>
<!-- Added so NuGet copies this folder to the output package -->
Expand Down
Loading

0 comments on commit 550e9b7

Please sign in to comment.