Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

3 failing test examples that previously worked with AntiSamy 1.7.3. #388

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions src/test/java/org/owasp/validator/html/test/FailingESAPITest.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
/**
* Copyright - The OWASP Foundation - 2021-2023 - All rights reserved.
*
* <p>These are examples of ESAPI JUnit tests that worked until we updated to AntiSamy 1.7.4.
* It appears that AntiSamy is now sanitizing these inputs differently.
*
* <p>Note: I am NOT asking how to "fix" these to make them work. That is pretty much obvious.
* Rather, I am curious whether this is SURPRISING to you. Nothing in your 1.7.4 release notes
* alludes to this change in behavior. At the very least, people should be made aware of it, as it
* clearly can break developers' regression tests against AntiSamy.
*
* @author: [email protected]
*/
import static org.junit.Assert.assertEquals;

import java.net.URL;
import org.junit.Before;
import org.junit.Test;
import org.owasp.validator.html.AntiSamy;
import org.owasp.validator.html.CleanResults;
import org.owasp.validator.html.Policy;

/**
 * Regression examples: each test scans one tainted input with AntiSamy's DOM parser under the
 * policy loaded from {@code /antisamy-esapi.xml} and compares the cleaned HTML with a fixed
 * expected string.
 */
public class FailingESAPITest {
  /** Classpath location of ESAPI's AntiSamy policy file. (This was from ESAPI 2.5.2.0.) */
  private static final String POLICY_RESOURCE = "/antisamy-esapi.xml";

  private final AntiSamy as = new AntiSamy();
  private Policy policy = null;

  /**
   * Loads the policy before every test.
   *
   * @throws IllegalStateException if the policy file is not on the test classpath
   * @throws Exception if AntiSamy cannot build a policy from the file
   */
  @Before
  public void setUp() throws Exception {
    URL url = getClass().getResource(POLICY_RESOURCE);
    if (url == null) {
      // getResource() returns null for a missing resource; fail here with a message that names
      // the file instead of letting the null travel into Policy.getInstance().
      throw new IllegalStateException(
          "Policy resource not found on classpath: " + POLICY_RESOURCE);
    }
    policy = Policy.getInstance(url);
  }

  @Test
  public void testAntiSamyRegressionCDATAWithJavascriptURL() throws Exception {
    String tainted = "<style/>b<![cdata[</style><a href=javascript:alert(1)>test";
    String expected = "b&lt;/style&gt;&lt;a href=javascript:alert(1)&gt;test";

    assertCleansedTo(expected, tainted);
  }

  @Test
  public void testOnfocusAfterStyleClosing() throws Exception {
    String tainted = "<select<style/>k<input<</>input/onfocus=alert(1)>";
    String expected =
        "k&lt;input/onfocus=alert(1)&gt;"; // Suspicious? Doesn't agree w/ AntiSamy test.

    assertCleansedTo(expected, tainted);
  }

  @Test
  public void testScriptTagAfterStyleClosing() throws Exception {
    String tainted = "<select<style/>W<xmp<script>alert(1)</script>";
    String expected = "W&lt;script&gt;alert(1)&lt;/script&gt;";

    assertCleansedTo(expected, tainted);
  }

  /**
   * Scans {@code tainted} with the loaded policy and asserts that the cleaned HTML equals
   * {@code expected}.
   *
   * @param expected the cleaned HTML the test expects
   * @param tainted the untrusted HTML fragment to sanitize
   * @throws Exception if the scan itself fails
   */
  private void assertCleansedTo(String expected, String tainted) throws Exception {
    CleanResults cr = as.scan(tainted, policy, AntiSamy.DOM); // ESAPI 2.5.2.0 uses DOM parser.
    String cleansed = cr.getCleanHTML();

    assertEquals(expected, cleansed);
  }
}
156 changes: 156 additions & 0 deletions src/test/resources/antisamy-esapi.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
<?xml version="1.0" encoding="ISO-8859-1"?>

<!-- W3C rules retrieved from: http://www.w3.org/TR/html401/struct/global.html -->

<!-- Slashdot allowed tags taken from "Reply" page: <b> <i> <p> <br> <a> <ol> <ul> <li> <dl> <dt> <dd> <em> <strong> <tt> <blockquote> <div>
<ecode> <quote> -->

<anti-samy-rules xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="antisamy.xsd">

<directives>
<!-- Policy-wide settings, each given as a name/value pair. The notes below paraphrase the directive names;
confirm exact semantics against the AntiSamy documentation. -->
<!-- Leave the XML declaration and the DOCTYPE declaration out of the cleaned output. -->
<directive name="omitXmlDeclaration" value="true" />
<directive name="omitDoctypeDeclaration" value="true" />
<!-- Upper bound on the size of the input to scan (presumably measured in characters). -->
<directive name="maxInputSize" value="500000" />
<!-- Stylesheet embedding is switched off. -->
<directive name="embedStyleSheets" value="false" />
<!-- Presumably makes AntiSamy add noopener/noreferrer to anchors in the output; TODO confirm which anchors qualify. -->
<directive name="noopenerAndNoreferrerAnchors" value="true" />
</directives>


<common-regexps>

<!-- Named patterns that attribute definitions in this file reference via <regexp name="..." />. -->

<!-- NOTE(review): the W3C quote below describes the 'class' attribute, but no class-related regexp is defined in this section;
presumably it is left over from a fuller policy file. -->

<!-- From W3C: This attribute assigns a class name or set of class names to an element. Any number of elements may be assigned the same
class name or names. Multiple class names must be separated by white space characters. -->

<!-- htmlTitle: any run (possibly empty) of letters, digits, whitespace and a small set of punctuation characters. -->
<regexp name="htmlTitle" value="[\p{L}\p{N}\s\-_',:\[\]!\./\\\(\)&amp;]*" />
<!-- onsiteURL: two negative lookaheads reject a value that starts with "//" and a value that contains the text "&colon";
the character class that follows has no ':' in it, so a value with a scheme prefix cannot match in full. -->
<regexp name="onsiteURL"
value="^(?!//)(?![\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*(&amp;colon))[\p{L}\p{N}\\\.\#@\$%\+&amp;;\-_~,\?=/!]*" />
<!-- offsiteURL: optional surrounding whitespace; must begin with http://, https://, ftp://, ftps:// or mailto:,
followed by at least one letter or digit and then a run of URL characters. -->
<regexp name="offsiteURL"
value="(\s)*((ht|f)tp(s?)://|mailto:)[\p{L}\p{N}]+[\p{L}\p{N}\p{Zs}\.\#@\$%\+&amp;;:\-_~,\?=/!\(\)]*(\s)*" />

</common-regexps>

<!-- Tag.name = a, b, div, body, etc. Tag.action = filter: remove tags, but keep content, validate: keep content as long as it passes rules,
remove: remove tag and contents Attribute.name = id, class, href, align, width, etc. Attribute.onInvalid = what to do when the attribute is invalid,
e.g., remove the tag (removeTag), remove the attribute (removeAttribute), filter the tag (filterTag) Attribute.description = What rules in English
you want to tell the users they can have for this attribute. Include helpful things so they'll be able to tune their HTML -->

<!-- Some attributes are common to all (or most) HTML tags. There aren't many that qualify for this. You have to make sure there's no collisions
between any of these attribute names with attribute names of other tags that are for different purposes. -->

<common-attributes>

<!-- Attribute definitions that tag rules and the global attribute list refer to by name. -->

<!-- lang: 2 to 20 ASCII letters. -->
<attribute name="lang"
description="The 'lang' attribute tells the browser what language the element's attribute values and content are written in">
<regexp-list>
<regexp value="[a-zA-Z]{2,20}" />
</regexp-list>
</attribute>

<!-- title: validated with the htmlTitle pattern from common-regexps. -->
<attribute name="title"
description="The 'title' attribute provides text that shows up in a 'tooltip' when a user hovers their mouse over the element">
<regexp-list>
<regexp name="htmlTitle" />
</regexp-list>
</attribute>

<!-- href: lists the onsiteURL and offsiteURL patterns (presumably a value passes when it matches either one).
onInvalid="filterTag": per the legend in this file, an invalid value causes the tag to be filtered. -->
<attribute name="href" onInvalid="filterTag">
<regexp-list>
<regexp name="onsiteURL" />
<regexp name="offsiteURL" />
</regexp-list>
</attribute>

<!-- align: restricted to the five literal values listed below. -->
<attribute name="align"
description="The 'align' attribute of an HTML element is a direction word, like 'left', 'right' or 'center'">
<literal-list>
<literal value="center" />
<literal value="left" />
<literal value="right" />
<literal value="justify" />
<literal value="char" />
</literal-list>
</attribute>

</common-attributes>


<!-- This requires normal updates as browsers continue to diverge from the W3C and each other. As long as the browser wars continue this
is going to continue. I'm not sure war is the right word for what's going on. Doesn't somebody have to win a war after a while? -->

<global-tag-attributes>
<!-- Both names refer to definitions in common-attributes; presumably these attributes are accepted on every allowed tag. -->
<attribute name="title" />
<attribute name="lang" />
</global-tag-attributes>


<tag-rules>

<!-- One <tag> entry per HTML element this policy names. The action values used below are remove, validate and truncate. -->

<!-- Tags related to JavaScript -->

<tag name="script" action="remove" />
<tag name="noscript" action="remove" />

<!-- Frame & related tags -->

<tag name="iframe" action="remove" />
<tag name="frameset" action="remove" />
<tag name="frame" action="remove" />
<tag name="noframes" action="remove" />


<!-- All reasonable formatting tags -->

<!-- p is the only formatting tag here that lists an attribute of its own (align, defined in common-attributes). -->
<tag name="p" action="validate">
<attribute name="align" />
</tag>

<tag name="div" action="validate" />
<tag name="i" action="validate" />
<tag name="b" action="validate" />
<tag name="em" action="validate" />
<tag name="blockquote" action="validate" />
<tag name="tt" action="validate" />

<!-- NOTE(review): "truncate" is not covered by the action legend earlier in this file; presumably it keeps the tag
but strips its attributes and content. Confirm against the AntiSamy documentation. -->
<tag name="br" action="truncate" />

<!-- Custom Slashdot tags, though we're trimming the idea of having a possible mismatching end tag with the endtag="" attribute -->

<tag name="quote" action="validate" />
<tag name="ecode" action="validate" />


<!-- Anchor and anchor related tags -->

<tag name="a" action="validate">

<!-- href refers to the common-attributes definition and repeats its onInvalid="filterTag" setting. -->
<attribute name="href" onInvalid="filterTag" />
<!-- nohref accepts only the literal "nohref" or an empty value. -->
<attribute name="nohref">
<literal-list>
<literal value="nohref" />
<literal value="" />
</literal-list>
</attribute>
<!-- rel accepts only the literal "nofollow". -->
<attribute name="rel">
<literal-list>
<literal value="nofollow" />
</literal-list>
</attribute>
</tag>

<!-- List tags -->

<tag name="ul" action="validate" />
<tag name="ol" action="validate" />
<tag name="li" action="validate" />

</tag-rules>



<!-- No CSS on Slashdot posts -->

<css-rules>
<!-- Empty: this policy defines no CSS property rules. -->
</css-rules>

</anti-samy-rules>
Loading