Skip to content

Commit

Permalink
Relax HTML validation for Fortunes test
Browse files Browse the repository at this point in the history
Some frameworks, such as Next.js, automatically inject `<meta>`,
`<link>`, and `<script>` tags into pages rendered by their templating
system.  Since the Fortunes test is meant to exercise the templating
system (as opposed to raw string concatenation), it should allow these
tags.

Furthermore, React warns against nesting a `<tr>` directly inside a
`<table>` ("<tr> cannot be a child of <table>") because browsers will
automatically wrap the `<tr>` elements in a `<tbody>`, causing a
mismatch with the virtual DOM.  Therefore, the Fortunes test should
allow optional `<tbody>` (and `<thead>`) tags.

This commit relaxes the HTML validation for the Fortunes test to allow
these tags by simply ignoring them when building the comparison string.
  • Loading branch information
jonathanhefner committed Jan 8, 2025
1 parent ddd0952 commit 7f84497
Showing 1 changed file with 31 additions and 16 deletions.
47 changes: 31 additions & 16 deletions toolset/test_types/fortune/fortune_html_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,17 @@


class FortuneHTMLParser(HTMLParser):
IGNORED_TAGS = (
"<meta>", "</meta>",
"<link>", "</link>",
"<script>", "</script>",
"<thead>", "</thead>",
"<tbody>", "</tbody>",
)

def __init__(self):
HTMLParser.__init__(self, convert_charrefs=False)
self.ignore_content = False
self.body = []

valid_fortune = '''<!doctype html><html>
Expand Down Expand Up @@ -41,7 +50,7 @@ def handle_decl(self, decl):
# and since we did not specify xml compliance (where
# incorrect casing would throw a syntax error), we must
# allow all casings. We will lower for our normalization.
self.body.append("<!{d}>".format(d=decl.lower()))
self.append("<!{d}>".format(d=decl.lower()))

def handle_charref(self, name):
'''
Expand All @@ -63,58 +72,58 @@ def handle_charref(self, name):
# equality.
if val == "34" or val == "034" or val == "x22":
# Append our normalized entity reference to our body.
self.body.append("&quot;")
self.append("&quot;")
# "&#39;" is a valid escaping of "'", but it is not
# required, so we normalize for equality checking.
if val == "39" or val == "039" or val == "x27":
self.body.append("&apos;")
self.append("&apos;")
# Again, "&#43;" is a valid escaping of the "+", but
# it is not required, so we need to normalize for our
# final parse and equality check.
if val == "43" or val == "043" or val == "x2b":
self.body.append("+")
self.append("+")
# Again, "&#62;" is a valid escaping of ">", but we
# need to normalize to "&gt;" for equality checking.
if val == "62" or val == "062" or val == "x3e":
self.body.append("&gt;")
self.append("&gt;")
# Again, "&#60;" is a valid escaping of "<", but we
# need to normalize to "&lt;" for equality checking.
if val == "60" or val == "060" or val == "x3c":
self.body.append("&lt;")
self.append("&lt;")
# Not sure why some are escaping '/'
if val == "47" or val == "047" or val == "x2f":
self.body.append("/")
self.append("/")
# "&#40;" is a valid escaping of "(", but
# it is not required, so we need to normalize for our
# final parse and equality check.
if val == "40" or val == "040" or val == "x28":
self.body.append("(")
self.append("(")
# "&#41;" is a valid escaping of ")", but
# it is not required, so we need to normalize for our
# final parse and equality check.
if val == "41" or val == "041" or val == "x29":
self.body.append(")")
self.append(")")

def handle_entityref(self, name):
'''
Again, "&mdash;" is a valid escaping of "—", but we
need to normalize to "—" for equality checking.
'''
if name == "mdash":
self.body.append("—")
self.append("—")
else:
self.body.append("&{n};".format(n=name))
self.append("&{n};".format(n=name))

def handle_starttag(self, tag, attrs):
'''
This is called every time a tag is opened. We append
each one wrapped in "<" and ">".
'''
self.body.append("<{t}>".format(t=tag))
self.append("<{t}>".format(t=tag))

# Append a newline after the <table> and <html>
if tag.lower() == 'table' or tag.lower() == 'html':
self.body.append(os.linesep)
self.append(os.linesep)

def handle_data(self, data):
'''
Expand Down Expand Up @@ -146,18 +155,24 @@ def handle_data(self, data):
data = data.replace('"', '&quot;')
data = data.replace('>', '&gt;')

self.body.append("{d}".format(d=data))
self.append("{d}".format(d=data))

def handle_endtag(self, tag):
'''
This is called every time a tag is closed. We append
each one wrapped in "</" and ">".
'''
self.body.append("</{t}>".format(t=tag))
self.append("</{t}>".format(t=tag))

# Append a newline after each </tr> and </head>
if tag.lower() == 'tr' or tag.lower() == 'head':
self.body.append(os.linesep)
self.append(os.linesep)

def append(self, item):
    '''
    Add a normalized fragment to the body unless it has to be
    skipped. Everything from "<script>" up to and including
    "</script>" is dropped, as is any tag in IGNORED_TAGS.
    '''
    if item == "<script>":
        # Entering a script element: start discarding fragments.
        self.ignore_content = True
    elif item == "</script>":
        # Leaving the script element: stop discarding. The closing
        # tag itself is still filtered out by IGNORED_TAGS below.
        self.ignore_content = False

    if self.ignore_content:
        return
    if item in self.IGNORED_TAGS:
        return

    self.body.append(item)

def isValidFortune(self, name, out):
'''
Expand Down

0 comments on commit 7f84497

Please sign in to comment.