diff --git a/toolset/test_types/fortune/fortune_html_parser.py b/toolset/test_types/fortune/fortune_html_parser.py index c01a5870386..b076b367575 100644 --- a/toolset/test_types/fortune/fortune_html_parser.py +++ b/toolset/test_types/fortune/fortune_html_parser.py @@ -8,8 +8,17 @@ class FortuneHTMLParser(HTMLParser): + IGNORED_TAGS = ( + "", "", + "", "", + "", + "", "", + "", "", + ) + def __init__(self): HTMLParser.__init__(self, convert_charrefs=False) + self.ignore_content = False self.body = [] valid_fortune = ''' @@ -41,7 +50,7 @@ def handle_decl(self, decl): # and since we did not specify xml compliance (where # incorrect casing would throw a syntax error), we must # allow all casings. We will lower for our normalization. - self.body.append("".format(d=decl.lower())) + self.append("".format(d=decl.lower())) def handle_charref(self, name): ''' @@ -63,37 +72,37 @@ def handle_charref(self, name): # equality. if val == "34" or val == "034" or val == "x22": # Append our normalized entity reference to our body. - self.body.append(""") + self.append(""") # "'" is a valid escaping of "-", but it is not # required, so we normalize for equality checking. if val == "39" or val == "039" or val == "x27": - self.body.append("'") + self.append("'") # Again, "+" is a valid escaping of the "+", but # it is not required, so we need to normalize for out # final parse and equality check. if val == "43" or val == "043" or val == "x2b": - self.body.append("+") + self.append("+") # Again, ">" is a valid escaping of ">", but we # need to normalize to ">" for equality checking. if val == "62" or val == "062" or val == "x3e": - self.body.append(">") + self.append(">") # Again, "<" is a valid escaping of "<", but we # need to normalize to "<" for equality checking. 
if val == "60" or val == "060" or val == "x3c": - self.body.append("<") + self.append("<") # Not sure why some are escaping '/' if val == "47" or val == "047" or val == "x2f": - self.body.append("/") + self.append("/") # "&#40;" is a valid escaping of "(", but # it is not required, so we need to normalize for out # final parse and equality check. if val == "40" or val == "040" or val == "x28": - self.body.append("(") + self.append("(") # "&#41;" is a valid escaping of ")", but # it is not required, so we need to normalize for out # final parse and equality check. if val == "41" or val == "041" or val == "x29": - self.body.append(")") + self.append(")") def handle_entityref(self, name): ''' @@ -101,20 +110,20 @@ def handle_entityref(self, name): need to normalize to "—" for equality checking. ''' if name == "mdash": - self.body.append("—") + self.append("—") else: - self.body.append("&{n};".format(n=name)) + self.append("&{n};".format(n=name)) def handle_starttag(self, tag, attrs): ''' This is called every time a tag is opened. We append each one wrapped in "<" and ">". ''' - self.body.append("<{t}>".format(t=tag)) + self.append("<{t}>".format(t=tag)) # Append a newline after the <table> and <html> if tag.lower() == 'table' or tag.lower() == 'html': - self.body.append(os.linesep) + self.append(os.linesep) def handle_data(self, data): ''' @@ -146,18 +155,24 @@ def handle_data(self, data): data = data.replace('"', '&quot;') data = data.replace('>', '&gt;') - self.body.append("{d}".format(d=data)) + self.append("{d}".format(d=data)) def handle_endtag(self, tag): ''' This is called every time a tag is closed. We append each one wrapped in "</" and ">". 
''' - self.body.append("</{t}>".format(t=tag)) + self.append("</{t}>".format(t=tag)) # Append a newline after each </tr> and </head> if tag.lower() == 'tr' or tag.lower() == 'head': - self.body.append(os.linesep) + self.append(os.linesep) + + def append(self, item): + self.ignore_content = item == "<script>" or (self.ignore_content and not item == "</script>") + + if not (self.ignore_content or item in self.IGNORED_TAGS): + self.body.append(item) def isValidFortune(self, name, out): '''