From d1922eb6f17578774866a13fd5428cdd3bc2280d Mon Sep 17 00:00:00 2001 From: John MacFarlane Date: Sat, 27 Dec 2014 14:39:47 -0700 Subject: Updated tests to use python3. --- test/normalize.py | 42 +++++++++++++++++++++--------------------- 1 file changed, 21 insertions(+), 21 deletions(-) (limited to 'test/normalize.py') diff --git a/test/normalize.py b/test/normalize.py index 894a837..03d958e 100644 --- a/test/normalize.py +++ b/test/normalize.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -from HTMLParser import HTMLParser, HTMLParseError -from htmlentitydefs import name2codepoint +from html.parser import HTMLParser, HTMLParseError +from html.entities import name2codepoint import sys import re import cgi @@ -14,7 +14,7 @@ class MyHTMLParser(HTMLParser): HTMLParser.__init__(self) self.last = "starttag" self.in_pre = False - self.output = u"" + self.output = "" self.last_tag = "" def handle_data(self, data): after_tag = self.last == "endtag" or self.last == "starttag" @@ -74,7 +74,7 @@ class MyHTMLParser(HTMLParser): self.last = "pi" def handle_entityref(self, name): try: - c = unichr(name2codepoint[name]) + c = chr(name2codepoint[name]) except KeyError: c = None self.output_char(c, '&' + name + ';') @@ -82,22 +82,22 @@ class MyHTMLParser(HTMLParser): def handle_charref(self, name): try: if name.startswith("x"): - c = unichr(int(name[1:], 16)) + c = chr(int(name[1:], 16)) else: - c = unichr(int(name)) + c = chr(int(name)) except ValueError: c = None self.output_char(c, '&' + name + ';') self.last = "ref" # Helpers. def output_char(self, c, fallback): - if c == u'<': + if c == '<': self.output += "<" - elif c == u'>': + elif c == '>': self.output += ">" - elif c == u'&': + elif c == '&': self.output += "&" - elif c == u'"': + elif c == '"': self.output += """ elif c == None: self.output += fallback @@ -122,43 +122,43 @@ def normalize_html(html): in pre tags): >>> normalize_html("

a \t b

") - u'

a b

' + '

a b

' >>> normalize_html("

a \t\nb

") - u'

a b

' + '

a b

' * Whitespace surrounding block-level tags is removed. >>> normalize_html("

a b

") - u'

a b

' + '

a b

' >>> normalize_html("

a b

") - u'

a b

' + '

a b

' >>> normalize_html("

a b

") - u'

a b

' + '

a b

' >>> normalize_html("\n\t

\n\t\ta b\t\t

\n\t") - u'

a b

' + '

a b

' >>> normalize_html("a b ") - u'a b ' + 'a b ' * Self-closing tags are converted to open tags. >>> normalize_html("
") - u'
' + '
' * Attributes are sorted and lowercased. >>> normalize_html('x') - u'x' + 'x' * References are converted to unicode, except that '<', '>', '&', and '"' are rendered using entities. >>> normalize_html("∀&><"") - u'\u2200&><"' + '\u2200&><"' """ html_chunk_re = re.compile("(\|\<[^>]*\>|[^<]+)") @@ -171,7 +171,7 @@ def normalize_html(html): if chunk.group(0)[:8] == "