diff options
| author | John MacFarlane <jgm@berkeley.edu> | 2014-11-20 08:22:20 -0800 | 
|---|---|---|
| committer | John MacFarlane <jgm@berkeley.edu> | 2014-11-20 08:22:20 -0800 | 
| commit | c9875cbbbe293e6727a7a25b79e7ea4949ef5670 (patch) | |
| tree | 23d9aaec1026d64d117c3dfd2acdeb5ff63a4085 /runtests.py | |
| parent | 698dab76847e5d671cce42a0c0ce2c98c5f07776 (diff) | |
runtests.py: catch HTMLParser errors in normalizer.
Diffstat (limited to 'runtests.py')
| -rwxr-xr-x | runtests.py | 14 | 
1 file changed, 9 insertions, 5 deletions
```diff
diff --git a/runtests.py b/runtests.py
index b3c8d98..83c331d 100755
--- a/runtests.py
+++ b/runtests.py
@@ -7,7 +7,7 @@ import platform
 from difflib import unified_diff
 from subprocess import *
 import argparse
-from HTMLParser import HTMLParser
+from HTMLParser import HTMLParser, HTMLParseError
 from htmlentitydefs import name2codepoint
 import re
 import cgi
@@ -180,10 +180,14 @@ def normalize_html(html):
     * HTMLParser just swallows CDATA.
     * HTMLParser seems to treat unknown declarations as comments.
     """
-    parser = MyHTMLParser()
-    parser.feed(html.decode(encoding='UTF-8'))
-    parser.close()
-    return parser.output
+    try:
+        parser = MyHTMLParser()
+        parser.feed(html.decode(encoding='UTF-8'))
+        parser.close()
+        return parser.output
+    except HTMLParseError as e:
+        sys.stderr.write("Normalization error: " + e.msg + "\n")
+        return html  # on error, return unnormalized HTML
 
 def print_test_header(headertext, example_number, start_line, end_line):
     print "Example %d (lines %d-%d) %s" % (example_number,start_line,end_line,headertext)
```
