Stop parsing the HTML page once the parser has left the <head> of the page
This commit is contained in:
parent
c4b20aa595
commit
e4ef17975c
|
@ -256,6 +256,8 @@ class TrackerIcons(Component):
|
||||||
parser = FaviconParser()
|
parser = FaviconParser()
|
||||||
for line in f:
|
for line in f:
|
||||||
parser.feed(line)
|
parser.feed(line)
|
||||||
|
if parser.left_head:
|
||||||
|
break
|
||||||
parser.close()
|
parser.close()
|
||||||
f.close()
|
f.close()
|
||||||
os.remove(page)
|
os.remove(page)
|
||||||
|
@ -410,6 +412,7 @@ class FaviconParser(HTMLParser):
|
||||||
"""
|
"""
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.icons = []
|
self.icons = []
|
||||||
|
self.left_head = False
|
||||||
HTMLParser.__init__(self)
|
HTMLParser.__init__(self)
|
||||||
|
|
||||||
def handle_starttag(self, tag, attrs):
|
def handle_starttag(self, tag, attrs):
|
||||||
|
@ -424,6 +427,10 @@ class FaviconParser(HTMLParser):
|
||||||
if href and type:
|
if href and type:
|
||||||
self.icons.append((href, type))
|
self.icons.append((href, type))
|
||||||
|
|
||||||
|
def handle_endtag(self, tag):
|
||||||
|
if tag == "head":
|
||||||
|
self.left_head = True
|
||||||
|
|
||||||
def get_icons(self):
|
def get_icons(self):
|
||||||
"""
|
"""
|
||||||
Returns a list of favicons extracted from the HTML page
|
Returns a list of favicons extracted from the HTML page
|
||||||
|
|
|
@ -36,3 +36,11 @@ class TrackerIconsTestCase(unittest.TestCase):
|
||||||
d.addCallback(self.assertNotIdentical, None)
|
d.addCallback(self.assertNotIdentical, None)
|
||||||
d.addCallback(self.assertEquals, icon)
|
d.addCallback(self.assertEquals, icon)
|
||||||
return d
|
return d
|
||||||
|
|
||||||
|
def test_get_ubuntu_ico(self):
|
||||||
|
# ubuntu.com has inline css which causes HTMLParser issues
|
||||||
|
icon = TrackerIcon("../ubuntu.png")
|
||||||
|
d = icons.get("www.ubuntu.com")
|
||||||
|
d.addCallback(self.assertNotIdentical, None)
|
||||||
|
d.addCallback(self.assertEquals, icon)
|
||||||
|
return d
|
||||||
|
|
Loading…
Reference in New Issue