[TrackerIcon] Fixed parse error on UTF-8 sites with non-english chars

When parsing the site's page in search of the FAVICON, the page gets opened.
The default file encoding is dependent on the running OS, and might not
be `UTF-8` on Windows.
Therefore, some trackers might not get their icon downloaded at all because of
an error:
`UnicodeDecodeError: 'charmap' codec can't decode byte 0x90 in position 2158: character maps to <undefined>`.
This fix adds detection of the file encoding using the optional `chardet` dependency, and also adds a test.

Closes: deluge-torrent/deluge#333
Closes: https://dev.deluge-torrent.org/ticket/3479
This commit is contained in:
DjLegolas 2022-01-05 02:02:32 +02:00 committed by Calum Lind
parent 44dcbee5f4
commit 517b2c653b
No known key found for this signature in database
GPG Key ID: 90597A687B836BA3
2 changed files with 20 additions and 2 deletions

View File

@ -14,7 +14,6 @@ from . import common
from .basetest import BaseTestCase
common.set_tmp_config_dir()
deluge.ui.tracker_icons.PIL_INSTALLED = False
common.disable_new_release_check()
@ -45,6 +44,14 @@ class TrackerIconsTestCase(BaseTestCase):
d.addCallback(self.assertEqual, icon)
return d
def test_get_google_ico_hebrew(self):
    """Check that the icon fetched for www.google.co.il matches google.ico.

    Regression test for reading the Google.co.il page as UTF-8.
    """
    # Reference icon built from the bundled test data file.
    expected_icon = TrackerIcon(common.get_test_data_file('google.ico'))

    deferred = self.icons.fetch('www.google.co.il')
    # The fetch result must not be None, and must equal the reference icon.
    deferred.addCallback(self.assertNotIdentical, None)
    deferred.addCallback(self.assertEqual, expected_icon)
    return deferred
def test_get_google_ico_with_redirect(self):
# google.com redirects to www.google.com
icon = TrackerIcon(common.get_test_data_file('google.ico'))

View File

@ -21,6 +21,11 @@ from deluge.configmanager import get_config_dir
from deluge.decorators import proxy
from deluge.httpdownloader import download_file
try:
import chardet
except ImportError:
chardet = None
try:
from PIL import Image
except ImportError:
@ -289,7 +294,13 @@ class TrackerIcons(Component):
:returns: a Deferred which callbacks a list of available favicons (url, type)
:rtype: Deferred
"""
with open(page) as _file:
encoding = 'UTF-8'
if chardet:
with open(page, 'rb') as _file:
result = chardet.detect(_file.read())
encoding = result['encoding']
with open(page, encoding=encoding) as _file:
parser = FaviconParser()
for line in _file:
parser.feed(line)