Add gzip decoding support to httpdownloader (implements #1012).

This commit is contained in:
John Garland 2010-01-14 12:43:59 +00:00
parent cbac2fbd5a
commit 0e2dd9f389
2 changed files with 27 additions and 4 deletions

View File

@ -2,6 +2,7 @@
==== Core ==== ==== Core ====
* Implement #1063 option to delete torrent file copy on torrent removal - patch from Ghent * Implement #1063 option to delete torrent file copy on torrent removal - patch from Ghent
* Implement #457 progress bars for folders * Implement #457 progress bars for folders
* Implement #1012 httpdownloader supports gzip decoding
* #496: Remove deprecated functions in favour of get_session_status() * #496: Remove deprecated functions in favour of get_session_status()
==== Blocklist ==== ==== Blocklist ====

View File

@ -39,6 +39,7 @@ from twisted.internet import reactor
from deluge.log import setupLogger, LOG as log from deluge.log import setupLogger, LOG as log
from common import get_version from common import get_version
import os.path import os.path
import zlib
class HTTPDownloader(client.HTTPDownloader): class HTTPDownloader(client.HTTPDownloader):
""" """
@ -56,8 +57,9 @@ class HTTPDownloader(client.HTTPDownloader):
:param headers: any optional headers to send :param headers: any optional headers to send
:type headers: dictionary :type headers: dictionary
""" """
self.__part_callback = part_callback self.part_callback = part_callback
self.current_length = 0 self.current_length = 0
self.decoder = None
self.value = filename self.value = filename
self.force_filename = force_filename self.force_filename = force_filename
agent = "Deluge/%s (http://deluge-torrent.org)" % get_version() agent = "Deluge/%s (http://deluge-torrent.org)" % get_version()
@ -74,6 +76,11 @@ class HTTPDownloader(client.HTTPDownloader):
else: else:
self.total_length = 0 self.total_length = 0
if "content-encoding" in headers and headers["content-encoding"][0] in ("gzip", "x-gzip", "deflate"):
# Adding 32 to the wbits enables gzip & zlib decoding (with automatic header detection)
# Adding 16 just enables gzip decoding (no zlib)
self.decoder = zlib.decompressobj(zlib.MAX_WBITS + 32)
if "content-disposition" in headers and not self.force_filename: if "content-disposition" in headers and not self.force_filename:
try: try:
new_file_name = str(headers["content-disposition"][0]).split(";")[1].split("=")[1] new_file_name = str(headers["content-disposition"][0]).split(";")[1].split("=")[1]
@ -95,11 +102,19 @@ class HTTPDownloader(client.HTTPDownloader):
def pagePart(self, data):
    """
    Handles one chunk of the response body as it arrives.

    For responses whose code is not http.OK the chunk is handed to the
    base class untouched.  Otherwise the length of the chunk as received
    is added to current_length, the chunk is run through the decoder when
    one is set, and the part callback (if any) is called with the
    resulting data before it is passed on to the base class.

    :param data: the chunk of the response body that was received
    :returns: whatever client.HTTPDownloader.pagePart returns
    """
    if self.code != http.OK:
        return client.HTTPDownloader.pagePart(self, data)

    # Progress is counted on the bytes as received, before any decoding.
    self.current_length += len(data)

    chunk = self.decoder.decompress(data) if self.decoder else data

    if self.part_callback:
        self.part_callback(chunk, self.current_length, self.total_length)

    return client.HTTPDownloader.pagePart(self, chunk)
def pageEnd(self):
    """
    Finishes the download, emitting any data still held by the decoder.

    When a decoder is active its remaining output is flushed, reported to
    the part callback (if one is set, the response code is http.OK and
    there is data to report) and written out through the base class
    before the base class pageEnd is run.

    :returns: whatever client.HTTPDownloader.pageEnd returns
    """
    if self.decoder:
        remainder = self.decoder.flush()
        # The stream is finished; drop the decoder so it cannot be fed or
        # flushed a second time.
        self.decoder = None
        # current_length counts bytes as received, all of which were
        # already counted in pagePart, so it is left unchanged here.
        if remainder and self.code == http.OK and self.part_callback:
            self.part_callback(remainder, self.current_length, self.total_length)
        client.HTTPDownloader.pagePart(self, remainder)
    return client.HTTPDownloader.pageEnd(self)
def sanitise_filename(filename): def sanitise_filename(filename):
""" """
Sanitises a filename to use as a download destination file. Sanitises a filename to use as a download destination file.
@ -132,7 +147,7 @@ def sanitise_filename(filename):
return filename return filename
def download_file(url, filename, callback=None, headers=None, force_filename=False): def download_file(url, filename, callback=None, headers=None, force_filename=False, allow_compression=True):
""" """
Downloads a file from a specific URL and returns a Deferred. You can also Downloads a file from a specific URL and returns a Deferred. You can also
specify a callback function to be called as parts are received. specify a callback function to be called as parts are received.
@ -149,6 +164,8 @@ def download_file(url, filename, callback=None, headers=None, force_filename=Fal
:param force_filename: force us to use the filename specified rather than :param force_filename: force us to use the filename specified rather than
one the server may suggest one the server may suggest
:type force_filename: boolean :type force_filename: boolean
:param allow_compression: allows gzip & deflate decoding
:type allow_compression: boolean
:returns: the filename of the downloaded file :returns: the filename of the downloaded file
:rtype: Deferred :rtype: Deferred
@ -163,6 +180,11 @@ def download_file(url, filename, callback=None, headers=None, force_filename=Fal
for key, value in headers.items(): for key, value in headers.items():
headers[str(key)] = str(value) headers[str(key)] = str(value)
if allow_compression:
if not headers:
headers = {}
headers["accept-encoding"] = "gzip, deflate"
scheme, host, port, path = client._parse(url) scheme, host, port, path = client._parse(url)
factory = HTTPDownloader(url, filename, callback, headers, force_filename) factory = HTTPDownloader(url, filename, callback, headers, force_filename)
if scheme == "https": if scheme == "https":