deluge/deluge/httpdownloader.py

218 lines
8.3 KiB
Python

#
# httpdownloader.py
#
# Copyright (C) 2009 Andrew Resch <andrewresch@gmail.com>
#
# Deluge is free software.
#
# You may redistribute it and/or modify it under the terms of the
# GNU General Public License, as published by the Free Software
# Foundation; either version 3 of the License, or (at your option)
# any later version.
#
# deluge is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
# See the GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with deluge. If not, write to:
# The Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor
# Boston, MA 02110-1301, USA.
#
# In addition, as a special exception, the copyright holders give
# permission to link the code of portions of this program with the OpenSSL
# library.
# You must obey the GNU General Public License in all respects for all of
# the code used other than OpenSSL. If you modify file(s) with this
# exception, you may extend this exception to your version of the file(s),
# but you are not obligated to do so. If you do not wish to do so, delete
# this exception statement from your version. If you delete this exception
# statement from all source files in the program, then also delete it here.
#
from twisted.web import client, http
from twisted.web.error import PageRedirect
from twisted.python.failure import Failure
from twisted.internet import reactor
from common import get_version
import logging
import os.path
import zlib
log = logging.getLogger(__name__)
class HTTPDownloader(client.HTTPDownloader):
"""
Factory class for downloading files and keeping track of progress.
"""
def __init__(self, url, filename, part_callback=None, headers=None,
force_filename=False, allow_compression=True):
"""
:param url: the url to download from
:type url: string
:param filename: the filename to save the file as
:type filename: string
:param force_filename: forces use of the supplied filename, regardless of header content
:type force_filename: bool
:param part_callback: a function to be called when a part of data
is received, it's signature should be: func(data, current_length, total_length)
:type part_callback: function
:param headers: any optional headers to send
:type headers: dictionary
"""
self.part_callback = part_callback
self.current_length = 0
self.decoder = None
self.value = filename
self.force_filename = force_filename
self.allow_compression = allow_compression
agent = "Deluge/%s (http://deluge-torrent.org)" % get_version()
client.HTTPDownloader.__init__(self, url, filename, headers=headers, agent=agent)
def gotStatus(self, version, status, message):
self.code = int(status)
client.HTTPDownloader.gotStatus(self, version, status, message)
def gotHeaders(self, headers):
if self.code == http.OK:
if "content-length" in headers:
self.total_length = int(headers["content-length"][0])
else:
self.total_length = 0
if self.allow_compression and "content-encoding" in headers and \
headers["content-encoding"][0] in ("gzip", "x-gzip", "deflate"):
# Adding 32 to the wbits enables gzip & zlib decoding (with automatic header detection)
# Adding 16 just enables gzip decoding (no zlib)
self.decoder = zlib.decompressobj(zlib.MAX_WBITS + 32)
if "content-disposition" in headers and not self.force_filename:
new_file_name = str(headers["content-disposition"][0]).split(";")[1].split("=")[1]
new_file_name = sanitise_filename(new_file_name)
new_file_name = os.path.join(os.path.split(self.fileName)[0], new_file_name)
count = 1
fileroot = os.path.splitext(new_file_name)[0]
fileext = os.path.splitext(new_file_name)[1]
while os.path.isfile(new_file_name):
# Increment filename if already exists
new_file_name = "%s-%s%s" % (fileroot, count, fileext)
count += 1
self.fileName = new_file_name
self.value = new_file_name
elif self.code in (http.MOVED_PERMANENTLY, http.FOUND, http.SEE_OTHER, http.TEMPORARY_REDIRECT):
location = headers["location"][0]
error = PageRedirect(self.code, location=location)
self.noPage(Failure(error))
return client.HTTPDownloader.gotHeaders(self, headers)
def pagePart(self, data):
if self.code == http.OK:
self.current_length += len(data)
if self.decoder:
data = self.decoder.decompress(data)
if self.part_callback:
self.part_callback(data, self.current_length, self.total_length)
return client.HTTPDownloader.pagePart(self, data)
def pageEnd(self):
if self.decoder:
data = self.decoder.flush()
self.current_length -= len(data)
self.decoder = None
self.pagePart(data)
return client.HTTPDownloader.pageEnd(self)
def sanitise_filename(filename):
"""
Sanitises a filename to use as a download destination file.
Logs any filenames that could be considered malicious.
:param filename: the filename to sanitise
:type filename: string
:returns: the sanitised filename
:rtype: string
"""
# Remove any quotes
filename = filename.strip("'\"")
if os.path.basename(filename) != filename:
# Dodgy server, log it
log.warning("Potentially malicious server: trying to write to file '%s'" % filename)
# Only use the basename
filename = os.path.basename(filename)
filename = filename.strip()
if filename.startswith(".") or ";" in filename or "|" in filename:
# Dodgy server, log it
log.warning("Potentially malicious server: trying to write to file '%s'" % filename)
return filename
def download_file(url, filename, callback=None, headers=None,
force_filename=False, allow_compression=True):
"""
Downloads a file from a specific URL and returns a Deferred. You can also
specify a callback function to be called as parts are received.
:param url: the url to download from
:type url: string
:param filename: the filename to save the file as
:type filename: string
:param callback: a function to be called when a part of data is received,
it's signature should be: func(data, current_length, total_length)
:type callback: function
:param headers: any optional headers to send
:type headers: dictionary
:param force_filename: force us to use the filename specified rather than
one the server may suggest
:type force_filename: boolean
:param allow_compression: allows gzip & deflate decoding
:type allow_compression: boolean
:returns: the filename of the downloaded file
:rtype: Deferred
:raises t.w.e.PageRedirect: when server responds with a temporary redirect
or permanently moved.
:raises t.w.e.Error: for all other HTTP response errors (besides OK)
"""
url = str(url)
filename = str(filename)
if headers:
for key, value in headers.items():
headers[str(key)] = str(value)
if allow_compression:
if not headers:
headers = {}
headers["accept-encoding"] = "deflate, gzip, x-gzip"
# In twisted 13.1.0 the _parse() function was replaced by the _URI class
if hasattr(client, '_parse'):
scheme, host, port, path = client._parse(url)
else:
from twisted.web.client import _URI
uri = _URI.fromBytes(url)
scheme = uri.scheme
host = uri.host
port = uri.port
path = uri.path
factory = HTTPDownloader(url, filename, callback, headers, force_filename, allow_compression)
if scheme == "https":
from twisted.internet import ssl
reactor.connectSSL(host, port, factory, ssl.ClientContextFactory())
else:
reactor.connectTCP(host, port, factory)
return factory.deferred