[#1260] Handle redirection better with httpdownloader

This commit is contained in:
bendikro 2015-11-29 15:55:58 +01:00 committed by Calum Lind
parent 77aa540dc3
commit 02f6bfd578
5 changed files with 134 additions and 96 deletions

View File

@ -15,11 +15,9 @@ import os
import shutil
import tempfile
import threading
from urlparse import urljoin
import twisted.web.client
import twisted.web.error
from twisted.internet import reactor, task
from twisted.web.client import getPage
import deluge.common
import deluge.component as component
@ -282,25 +280,9 @@ class Core(component.Component):
return self.add_torrent_file(filename, base64.encodestring(data), options)
def on_download_fail(failure):
if failure.check(twisted.web.error.PageRedirect):
new_url = urljoin(url, failure.getErrorMessage().split(" to ")[1])
result = download_file(
new_url, tempfile.mkstemp()[1], headers=headers,
force_filename=True
)
result.addCallbacks(on_download_success, on_download_fail)
elif failure.check(twisted.web.client.PartialDownloadError):
result = download_file(
url, tempfile.mkstemp()[1], headers=headers,
force_filename=True, allow_compression=False
)
result.addCallbacks(on_download_success, on_download_fail)
else:
# Log the error and pass the failure onto the client
log.error("Error occurred downloading torrent from %s", url)
log.error("Reason: %s", failure.getErrorMessage())
result = failure
return result
log.error("Failed to add torrent from url %s", url)
return failure
d = download_file(
url, tempfile.mkstemp()[1], headers=headers, force_filename=True
@ -890,8 +872,6 @@ class Core(component.Component):
:rtype: bool
"""
from twisted.web.client import getPage
d = getPage("http://deluge-torrent.org/test_port.php?port=%s" %
self.get_listen_port(), timeout=30)

View File

@ -10,6 +10,7 @@
import logging
import os.path
import zlib
from urlparse import urljoin
from twisted.internet import reactor
from twisted.python.failure import Failure
@ -139,32 +140,28 @@ def sanitise_filename(filename):
return filename
def download_file(url, filename, callback=None, headers=None, force_filename=False, allow_compression=True):
def _download_file(url, filename, callback=None, headers=None, force_filename=False, allow_compression=True):
"""
Downloads a file from a specific URL and returns a Deferred. You can also
specify a callback function to be called as parts are received.
Downloads a file from a specific URL and returns a Deferred. A callback
function can be specified to be called as parts are received.
:param url: the url to download from
:type url: string
:param filename: the filename to save the file as
:type filename: string
:param callback: a function to be called when a part of data is received,
Args:
url (str): The url to download from
filename (str): The filename to save the file as
callback (func): A function to be called when a part of data is received,
it's signature should be: func(data, current_length, total_length)
:type callback: function
:param headers: any optional headers to send
:type headers: dictionary
:param force_filename: force us to use the filename specified rather than
headers (dict): Any optional headers to send
force_filename (bool): force us to use the filename specified rather than
one the server may suggest
:type force_filename: boolean
:param allow_compression: allows gzip & deflate decoding
:type allow_compression: boolean
allow_compression (bool): Allows gzip & deflate decoding
:returns: the filename of the downloaded file
:rtype: Deferred
Returns:
Deferred: the filename of the downloaded file
Raises:
t.w.e.PageRedirect
t.w.e.Error: for all other HTTP response errors
:raises t.w.e.PageRedirect: when server responds with a temporary redirect
or permanently moved.
:raises t.w.e.Error: for all other HTTP response errors (besides OK)
"""
url = str(url)
filename = str(filename)
@ -216,3 +213,52 @@ def download_file(url, filename, callback=None, headers=None, force_filename=Fal
reactor.connectTCP(host, port, factory)
return factory.deferred
def download_file(url, filename, callback=None, headers=None, force_filename=False,
allow_compression=True, handle_redirects=True):
"""
Downloads a file from a specific URL and returns a Deferred. A callback
function can be specified to be called as parts are received.
Args:
url (str): The url to download from
filename (str): The filename to save the file as
callback (func): A function to be called when a part of data is received,
it's signature should be: func(data, current_length, total_length)
headers (dict): Any optional headers to send
force_filename (bool): force us to use the filename specified rather than
one the server may suggest
allow_compression (bool): Allows gzip & deflate decoding
handle_redirects (bool): If HTTP redirects should be handled automatically
Returns:
Deferred: the filename of the downloaded file
Raises:
t.w.e.PageRedirect: Unless handle_redirects=True
t.w.e.Error: for all other HTTP response errors
"""
def on_download_success(result):
log.debug("Download success!")
return result
def on_download_fail(failure):
if failure.check(PageRedirect) and handle_redirects:
new_url = urljoin(url, failure.getErrorMessage().split(" to ")[1])
result = _download_file(new_url, filename, callback=callback, headers=headers,
force_filename=force_filename,
allow_compression=allow_compression)
result.addCallbacks(on_download_success, on_download_fail)
else:
# Log the error and pass the failure to the caller
log.error("Error occurred downloading torrent from '%s': %s",
url, failure.getErrorMessage())
result = failure
return result
d = _download_file(url, filename, callback=callback, headers=headers,
force_filename=force_filename, allow_compression=allow_compression)
d.addCallbacks(on_download_success, on_download_fail)
return d

View File

@ -5,8 +5,10 @@ from twisted.internet import reactor
from twisted.internet.error import CannotListenError
from twisted.python.failure import Failure
from twisted.trial import unittest
from twisted.web.error import PageRedirect
from twisted.web.http import NOT_MODIFIED
from twisted.web.server import Site
from twisted.web.util import redirectTo
from deluge.httpdownloader import download_file
from deluge.log import setup_logger
@ -32,7 +34,8 @@ def fname(name):
class RedirectResource(Resource):
def render(self, request):
request.redirect("http://localhost:51242/")
url = self.get_url()
return redirectTo(url, request)
class RenameResource(Resource):
@ -66,6 +69,24 @@ class GzipResource(Resource):
return compress(message, request)
class PartialDownloadResource(Resource):
def __init__(self, *args, **kwargs):
self.render_count = 0
def render(self, request):
# encoding = request.requestHeaders._rawHeaders.get("accept-encoding", None)
if self.render_count == 0:
request.setHeader("content-length", "5")
else:
request.setHeader("content-length", "3")
# if encoding == "deflate, gzip, x-gzip":
request.write('abc')
self.render_count += 1
return ''
class TopLevelResource(Resource):
addSlash = True
@ -74,8 +95,10 @@ class TopLevelResource(Resource):
Resource.__init__(self)
self.putChild("cookie", CookieResource())
self.putChild("gzip", GzipResource())
self.putChild("redirect", RedirectResource())
self.redirect_rsrc = RedirectResource()
self.putChild("redirect", self.redirect_rsrc)
self.putChild("rename", RenameResource())
self.putChild("partial", PartialDownloadResource())
def getChild(self, path, request): # NOQA
if path == "":
@ -91,13 +114,17 @@ class TopLevelResource(Resource):
class DownloadFileTestCase(unittest.TestCase):
def get_url(self, path=""):
return "http://localhost:%d/%s" % (self.listen_port, path)
def setUp(self): # NOQA
setup_logger("warning", fname("log_file"))
website = Site(TopLevelResource())
self.website = Site(TopLevelResource())
self.listen_port = 51242
self.website.resource.redirect_rsrc.get_url = self.get_url
for dummy in range(10):
try:
self.webserver = reactor.listenTCP(self.listen_port, website)
self.webserver = reactor.listenTCP(self.listen_port, self.website)
except CannotListenError as ex:
error = ex
self.listen_port += 1
@ -130,19 +157,19 @@ class DownloadFileTestCase(unittest.TestCase):
return filename
def test_download(self):
d = download_file("http://localhost:%d/" % self.listen_port, fname("index.html"))
d = download_file(self.get_url(), fname("index.html"))
d.addCallback(self.assertEqual, fname("index.html"))
return d
def test_download_without_required_cookies(self):
url = "http://localhost:%d/cookie" % self.listen_port
url = self.get_url("cookie")
d = download_file(url, fname("none"))
d.addCallback(self.fail)
d.addErrback(self.assertIsInstance, Failure)
return d
def test_download_with_required_cookies(self):
url = "http://localhost:%d/cookie" % self.listen_port
url = self.get_url("cookie")
cookie = {"cookie": "password=deluge"}
d = download_file(url, fname("monster"), headers=cookie)
d.addCallback(self.assertEqual, fname("monster"))
@ -150,7 +177,7 @@ class DownloadFileTestCase(unittest.TestCase):
return d
def test_download_with_rename(self):
url = "http://localhost:%d/rename?filename=renamed" % self.listen_port
url = self.get_url("rename?filename=renamed")
d = download_file(url, fname("original"))
d.addCallback(self.assertEqual, fname("renamed"))
d.addCallback(self.assertContains, "This file should be called renamed")
@ -158,54 +185,64 @@ class DownloadFileTestCase(unittest.TestCase):
def test_download_with_rename_exists(self):
open(fname('renamed'), 'w').close()
url = "http://localhost:%d/rename?filename=renamed" % self.listen_port
url = self.get_url("rename?filename=renamed")
d = download_file(url, fname("original"))
d.addCallback(self.assertEqual, fname("renamed-1"))
d.addCallback(self.assertContains, "This file should be called renamed")
return d
def test_download_with_rename_sanitised(self):
url = "http://localhost:%d/rename?filename=/etc/passwd" % self.listen_port
url = self.get_url("rename?filename=/etc/passwd")
d = download_file(url, fname("original"))
d.addCallback(self.assertEqual, fname("passwd"))
d.addCallback(self.assertContains, "This file should be called /etc/passwd")
return d
def test_download_with_rename_prevented(self):
url = "http://localhost:%d/rename?filename=spam" % self.listen_port
url = self.get_url("rename?filename=spam")
d = download_file(url, fname("forced"), force_filename=True)
d.addCallback(self.assertEqual, fname("forced"))
d.addCallback(self.assertContains, "This file should be called spam")
return d
def test_download_with_gzip_encoding(self):
url = "http://localhost:%d/gzip?msg=success" % self.listen_port
url = self.get_url("gzip?msg=success")
d = download_file(url, fname("gzip_encoded"))
d.addCallback(self.assertContains, "success")
return d
def test_download_with_gzip_encoding_disabled(self):
url = "http://localhost:%d/gzip?msg=fail" % self.listen_port
url = self.get_url("gzip?msg=fail")
d = download_file(url, fname("gzip_encoded"), allow_compression=False)
d.addCallback(self.failIfContains, "fail")
return d
def test_page_redirect(self):
url = 'http://localhost:%d/redirect' % self.listen_port
def test_page_redirect_unhandled(self):
url = self.get_url("redirect")
d = download_file(url, fname("none"))
d.addCallback(self.fail)
d.addErrback(self.assertIsInstance, Failure)
def on_redirect(failure):
self.assertTrue(type(failure), PageRedirect)
d.addErrback(on_redirect)
return d
def test_page_redirect(self):
url = self.get_url("redirect")
d = download_file(url, fname("none"), handle_redirects=True)
d.addCallback(self.assertEqual, fname("none"))
d.addErrback(self.fail)
return d
def test_page_not_found(self):
d = download_file("http://localhost:%d/page/not/found" % self.listen_port, fname("none"))
d = download_file(self.get_url("page/not/found"), fname("none"))
d.addCallback(self.fail)
d.addErrback(self.assertIsInstance, Failure)
return d
def test_page_not_modified(self):
headers = {'If-Modified-Since': formatdate(usegmt=True)}
d = download_file("http://localhost:%d/" % self.listen_port, fname("index.html"), headers=headers)
d = download_file(self.get_url(), fname("index.html"), headers=headers)
d.addCallback(self.fail)
d.addErrback(self.assertIsInstance, Failure)
return d

View File

@ -11,13 +11,10 @@ import base64
import cgi
import logging
import os
from urlparse import urljoin
import gobject
import gtk
import pygtk
import twisted.web.client
import twisted.web.error
import deluge.common
import deluge.component as component
@ -641,19 +638,10 @@ class AddTorrentDialog(component.Component):
pb.set_text("%s" % deluge.common.fsize(current_length))
def on_download_success(result):
log.debug("Download success!")
self.add_from_files([result])
dialog.destroy()
def on_download_fail(result):
if result.check(twisted.web.error.PageRedirect):
new_url = urljoin(url, result.getErrorMessage().split(" to ")[1])
result = download_file(new_url, tmp_file, on_part)
result.addCallbacks(on_download_success, on_download_fail)
elif result.check(twisted.web.client.PartialDownloadError):
result = download_file(url, tmp_file, on_part, allow_compression=False)
result.addCallbacks(on_download_success, on_download_fail)
else:
log.debug("Download failed: %s", result)
dialog.destroy()
ErrorDialog(

View File

@ -17,10 +17,7 @@ import tempfile
import time
from types import FunctionType
from urllib import unquote_plus
from urlparse import urljoin
import twisted.web.client
import twisted.web.error
from twisted.internet import reactor
from twisted.internet.defer import Deferred, DeferredList
from twisted.web import http, resource, server
@ -628,17 +625,7 @@ class WebApi(JSONComponent):
return result
def on_download_fail(result):
if result.check(twisted.web.error.PageRedirect):
new_url = urljoin(url, result.getErrorMessage().split(" to ")[1])
result = httpdownloader.download_file(new_url, tmp_file, headers=headers)
result.addCallbacks(on_download_success, on_download_fail)
elif result.check(twisted.web.client.PartialDownloadError):
result = httpdownloader.download_file(url, tmp_file, headers=headers,
allow_compression=False)
result.addCallbacks(on_download_success, on_download_fail)
else:
log.error("Error occurred downloading torrent from %s", url)
log.error("Reason: %s", result.getErrorMessage())
log.error("Failed to add torrent from url %s", url)
return result
tempdir = tempfile.mkdtemp(prefix="delugeweb-")