deluge/libtorrent/src/torrent_info.cpp

607 lines
16 KiB
C++
Raw Normal View History

2007-07-04 08:24:30 +00:00
/*
Copyright (c) 2003-2008, Arvid Norberg
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in
the documentation and/or other materials provided with the distribution.
* Neither the name of the author nor the names of its
contributors may be used to endorse or promote products derived
from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
*/
#include "libtorrent/pch.hpp"
#include <ctime>
#include <iostream>
#include <fstream>
#include <iomanip>
#include <iterator>
#include <algorithm>
#include <set>
#ifdef _MSC_VER
#pragma warning(push, 1)
#endif
#include <boost/lexical_cast.hpp>
#include <boost/filesystem/path.hpp>
2008-05-20 06:23:48 +00:00
#include <boost/filesystem.hpp>
2007-07-04 08:24:30 +00:00
#include <boost/bind.hpp>
#ifdef _MSC_VER
#pragma warning(pop)
#endif
#include "libtorrent/torrent_info.hpp"
#include "libtorrent/bencode.hpp"
#include "libtorrent/hasher.hpp"
#include "libtorrent/entry.hpp"
2008-07-01 21:51:16 +00:00
#include "libtorrent/file.hpp"
2007-07-04 08:24:30 +00:00
namespace gr = boost::gregorian;
using namespace libtorrent;
namespace
{
namespace fs = boost::filesystem;
// Appends to `str` the two-byte UTF-8 sequence for the code point whose
// value equals the single byte `chr`.
void convert_to_utf8(std::string& str, unsigned char chr)
{
	// lead byte 110000xx: carries the two most significant bits of chr
	unsigned char const lead = static_cast<unsigned char>(0xc0 | (chr >> 6));
	// continuation byte 10xxxxxx: carries the six least significant bits
	unsigned char const trail = static_cast<unsigned char>(0x80 | (chr & 0x3f));

	str.push_back(static_cast<char>(lead));
	str.push_back(static_cast<char>(trail));
}
// Checks that target.path is well-formed UTF-8 (1 to 4 byte sequences).
// Every byte that is not part of a well-formed sequence is re-encoded
// with convert_to_utf8(), i.e. treated as a single 8-bit code point.
// target.path is only overwritten if at least one byte had to be
// converted; otherwise it is left untouched.
void verify_encoding(file_entry& target)
{
	std::string tmp_path;
	std::string file_path = target.path.string();
	bool valid_encoding = true;
	for (std::string::iterator i = file_path.begin()
		, end(file_path.end()); i != end; ++i)
	{
		// valid ascii-character
		if ((*i & 0x80) == 0)
		{
			tmp_path += *i;
			continue;
		}

		// a non-ascii byte needs at least one continuation byte
		if (std::distance(i, end) < 2)
		{
			convert_to_utf8(tmp_path, *i);
			valid_encoding = false;
			continue;
		}

		// valid 2-byte utf-8 character (lead byte 110xxxxx)
		if ((i[0] & 0xe0) == 0xc0
			&& (i[1] & 0xc0) == 0x80)
		{
			tmp_path += i[0];
			tmp_path += i[1];
			i += 1;
			continue;
		}

		if (std::distance(i, end) < 3)
		{
			convert_to_utf8(tmp_path, *i);
			valid_encoding = false;
			continue;
		}

		// valid 3-byte utf-8 character (lead byte 1110xxxx)
		if ((i[0] & 0xf0) == 0xe0
			&& (i[1] & 0xc0) == 0x80
			&& (i[2] & 0xc0) == 0x80)
		{
			tmp_path += i[0];
			tmp_path += i[1];
			tmp_path += i[2];
			i += 2;
			continue;
		}

		if (std::distance(i, end) < 4)
		{
			convert_to_utf8(tmp_path, *i);
			valid_encoding = false;
			continue;
		}

		// valid 4-byte utf-8 character (lead byte 11110xxx).
		// The lead byte has to be matched with mask 0xf8 against 0xf0;
		// testing (i[0] & 0xf0) == 0xe0 here would just repeat the 3-byte
		// test above and never accept a real 4-byte sequence.
		if ((i[0] & 0xf8) == 0xf0
			&& (i[1] & 0xc0) == 0x80
			&& (i[2] & 0xc0) == 0x80
			&& (i[3] & 0xc0) == 0x80)
		{
			tmp_path += i[0];
			tmp_path += i[1];
			tmp_path += i[2];
			tmp_path += i[3];
			i += 3;
			continue;
		}

		// not part of any well-formed sequence
		convert_to_utf8(tmp_path, *i);
		valid_encoding = false;
	}

	// the encoding was not valid utf-8:
	// replace the path with the correctly encoded string
	if (!valid_encoding) target.path = tmp_path;
}
2008-05-20 06:23:48 +00:00
// Fills in `target` from one entry of a torrent's "files" list.
//
// dict:     the bencoded dictionary describing the file; it must hold an
//           integer "length" and a list "path.utf-8" or "path"
// target:   receives size, path (root_dir followed by the path elements)
//           and file_base (set to 0)
// root_dir: prefix every file path starts with
//
// Returns false if a required key is missing or has the wrong type, if a
// path element is not a string, or if the resulting path is complete
// (absolute). Note that `target` may already have been partially written
// when false is returned.
bool extract_single_file(lazy_entry const& dict, file_entry& target
	, std::string const& root_dir)
{
	lazy_entry const* const len = dict.dict_find("length");
	if (!len || len->type() != lazy_entry::int_t) return false;

	target.size = len->int_value();
	target.path = root_dir;
	target.file_base = 0;

	// prefer path.utf-8: if it exists, it is more
	// likely to be correctly encoded
	lazy_entry const* components = dict.dict_find("path.utf-8");
	bool usable = components != 0 && components->type() == lazy_entry::list_t;
	if (!usable)
	{
		components = dict.dict_find("path");
		usable = components != 0 && components->type() == lazy_entry::list_t;
	}
	if (!usable) return false;

	int const count = components->list_size();
	for (int idx = 0; idx < count; ++idx)
	{
		lazy_entry const* const item = components->list_at(idx);
		if (item->type() != lazy_entry::string_t) return false;

		std::string element = item->string_value();
		// parent-directory references are silently dropped
		if (element == "..") continue;
		target.path /= element;
	}

	verify_encoding(target);
	return !target.path.is_complete();
}
2008-06-02 17:40:37 +00:00
// Parses every entry of the bencoded "files" list with
// extract_single_file() and appends the result to `target`.
// Returns false as soon as `list` turns out not to be a list or one of
// its entries fails to parse; files added before the failure stay in
// `target`.
bool extract_files(lazy_entry const& list, file_storage& target
	, std::string const& root_dir)
{
	if (list.type() != lazy_entry::list_t) return false;

	int const count = list.list_size();
	int idx = 0;
	while (idx < count)
	{
		file_entry entry;
		bool const parsed = extract_single_file(*list.list_at(idx), entry, root_dir);
		if (!parsed) return false;
		target.add_file(entry);
		++idx;
	}
	return true;
}
}
namespace libtorrent
{
2008-07-01 21:51:16 +00:00
// Reads the whole file `filename` into `v`.
//
// Returns
//    0  on success (v holds the file content; empty for an empty file)
//   -1  if the file could not be opened
//   -2  if the file is larger than 5000000 bytes (v is left untouched)
//   -3  if fewer bytes than the file size could be read
int load_file(fs::path const& filename, std::vector<char>& v)
{
	file f;
	if (!f.open(filename, file::in)) return -1;

	// the position after seeking to the end is the file size
	f.seek(0, file::end);
	size_type s = f.tell();
	if (s > 5000000) return -2;

	v.resize(s);
	// nothing to read from an empty file, and &v[0] must not be
	// evaluated on an empty vector
	if (s == 0) return 0;

	f.seek(0);
	size_type read = f.read(&v[0], s);
	if (read != s) return -3;
	return 0;
}
2007-07-04 08:24:30 +00:00
// standard constructor that parses a torrent file
2008-05-20 06:23:48 +00:00
torrent_info::torrent_info(entry const& torrent_file)
2008-06-02 17:40:37 +00:00
: m_creation_date(pt::ptime(pt::not_a_date_time))
2007-07-04 08:24:30 +00:00
, m_multifile(false)
, m_private(false)
2008-05-20 06:23:48 +00:00
, m_info_section_size(0)
, m_piece_hashes(0)
{
std::vector<char> tmp;
std::back_insert_iterator<std::vector<char> > out(tmp);
bencode(out, torrent_file);
lazy_entry e;
lazy_bdecode(&tmp[0], &tmp[0] + tmp.size(), e);
std::string error;
#ifndef BOOST_NO_EXCEPTIONS
if (!parse_torrent_file(e, error))
throw invalid_torrent_file();
#else
2008-07-01 21:51:16 +00:00
parse_torrent_file(e, error);
2007-07-04 08:24:30 +00:00
#endif
2008-05-20 06:23:48 +00:00
}
// Constructs the torrent_info from an already decoded torrent file.
// When exceptions are enabled a parse failure throws
// invalid_torrent_file; with BOOST_NO_EXCEPTIONS the result of the
// parse is ignored.
torrent_info::torrent_info(lazy_entry const& torrent_file)
	: m_creation_date(pt::ptime(pt::not_a_date_time))
	, m_multifile(false)
	, m_private(false)
	, m_info_section_size(0)
	, m_piece_hashes(0)
{
	std::string error;
	bool const parsed = parse_torrent_file(torrent_file, error);
#ifndef BOOST_NO_EXCEPTIONS
	if (!parsed) throw invalid_torrent_file();
#else
	(void)parsed;
#endif
}
2008-05-20 06:23:48 +00:00
// Constructs the torrent_info from `size` bytes of bencoded data
// starting at `buffer`. When exceptions are enabled a parse failure
// throws invalid_torrent_file; with BOOST_NO_EXCEPTIONS the result of
// the parse is ignored.
torrent_info::torrent_info(char const* buffer, int size)
	: m_creation_date(pt::ptime(pt::not_a_date_time))
	, m_multifile(false)
	, m_private(false)
	, m_info_section_size(0)
	, m_piece_hashes(0)
{
	lazy_entry decoded;
	char const* const buffer_end = buffer + size;
	lazy_bdecode(buffer, buffer_end, decoded);

	std::string error;
	bool const parsed = parse_torrent_file(decoded, error);
#ifndef BOOST_NO_EXCEPTIONS
	if (!parsed) throw invalid_torrent_file();
#else
	(void)parsed;
#endif
}
2007-07-04 08:24:30 +00:00
// constructor for a torrent of which only the info-hash is known
// (no metadata yet). Nothing is parsed: the object gets the given
// info-hash, the current universal time as creation date, and neither
// an info section nor piece hashes.
torrent_info::torrent_info(sha1_hash const& info_hash)
	: m_info_hash(info_hash)
	, m_creation_date(pt::second_clock::universal_time())
	, m_multifile(false)
	, m_private(false)
	, m_info_section_size(0)
	, m_piece_hashes(0)
{
}
2007-07-04 08:24:30 +00:00
2008-07-01 21:51:16 +00:00
// Constructs the torrent_info from the torrent file `filename`.
// If the file cannot be loaded (load_file() reports an error) the
// constructor returns without parsing anything and without throwing.
// Otherwise, when exceptions are enabled, a parse failure throws
// invalid_torrent_file; with BOOST_NO_EXCEPTIONS the result of the
// parse is ignored.
torrent_info::torrent_info(fs::path const& filename)
	: m_creation_date(pt::ptime(pt::not_a_date_time))
	, m_multifile(false)
	, m_private(false)
	// these two are initialised like in every other constructor, so they
	// never hold indeterminate values when loading or parsing fails
	, m_info_section_size(0)
	, m_piece_hashes(0)
{
	std::vector<char> buf;
	int ret = load_file(filename, buf);
	if (ret < 0) return;

	lazy_entry e;
	// &buf[0] must not be evaluated on an empty vector. For an empty
	// file, e stays default-constructed and is left for
	// parse_torrent_file() to reject (it only accepts a dictionary).
	if (!buf.empty())
		lazy_bdecode(&buf[0], &buf[0] + buf.size(), e);

	std::string error;
#ifndef BOOST_NO_EXCEPTIONS
	if (!parse_torrent_file(e, error))
		throw invalid_torrent_file();
#else
	parse_torrent_file(e, error);
#endif
}
2007-07-04 08:24:30 +00:00
// The destructor body is intentionally empty; no explicit clean-up is
// done here.
torrent_info::~torrent_info() {}
// Exchanges the complete state of this object with `ti`, member by
// member.
void torrent_info::swap(torrent_info& ti)
{
	using std::swap;

	// members that provide their own swap()
	m_urls.swap(ti.m_urls);
	m_url_seeds.swap(ti.m_url_seeds);
	m_files.swap(ti.m_files);
	m_nodes.swap(ti.m_nodes);
	m_comment.swap(ti.m_comment);
	m_created_by.swap(ti.m_created_by);

	// everything else goes through an unqualified swap() call
	swap(m_info_hash, ti.m_info_hash);
	swap(m_creation_date, ti.m_creation_date);
	swap(m_multifile, ti.m_multifile);
	swap(m_private, ti.m_private);

	// the raw info section and the values referring to it
	swap(m_info_section, ti.m_info_section);
	swap(m_info_section_size, ti.m_info_section_size);
	swap(m_piece_hashes, ti.m_piece_hashes);
	swap(m_info_dict, ti.m_info_dict);
}
2008-05-20 06:23:48 +00:00
// Parses the 'info' dictionary of a torrent file.
//
// info:  the decoded 'info' entry
// error: receives a description of the problem when false is returned
//
// On success the info-hash, a private copy of the raw info section, the
// piece length, the torrent name, the file list, the number of pieces,
// the pointer to the piece hashes and the private flag have been set.
// Returns false (with `error` set) if the section is malformed. Members
// assigned before the problem was detected keep their new values.
bool torrent_info::parse_info_section(lazy_entry const& info, std::string& error)
{
	if (info.type() != lazy_entry::dict_t)
	{
		error = "'info' entry is not a dictionary";
		return false;
	}

	// hash the info-field to calculate info-hash
	hasher h;
	std::pair<char const*, int> section = info.data_section();
	h.update(section.first, section.second);
	m_info_hash = h.final();

	// copy the info section
	m_info_section_size = section.second;
	m_info_section.reset(new char[m_info_section_size]);
	memcpy(m_info_section.get(), section.first, m_info_section_size);
	TORRENT_ASSERT(section.first[0] == 'd');
	TORRENT_ASSERT(section.first[m_info_section_size-1] == 'e');

	// extract piece length
	int piece_length = info.dict_find_int_value("piece length", -1);
	if (piece_length <= 0)
	{
		error = "invalid or missing 'piece length' entry in torrent file";
		return false;
	}
	m_files.set_piece_length(piece_length);

	// extract file name (or the directory name if it's a multifile torrent)
	std::string name = info.dict_find_string_value("name.utf-8");
	if (name.empty()) name = info.dict_find_string_value("name");
	if (name.empty())
	{
		error = "missing name in torrent file";
		return false;
	}

	// sanitize the name: a complete path is reduced to its last element,
	// a relative path with several elements loses its "." and ".."
	// elements
	fs::path tmp = name;
	if (tmp.is_complete())
	{
		name = tmp.leaf();
	}
	else if (tmp.has_branch_path())
	{
		fs::path p;
		for (fs::path::iterator i = tmp.begin()
			, end(tmp.end()); i != end; ++i)
		{
			if (*i == "." || *i == "..") continue;
			p /= *i;
		}
		name = p.string();
	}

	// a name made up only of "." / ".." elements (e.g. "../..") is
	// empty after the clean-up above and must be rejected as well
	if (name.empty() || name == ".." || name == ".")
	{
		error = "invalid 'name' of torrent (possible exploit attempt)";
		return false;
	}

	// extract file list
	lazy_entry const* i = info.dict_find_list("files");
	if (i == 0)
	{
		// if there's no list of files, there has to be a length
		// field.
		file_entry e;
		e.path = name;
		e.offset = 0;
		e.size = info.dict_find_int_value("length", -1);
		if (e.size < 0)
		{
			error = "invalid length of torrent";
			return false;
		}
		m_files.add_file(e);
		m_multifile = false;
	}
	else
	{
		if (!extract_files(*i, m_files, name))
		{
			error = "failed to parse files from torrent file";
			return false;
		}
		m_multifile = true;
	}
	m_files.set_name(name);

	// extract sha-1 hashes for all pieces
	// we want this division to round upwards, that's why we have the
	// extra addition
	m_files.set_num_pieces(int((m_files.total_size() + m_files.piece_length() - 1)
		/ m_files.piece_length()));

	lazy_entry const* pieces = info.dict_find("pieces");
	if (pieces == 0 || pieces->type() != lazy_entry::string_t)
	{
		error = "invalid or missing 'pieces' entry in torrent file";
		return false;
	}

	// one 20 byte hash per piece
	if (pieces->string_length() != m_files.num_pieces() * 20)
	{
		error = "incorrect number of piece hashes in torrent file";
		return false;
	}

	// point into our own copy of the info section, at the same offset
	// the hashes have in the original buffer
	m_piece_hashes = m_info_section.get() + (pieces->string_ptr() - section.first);
	TORRENT_ASSERT(m_piece_hashes >= m_info_section.get());
	TORRENT_ASSERT(m_piece_hashes < m_info_section.get() + m_info_section_size);

	m_private = info.dict_find_int_value("private", 0);
	return true;
}
2008-05-20 06:23:48 +00:00
// Parses a decoded torrent file.
//
// torrent_file: the top level entry of the torrent file
// error:        receives a description of the problem when false is
//               returned
//
// Extracts the trackers ("announce-list", falling back to "announce"),
// DHT nodes, creation date, url seeds, comment and creator, and finally
// hands the "info" dictionary to parse_info_section(). Returns false if
// the top level entry is not a dictionary, if "info" is missing or not
// a dictionary, or if parse_info_section() fails.
bool torrent_info::parse_torrent_file(lazy_entry const& torrent_file, std::string& error)
{
	if (torrent_file.type() != lazy_entry::dict_t)
	{
		error = "torrent file is not a dictionary";
		return false;
	}

	// extract the url of the tracker
	lazy_entry const* announce_list = torrent_file.dict_find_list("announce-list");
	if (announce_list)
	{
		m_urls.reserve(announce_list->list_size());
		for (int j = 0, num_tiers(announce_list->list_size()); j < num_tiers; ++j)
		{
			lazy_entry const* tier = announce_list->list_at(j);
			if (tier->type() != lazy_entry::list_t) continue;
			for (int k = 0, num_urls(tier->list_size()); k < num_urls; ++k)
			{
				announce_entry e(tier->list_string_value_at(k));
				if (e.url.empty()) continue;
				e.tier = j;
				m_urls.push_back(e);
			}
		}

		// an announce-list that is empty or only holds unusable entries
		// leaves m_urls empty; calling front() on it would be undefined
		if (!m_urls.empty())
		{
			// shuffle each tier
			std::vector<announce_entry>::iterator start = m_urls.begin();
			std::vector<announce_entry>::iterator stop;
			int current_tier = m_urls.front().tier;
			for (stop = m_urls.begin(); stop != m_urls.end(); ++stop)
			{
				if (stop->tier != current_tier)
				{
					std::random_shuffle(start, stop);
					start = stop;
					current_tier = stop->tier;
				}
			}
			// the last tier
			std::random_shuffle(start, stop);
		}
	}

	// no usable announce-list: fall back to the single "announce" url
	if (m_urls.empty())
	{
		announce_entry e(torrent_file.dict_find_string_value("announce"));
		if (!e.url.empty()) m_urls.push_back(e);
	}

	// each node is a list whose first two elements are a string and an
	// integer; anything else is skipped
	lazy_entry const* nodes = torrent_file.dict_find_list("nodes");
	if (nodes)
	{
		for (int i = 0, end(nodes->list_size()); i < end; ++i)
		{
			lazy_entry const* n = nodes->list_at(i);
			if (n->type() != lazy_entry::list_t
				|| n->list_size() < 2
				|| n->list_at(0)->type() != lazy_entry::string_t
				|| n->list_at(1)->type() != lazy_entry::int_t)
				continue;
			m_nodes.push_back(std::make_pair(
				n->list_at(0)->string_value()
				, int(n->list_at(1)->int_value())));
		}
	}

	// extract creation date (seconds since 1970-01-01)
	size_type cd = torrent_file.dict_find_int_value("creation date", -1);
	if (cd >= 0)
	{
		m_creation_date = pt::ptime(gr::date(1970, gr::Jan, 1))
			+ pt::seconds(long(cd));
	}

	// if there are any url-seeds, extract them.
	// "url-list" is either a single string or a list of strings
	lazy_entry const* url_seeds = torrent_file.dict_find("url-list");
	if (url_seeds && url_seeds->type() == lazy_entry::string_t)
	{
		m_url_seeds.push_back(url_seeds->string_value());
	}
	else if (url_seeds && url_seeds->type() == lazy_entry::list_t)
	{
		for (int i = 0, end(url_seeds->list_size()); i < end; ++i)
		{
			lazy_entry const* url = url_seeds->list_at(i);
			if (url->type() != lazy_entry::string_t) continue;
			m_url_seeds.push_back(url->string_value());
		}
	}

	// the .utf-8 variants are preferred when present
	m_comment = torrent_file.dict_find_string_value("comment.utf-8");
	if (m_comment.empty()) m_comment = torrent_file.dict_find_string_value("comment");

	m_created_by = torrent_file.dict_find_string_value("created by.utf-8");
	if (m_created_by.empty()) m_created_by = torrent_file.dict_find_string_value("created by");

	lazy_entry const* info = torrent_file.dict_find_dict("info");
	if (info == 0)
	{
		error = "missing or invalid 'info' section in torrent file";
		return false;
	}
	return parse_info_section(*info, error);
}
// Returns the creation date, or an empty optional if m_creation_date
// still holds the not_a_date_time value.
boost::optional<pt::ptime>
torrent_info::creation_date() const
{
	pt::ptime const unset = pt::ptime(gr::date(pt::not_a_date_time));
	bool const has_date = (m_creation_date != unset);
	return has_date
		? boost::optional<pt::ptime>(m_creation_date)
		: boost::optional<pt::ptime>();
}
void torrent_info::add_tracker(std::string const& url, int tier)
{
announce_entry e(url);
e.tier = tier;
m_urls.push_back(e);
using boost::bind;
std::sort(m_urls.begin(), m_urls.end(), boost::bind<bool>(std::less<int>()
, bind(&announce_entry::tier, _1), bind(&announce_entry::tier, _2)));
}
// ------- start deprecation -------
void torrent_info::print(std::ostream& os) const
{
os << "trackers:\n";
for (std::vector<announce_entry>::const_iterator i = trackers().begin();
i != trackers().end(); ++i)
{
os << i->tier << ": " << i->url << "\n";
}
if (!m_comment.empty())
os << "comment: " << m_comment << "\n";
// if (m_creation_date != pt::ptime(gr::date(pt::not_a_date_time)))
// os << "creation date: " << to_simple_string(m_creation_date) << "\n";
os << "private: " << (m_private?"yes":"no") << "\n";
os << "number of pieces: " << num_pieces() << "\n";
os << "piece length: " << piece_length() << "\n";
os << "files:\n";
2008-06-02 17:40:37 +00:00
for (file_storage::iterator i = m_files.begin(); i != m_files.end(); ++i)
2007-07-04 08:24:30 +00:00
os << " " << std::setw(11) << i->size << " " << i->path.string() << "\n";
}
// ------- end deprecation -------
}