Use a better data structure for IndexSet

Switch to a chunked vector-of-vectors which makes mid-insertions on large sets
much faster, and cache the begin/end/count for each chunk to make lookups much
more cache-friendly.
This commit is contained in:
Thomas Goyne 2016-03-23 10:01:14 -07:00
parent f600487769
commit 7a75e2bae2
6 changed files with 617 additions and 175 deletions

View File

@ -382,9 +382,9 @@ public:
} }
if (o->kind == ColumnInfo::Kind::Remove) if (o->kind == ColumnInfo::Kind::Remove)
old_size += o->indices.size(); old_size += o->indices.count();
else if (o->kind == ColumnInfo::Kind::Insert) else if (o->kind == ColumnInfo::Kind::Insert)
old_size -= o->indices.size(); old_size -= o->indices.count();
o->indices.set(old_size); o->indices.set(old_size);

View File

@ -23,9 +23,247 @@
#include <algorithm> #include <algorithm>
using namespace realm; using namespace realm;
using namespace realm::_impl;
const size_t IndexSet::npos; const size_t IndexSet::npos;
// Replace the range this iterator points at with [front, back) and keep the
// owning chunk's cached begin/end/count consistent with the new value.
template<typename T>
void MutableChunkedRangeVectorIterator<T>::set(size_t front, size_t back)
{
    auto& chunk = *this->m_outer;
    auto& range = *this->m_inner;

    // Swap the old range's length for the new one in the cached total
    chunk.count -= range.second - range.first;

    // The chunk's cached bounds only change when the first or last range of
    // the chunk is the one being modified
    if (&range == &chunk.data.front())
        chunk.begin = front;
    if (&range == &chunk.data.back())
        chunk.end = back;

    chunk.count += back - front;
    range.first = front;
    range.second = back;
}
// Move the start of the current range by `front` and its end by `back`
// (either may be negative), updating the owning chunk's cached
// begin/end/count to match.
template<typename T>
void MutableChunkedRangeVectorIterator<T>::adjust(ptrdiff_t front, ptrdiff_t back)
{
    auto& chunk = *this->m_outer;
    auto& range = *this->m_inner;

    // Only the first/last range of a chunk contributes to its cached bounds
    if (&range == &chunk.data.front())
        chunk.begin += front;
    if (&range == &chunk.data.back())
        chunk.end += back;

    // Growing the end adds indices, moving the start forward removes them
    chunk.count += back - front;
    range.first += front;
    range.second += back;
}
// Slide the current range by `distance` without changing its length. The
// chunk's cached count is therefore untouched; only its cached begin/end may
// need to follow the range.
template<typename T>
void MutableChunkedRangeVectorIterator<T>::shift(ptrdiff_t distance)
{
    auto& chunk = *this->m_outer;
    auto& range = *this->m_inner;

    if (&range == &chunk.data.front())
        chunk.begin += distance;
    if (&range == &chunk.data.back())
        chunk.end += distance;

    range.first += distance;
    range.second += distance;
}
// Append a range after all existing ranges.
//
// The range is added to the last chunk if that chunk still has room,
// otherwise a new chunk holding only this range is started. The cached
// begin/end/count of the affected chunk are kept up to date.
void ChunkedRangeVector::push_back(value_type value)
{
    const size_t length = value.second - value.first;

    if (!empty() && m_data.back().data.size() < max_size) {
        auto& last = m_data.back();
        // Appended ranges must not start before the current end of the vector
        REALM_ASSERT(last.end <= value.first);
        last.data.push_back(value);
        last.count += length;
        last.end = value.second;
    }
    else {
        // `value` is a pair of integers, so it is simply copied; it is read
        // again for the cached fields and must not be treated as moved-from.
        m_data.push_back({{value}, value.first, value.second, length});
    }
    verify();
}
// Insert `value` immediately before `pos` and return an iterator to the newly
// inserted range. Inserting at end() is treated as an append.
ChunkedRangeVector::iterator ChunkedRangeVector::insert(iterator pos, value_type value)
{
    if (pos.m_outer == m_data.end()) {
        push_back(std::move(value));
        return std::prev(end());
    }

    // May split the chunk; the returned iterator refers to the same logical
    // position in whichever chunk it ended up in
    pos = ensure_space(pos);

    Chunk& target = *pos.m_outer;
    auto where = target.data.begin() + pos.offset();
    pos.m_inner = &*target.data.insert(where, value);

    // Refresh the chunk's cached summary to account for the new range
    target.count += value.second - value.first;
    if (value.first < target.begin)
        target.begin = value.first;
    if (target.end < value.second)
        target.end = value.second;

    verify();
    return pos;
}
// Make sure the chunk containing `pos` can take one more range.
//
// If the chunk is already full it is split: the trailing `max_size / 2`
// ranges are moved into a new chunk inserted directly after it, and the cached
// begin/end/count of both chunks are recomputed. Returns an iterator to the
// same logical range that `pos` referred to, which may now live in the new
// chunk. Any other iterators into this vector are invalidated by a split.
ChunkedRangeVector::iterator ChunkedRangeVector::ensure_space(iterator pos)
{
    if (pos.m_outer->data.size() + 1 <= max_size)
        return pos;

    auto offset = pos.offset();

    // Split the chunk in half to make space for the new insertion
    auto new_pos = m_data.insert(pos.m_outer + 1, Chunk{});
    auto prev = new_pos - 1;
    auto to_move = max_size / 2;
    new_pos->data.reserve(to_move);
    new_pos->data.assign(prev->data.end() - to_move, prev->data.end());
    prev->data.resize(prev->data.size() - to_move);
    // Number of ranges which stayed in the original chunk. This is what
    // decides which chunk `offset` now belongs to; it only equals `to_move`
    // when the chunk held an even number of ranges.
    const auto kept = prev->data.size();

    size_t moved_count = 0;
    for (auto range : new_pos->data)
        moved_count += range.second - range.first;

    prev->end = prev->data.back().second;
    prev->count -= moved_count;
    new_pos->begin = new_pos->data.front().first;
    new_pos->end = new_pos->data.back().second;
    new_pos->count = moved_count;

    if (offset >= kept) {
        pos.m_outer = new_pos;
        offset -= kept;
    }
    else {
        pos.m_outer = prev;
    }
    // The outer vector was modified, so the cached end iterator is stale
    pos.m_end = m_data.end();
    pos.m_inner = &pos.m_outer->data[offset];
    verify();
    return pos;
}
// Remove the range at `pos` and return an iterator to the range which
// followed it (or end() if it was the last range).
//
// If the removed range was the only one in its chunk, the now-empty chunk is
// removed as well; otherwise the chunk's cached begin/end/count are refreshed.
ChunkedRangeVector::iterator ChunkedRangeVector::erase(iterator pos)
{
    auto offset = pos.offset();
    auto& chunk = *pos.m_outer;
    chunk.count -= pos->second - pos->first;
    chunk.data.erase(chunk.data.begin() + offset);

    if (chunk.data.size() == 0) {
        // `chunk` dangles after this point and must not be touched again
        pos.m_outer = m_data.erase(pos.m_outer);
        pos.m_end = m_data.end();
        // The range following the erased one is the *first* range of the
        // chunk which followed the removed chunk
        pos.m_inner = pos.m_outer == m_data.end() ? nullptr : &pos.m_outer->data.front();
        verify();
        return pos;
    }

    chunk.begin = chunk.data.front().first;
    chunk.end = chunk.data.back().second;

    if (offset < chunk.data.size())
        pos.m_inner = &chunk.data[offset];
    else {
        // Erased the last range of the chunk, so step to the next chunk
        ++pos.m_outer;
        pos.m_inner = pos.m_outer == pos.m_end ? nullptr : &pos.m_outer->data.front();
    }

    verify();
    return pos;
}
// Debug-only consistency check; compiles to nothing unless REALM_DEBUG is
// defined. Asserts that ranges are non-empty, strictly increasing and
// non-adjacent, and that every chunk's cached begin/end/count matches the
// ranges it actually stores.
void ChunkedRangeVector::verify() const noexcept
{
#ifdef REALM_DEBUG
    // size_t(-1) doubles as "no previous range seen yet"
    size_t last_end = -1;
    for (auto const& r : *this) {
        REALM_ASSERT(r.first < r.second);
        REALM_ASSERT(last_end == size_t(-1) || r.first > last_end);
        last_end = r.second;
    }

    for (auto const& c : m_data) {
        REALM_ASSERT(!c.data.empty());
        REALM_ASSERT(c.data.front().first == c.begin);
        REALM_ASSERT(c.data.back().second == c.end);
        REALM_ASSERT(c.count <= c.end - c.begin);

        // Recompute the number of indices stored in the chunk from scratch
        size_t total = 0;
        for (auto it = c.data.begin(); it != c.data.end(); ++it)
            total += it->second - it->first;
        REALM_ASSERT(total == c.count);
    }
#endif
}
namespace {
// Helper which assembles the chunk list for a ChunkedRangeVector from indices
// or ranges appended in ascending order. Exposes `value_type` and
// `push_back()` so that it can be used as the target of std::back_inserter.
class ChunkedRangeVectorBuilder {
public:
    using value_type = std::pair<size_t, size_t>;

    // `expected` is only used to decide how much storage to set up in advance
    ChunkedRangeVectorBuilder(ChunkedRangeVector const& expected);

    // Append the single index `index`
    void push_back(size_t index);
    // Append the range [range.first, range.second)
    void push_back(value_type range);

    // Finish building and hand over the chunks; the builder's chunk list is
    // moved from by this call
    std::vector<ChunkedRangeVector::Chunk> finalize();

private:
    std::vector<ChunkedRangeVector::Chunk> m_data;
    // Index into m_data of the chunk currently being filled
    size_t m_outer_pos = 0;
};
// Set up storage for roughly as many ranges as `expected` currently holds:
// enough chunks to fit them, with every chunk but the last reserved to full
// capacity.
ChunkedRangeVectorBuilder::ChunkedRangeVectorBuilder(ChunkedRangeVector const& expected)
{
    size_t total_ranges = 0;
    for (auto it = expected.m_data.begin(); it != expected.m_data.end(); ++it)
        total_ranges += it->data.size();

    // Always at least one chunk, so push_back() has somewhere to write
    const size_t chunk_count = total_ranges / ChunkedRangeVector::max_size + 1;
    m_data.resize(chunk_count);

    const auto last = m_data.end() - 1;
    for (auto it = m_data.begin(); it != last; ++it)
        it->data.reserve(ChunkedRangeVector::max_size);
}
void ChunkedRangeVectorBuilder::push_back(size_t index)
{
push_back({index, index + 1});
}
// Append the range [range.first, range.second).
//
// A range which starts exactly where the previous one ended is merged into
// it. Otherwise the range is added to the current chunk, or to the next chunk
// once the current one holds max_size ranges. A chunk's cached `end` is only
// written when the builder moves past it (here) or in finalize().
void ChunkedRangeVectorBuilder::push_back(std::pair<size_t, size_t> range)
{
    const size_t length = range.second - range.first;
    auto& chunk = m_data[m_outer_pos];

    if (chunk.data.empty()) {
        // First range of the current chunk
        chunk.data.push_back(range);
        chunk.count = length;
        chunk.begin = range.first;
    }
    else if (range.first == chunk.data.back().second) {
        // Adjacent to the previous range, so just extend it
        chunk.data.back().second = range.second;
        chunk.count += length;
    }
    else if (chunk.data.size() < ChunkedRangeVector::max_size) {
        chunk.data.push_back(range);
        chunk.count += length;
    }
    else {
        // Current chunk is full: seal it and start filling the next one
        chunk.end = chunk.data.back().second;
        ++m_outer_pos;
        if (m_outer_pos >= m_data.size()) {
            // Ran out of pre-sized chunks. The count must be the length of
            // the range, which is not necessarily a single index.
            m_data.push_back({{range}, range.first, 0, length});
        }
        else {
            auto& next = m_data[m_outer_pos];
            next.data.push_back(range);
            next.begin = range.first;
            next.count = length;
        }
    }
}
// Finish building: drop the chunks which were set up but never filled, seal
// the cached `end` of the last chunk in use, and hand the chunk list over to
// the caller. The builder's chunk list is moved from afterwards.
std::vector<ChunkedRangeVector::Chunk> ChunkedRangeVectorBuilder::finalize()
{
    if (m_data.empty())
        return std::move(m_data);

    // Everything past the chunk currently being filled is unused
    m_data.resize(m_outer_pos + 1);

    auto& last = m_data.back();
    if (last.data.empty())
        m_data.pop_back();
    else
        last.end = last.data.back().second;

    return std::move(m_data);
}
}
IndexSet::IndexSet(std::initializer_list<size_t> values) IndexSet::IndexSet(std::initializer_list<size_t> values)
{ {
for (size_t v : values) for (size_t v : values)
@ -35,24 +273,48 @@ IndexSet::IndexSet(std::initializer_list<size_t> values)
bool IndexSet::contains(size_t index) const bool IndexSet::contains(size_t index) const
{ {
auto it = const_cast<IndexSet*>(this)->find(index); auto it = const_cast<IndexSet*>(this)->find(index);
return it != m_ranges.end() && it->first <= index; return it != end() && it->first <= index;
} }
size_t IndexSet::count(size_t start_index, size_t end_index) const size_t IndexSet::count(size_t start_index, size_t end_index) const
{ {
auto it = const_cast<IndexSet*>(this)->find(start_index); auto it = const_cast<IndexSet*>(this)->find(start_index);
const auto end = m_ranges.end(); const auto end = this->end();
if (it == end || it->first >= end_index) { if (it == end || it->first >= end_index) {
return 0; return 0;
} }
if (it->second >= end_index) if (it->second >= end_index)
return std::min(it->second, end_index) - std::max(it->first, start_index); return std::min(it->second, end_index) - std::max(it->first, start_index);
// These checks are somewhat redundant, but this loop is hot so pulling instructions out of it helps size_t ret = 0;
size_t ret = it->second - std::max(it->first, start_index);
for (++it; it != end && it->second < end_index; ++it) { if (start_index > it->first || it.offset() != 0) {
// Start index is in the middle of a chunk, so start by counting the
// rest of that chunk
ret = it->second - std::max(it->first, start_index);
for (++it; it != end && it->second < end_index && it.offset() != 0; ++it) {
ret += it->second - it->first; ret += it->second - it->first;
} }
if (it != end && it->first < end_index && it.offset() != 0)
ret += end_index - it->first;
if (it == end || it->second >= end_index)
return ret;
}
// Now count all complete chunks that fall within the range
while (it != end && it.outer()->end <= end_index) {
REALM_ASSERT_DEBUG(it.offset() == 0);
ret += it.outer()->count;
it.next_chunk();
}
// Count all complete ranges within the last chunk
while (it != end && it->second <= end_index) {
ret += it->second - it->first;
++it;
}
// And finally add in the partial last range
if (it != end && it->first < end_index) if (it != end && it->first < end_index)
ret += end_index - it->first; ret += end_index - it->first;
return ret; return ret;
@ -60,13 +322,25 @@ size_t IndexSet::count(size_t start_index, size_t end_index) const
IndexSet::iterator IndexSet::find(size_t index) IndexSet::iterator IndexSet::find(size_t index)
{ {
return find(index, m_ranges.begin()); return find(index, begin());
} }
IndexSet::iterator IndexSet::find(size_t index, iterator it) IndexSet::iterator IndexSet::find(size_t index, iterator begin)
{ {
return std::lower_bound(it, m_ranges.end(), std::make_pair(size_t(0), index + 1), auto it = std::find_if(begin.outer(), m_data.end(),
[&](auto const& a, auto const& b) { return a.second < b.second; }); [&](auto const& lft) { return lft.end > index; });
if (it == m_data.end())
return end();
if (index < it->begin)
return iterator(it, m_data.end(), &it->data[0]);
auto inner_begin = it->data.begin();
if (it == begin.outer())
inner_begin += begin.offset();
auto inner = std::lower_bound(inner_begin, it->data.end(), index,
[&](auto const& lft, auto) { return lft.second <= index; });
REALM_ASSERT_DEBUG(inner != it->data.end());
return iterator(it, m_data.end(), &*inner);
} }
void IndexSet::add(size_t index) void IndexSet::add(size_t index)
@ -76,7 +350,7 @@ void IndexSet::add(size_t index)
void IndexSet::add(IndexSet const& other) void IndexSet::add(IndexSet const& other)
{ {
auto it = m_ranges.begin(); auto it = begin();
for (size_t index : other.as_indexes()) { for (size_t index : other.as_indexes()) {
it = do_add(find(index, it), index); it = do_add(find(index, it), index);
} }
@ -84,10 +358,16 @@ void IndexSet::add(IndexSet const& other)
size_t IndexSet::add_shifted(size_t index) size_t IndexSet::add_shifted(size_t index)
{ {
auto it = m_ranges.begin(); iterator it = begin(), end = this->end();
for (auto end = m_ranges.end(); it != end && it->first <= index; ++it) {
// Shift for any complete chunks before the target
for (; it != end && it.outer()->end <= index; it.next_chunk())
index += it.outer()->count;
// And any ranges within the last partial chunk
for (; it != end && it->first <= index; ++it)
index += it->second - it->first; index += it->second - it->first;
}
do_add(it, index); do_add(it, index);
return index; return index;
} }
@ -105,11 +385,10 @@ void IndexSet::add_shifted_by(IndexSet const& shifted_by, IndexSet const& values
} }
#endif #endif
auto old_ranges = move(m_ranges); ChunkedRangeVectorBuilder builder(*this);
m_ranges.reserve(std::max(old_ranges.size(), values.size()));
auto old_it = old_ranges.cbegin(), old_end = old_ranges.cend(); auto old_it = cbegin(), old_end = cend();
auto shift_it = shifted_by.m_ranges.cbegin(), shift_end = shifted_by.m_ranges.cend(); auto shift_it = shifted_by.cbegin(), shift_end = shifted_by.cend();
size_t skip_until = 0; size_t skip_until = 0;
size_t old_shift = 0; size_t old_shift = 0;
@ -124,30 +403,25 @@ void IndexSet::add_shifted_by(IndexSet const& shifted_by, IndexSet const& values
for (; old_it != old_end && old_it->first <= index - new_shift + old_shift; ++old_it) { for (; old_it != old_end && old_it->first <= index - new_shift + old_shift; ++old_it) {
for (size_t i = old_it->first; i < old_it->second; ++i) for (size_t i = old_it->first; i < old_it->second; ++i)
add_back(i); builder.push_back(i);
old_shift += old_it->second - old_it->first; old_shift += old_it->second - old_it->first;
} }
REALM_ASSERT(index >= new_shift); REALM_ASSERT(index >= new_shift);
add_back(index - new_shift + old_shift); builder.push_back(index - new_shift + old_shift);
} }
if (old_it != old_end) { copy(old_it, old_end, std::back_inserter(builder));
if (!empty() && old_it->first == m_ranges.back().second) { m_data = builder.finalize();
m_ranges.back().second = old_it->second;
++old_it;
}
copy(old_it, old_end, back_inserter(m_ranges));
}
REALM_ASSERT_DEBUG((size_t)std::distance(as_indexes().begin(), as_indexes().end()) == expected); REALM_ASSERT_DEBUG((size_t)std::distance(as_indexes().begin(), as_indexes().end()) == expected);
} }
void IndexSet::set(size_t len) void IndexSet::set(size_t len)
{ {
m_ranges.clear(); clear();
if (len) { if (len) {
m_ranges.push_back({0, len}); push_back({0, len});
} }
} }
@ -156,22 +430,25 @@ void IndexSet::insert_at(size_t index, size_t count)
REALM_ASSERT(count > 0); REALM_ASSERT(count > 0);
auto pos = find(index); auto pos = find(index);
auto end = this->end();
bool in_existing = false; bool in_existing = false;
if (pos != m_ranges.end()) { if (pos != end) {
if (pos->first <= index) if (pos->first <= index) {
in_existing = true; in_existing = true;
else pos.adjust(0, count);
pos->first += count;
pos->second += count;
for (auto it = pos + 1; it != m_ranges.end(); ++it) {
it->first += count;
it->second += count;
} }
else {
pos.shift(count);
}
for (auto it = std::next(pos); it != end; ++it)
it.shift(count);
} }
if (!in_existing) { if (!in_existing) {
for (size_t i = 0; i < count; ++i) for (size_t i = 0; i < count; ++i)
pos = do_add(pos, index + i) + 1; pos = std::next(do_add(pos, index + i));
} }
verify();
} }
void IndexSet::insert_at(IndexSet const& positions) void IndexSet::insert_at(IndexSet const& positions)
@ -179,30 +456,30 @@ void IndexSet::insert_at(IndexSet const& positions)
if (positions.empty()) if (positions.empty())
return; return;
if (empty()) { if (empty()) {
m_ranges = positions.m_ranges; *this = positions;
return; return;
} }
auto old_ranges = move(m_ranges); IndexIterator begin1 = cbegin(), begin2 = positions.cbegin();
m_ranges.reserve(std::max(m_ranges.size(), positions.m_ranges.size())); IndexIterator end1 = cend(), end2 = positions.cend();
IndexIterator begin1 = old_ranges.cbegin(), begin2 = positions.m_ranges.cbegin();
IndexIterator end1 = old_ranges.cend(), end2 = positions.m_ranges.cend();
ChunkedRangeVectorBuilder builder(*this);
size_t shift = 0; size_t shift = 0;
while (begin1 != end1 && begin2 != end2) { while (begin1 != end1 && begin2 != end2) {
if (*begin1 + shift < *begin2) { if (*begin1 + shift < *begin2) {
add_back(*begin1++ + shift); builder.push_back(*begin1++ + shift);
} }
else { else {
++shift; ++shift;
add_back(*begin2++); builder.push_back(*begin2++);
} }
} }
for (; begin1 != end1; ++begin1) for (; begin1 != end1; ++begin1)
add_back(*begin1 + shift); builder.push_back(*begin1 + shift);
for (; begin2 != end2; ++begin2) for (; begin2 != end2; ++begin2)
add_back(*begin2); builder.push_back(*begin2);
m_data = builder.finalize();
} }
void IndexSet::shift_for_insert_at(size_t index, size_t count) void IndexSet::shift_for_insert_at(size_t index, size_t count)
@ -210,58 +487,53 @@ void IndexSet::shift_for_insert_at(size_t index, size_t count)
REALM_ASSERT(count > 0); REALM_ASSERT(count > 0);
auto it = find(index); auto it = find(index);
if (it == m_ranges.end()) if (it == end())
return; return;
if (it->first < index) { for (auto pos = it, end = this->end(); pos != end; ++pos)
// split the range so that we can exclude `index` pos.shift(count);
auto old_second = it->second;
it->second = index;
it = m_ranges.insert(it + 1, {index, old_second});
}
for (; it != m_ranges.end(); ++it) { // If the range contained the insertion point, split the range and move
it->first += count; // the part of it before the insertion point back
it->second += count; if (it->first < index + count) {
auto old_second = it->second;
it.set(it->first - count, index);
insert(std::next(it), {index + count, old_second});
} }
verify();
} }
void IndexSet::shift_for_insert_at(realm::IndexSet const& values) void IndexSet::shift_for_insert_at(realm::IndexSet const& values)
{ {
if (values.empty()) if (empty() || values.empty())
return;
if (values.m_data.front().begin >= m_data.back().end)
return; return;
IndexIterator begin1 = cbegin(), begin2 = values.cbegin();
IndexIterator end1 = cend(), end2 = values.cend();
ChunkedRangeVectorBuilder builder(*this);
size_t shift = 0; size_t shift = 0;
auto it = find(values.begin()->first); while (begin1 != end1 && begin2 != end2) {
for (auto range : values) { if (*begin1 + shift < *begin2) {
for (; it != m_ranges.end() && it->second + shift <= range.first; ++it) { builder.push_back(*begin1++ + shift);
it->first += shift;
it->second += shift;
} }
if (it == m_ranges.end()) else {
return; ++shift;
begin2++;
}
}
for (; begin1 != end1; ++begin1)
builder.push_back(*begin1 + shift);
if (it->first + shift < range.first) { m_data = builder.finalize();
// split the range so that we can exclude `index`
auto old_second = it->second;
it->first += shift;
it->second = range.first;
it = m_ranges.insert(it + 1, {range.first - shift, old_second});
}
shift += range.second - range.first;
}
for (; it != m_ranges.end(); ++it) {
it->first += shift;
it->second += shift;
}
} }
void IndexSet::erase_at(size_t index) void IndexSet::erase_at(size_t index)
{ {
auto it = find(index); auto it = find(index);
if (it != m_ranges.end()) if (it != end())
do_erase(it, index); do_erase(it, index);
} }
@ -270,16 +542,15 @@ void IndexSet::erase_at(IndexSet const& positions)
if (empty() || positions.empty()) if (empty() || positions.empty())
return; return;
auto old_ranges = move(m_ranges); ChunkedRangeVectorBuilder builder(*this);
m_ranges.reserve(std::max(m_ranges.size(), positions.m_ranges.size()));
IndexIterator begin1 = old_ranges.cbegin(), begin2 = positions.m_ranges.cbegin(); IndexIterator begin1 = cbegin(), begin2 = positions.cbegin();
IndexIterator end1 = old_ranges.cend(), end2 = positions.m_ranges.cend(); IndexIterator end1 = cend(), end2 = positions.cend();
size_t shift = 0; size_t shift = 0;
while (begin1 != end1 && begin2 != end2) { while (begin1 != end1 && begin2 != end2) {
if (*begin1 < *begin2) { if (*begin1 < *begin2) {
add_back(*begin1++ - shift); builder.push_back(*begin1++ - shift);
} }
else if (*begin1 == *begin2) { else if (*begin1 == *begin2) {
++shift; ++shift;
@ -292,16 +563,24 @@ void IndexSet::erase_at(IndexSet const& positions)
} }
} }
for (; begin1 != end1; ++begin1) for (; begin1 != end1; ++begin1)
add_back(*begin1 - shift); builder.push_back(*begin1 - shift);
m_data = builder.finalize();
} }
size_t IndexSet::erase_or_unshift(size_t index) size_t IndexSet::erase_or_unshift(size_t index)
{ {
auto shifted = index; auto shifted = index;
auto it = m_ranges.begin(), end = m_ranges.end(); iterator it = begin(), end = this->end();
for (; it != end && it->second <= index; ++it) {
// Shift for any complete chunks before the target
for (; it != end && it.outer()->end <= index; it.next_chunk())
shifted -= it.outer()->count;
// And any ranges within the last partial chunk
for (; it != end && it->second <= index; ++it)
shifted -= it->second - it->first; shifted -= it->second - it->first;
}
if (it == end) if (it == end)
return shifted; return shifted;
@ -316,45 +595,43 @@ size_t IndexSet::erase_or_unshift(size_t index)
void IndexSet::do_erase(iterator it, size_t index) void IndexSet::do_erase(iterator it, size_t index)
{ {
if (it->first <= index) { if (it->first <= index) {
--it->second; if (it->first + 1 == it->second) {
if (it->first == it->second) { it = erase(it);
it = m_ranges.erase(it);
} }
else { else {
it.adjust(0, -1);
++it; ++it;
} }
} }
else if (it != m_ranges.begin() && (it - 1)->second + 1 == it->first) { else if (it != begin() && std::prev(it)->second + 1 == it->first) {
(it - 1)->second = it->second - 1; std::prev(it).adjust(0, it->second - it->first);
it = m_ranges.erase(it); it = erase(it);
} }
for (; it != m_ranges.end(); ++it) { for (; it != end(); ++it)
--it->first; it.shift(-1);
--it->second;
}
} }
IndexSet::iterator IndexSet::do_remove(iterator it, size_t begin, size_t end) IndexSet::iterator IndexSet::do_remove(iterator it, size_t begin, size_t end)
{ {
for (it = find(begin, it); it != m_ranges.end() && it->first < end; it = find(begin, it)) { for (it = find(begin, it); it != this->end() && it->first < end; it = find(begin, it)) {
// Trim off any part of the range to remove that's before the matching range // Trim off any part of the range to remove that's before the matching range
begin = std::max(it->first, begin); begin = std::max(it->first, begin);
// If the matching range extends to both sides of the range to remove, // If the matching range extends to both sides of the range to remove,
// split it on the range to remove // split it on the range to remove
if (it->first < begin && it->second > end) { if (it->first < begin && it->second > end) {
it = m_ranges.insert(it + 1, {end, it->second}) - 1; auto old_second = it->second;
it->second = begin; it.set(it->first, begin);
it = std::prev(insert(std::next(it), {end, old_second}));
} }
// Range to delete now covers (at least) one end of the matching range // Range to delete now covers (at least) one end of the matching range
if (begin == it->first && end >= it->second) else if (begin == it->first && end >= it->second)
it = m_ranges.erase(it); it = erase(it);
else if (begin == it->first) else if (begin == it->first)
it->first = end; it.set(end, it->second);
else else
it->second = begin; it.set(it->first, begin);
} }
return it; return it;
} }
@ -366,17 +643,18 @@ void IndexSet::remove(size_t index, size_t count)
void IndexSet::remove(realm::IndexSet const& values) void IndexSet::remove(realm::IndexSet const& values)
{ {
auto it = m_ranges.begin(); auto it = begin();
for (auto range : values) { for (auto range : values) {
it = do_remove(it, range.first, range.second); it = do_remove(it, range.first, range.second);
if (it == m_ranges.end()) if (it == end())
return; return;
} }
} }
size_t IndexSet::shift(size_t index) const size_t IndexSet::shift(size_t index) const
{ {
for (auto range : m_ranges) { // FIXME: optimize
for (auto range : *this) {
if (range.first > index) if (range.first > index)
break; break;
index += range.second - range.first; index += range.second - range.first;
@ -392,59 +670,36 @@ size_t IndexSet::unshift(size_t index) const
void IndexSet::clear() void IndexSet::clear()
{ {
m_ranges.clear(); m_data.clear();
}
void IndexSet::add_back(size_t index)
{
if (m_ranges.empty())
m_ranges.push_back({index, index + 1});
else if (m_ranges.back().second == index)
++m_ranges.back().second;
else {
REALM_ASSERT_DEBUG(m_ranges.back().second < index);
m_ranges.push_back({index, index + 1});
}
} }
IndexSet::iterator IndexSet::do_add(iterator it, size_t index) IndexSet::iterator IndexSet::do_add(iterator it, size_t index)
{ {
verify(); verify();
bool more_before = it != m_ranges.begin(), valid = it != m_ranges.end(); bool more_before = it != begin(), valid = it != end();
REALM_ASSERT(!more_before || index >= (it - 1)->second); REALM_ASSERT(!more_before || index >= std::prev(it)->second);
if (valid && it->first <= index && it->second > index) { if (valid && it->first <= index && it->second > index) {
// index is already in set // index is already in set
return it; return it;
} }
if (more_before && (it - 1)->second == index) { if (more_before && std::prev(it)->second == index) {
auto prev = std::prev(it);
// index is immediately after an existing range // index is immediately after an existing range
++(it - 1)->second; prev.adjust(0, 1);
if (valid && (it - 1)->second == it->first) { if (valid && prev->second == it->first) {
// index joins two existing ranges // index joins two existing ranges
(it - 1)->second = it->second; prev.adjust(0, it->second - it->first);
return m_ranges.erase(it) - 1; return std::prev(erase(it));
} }
return it - 1; return prev;
} }
if (valid && it->first == index + 1) { if (valid && it->first == index + 1) {
// index is immediately before an existing range // index is immediately before an existing range
--it->first; it.adjust(-1, 0);
return it; return it;
} }
// index is not next to an existing range // index is not next to an existing range
return m_ranges.insert(it, {index, index + 1}); return insert(it, {index, index + 1});
}
void IndexSet::verify() const noexcept
{
#ifdef REALM_DEBUG
size_t prev_end = -1;
for (auto range : m_ranges) {
REALM_ASSERT(range.first < range.second);
REALM_ASSERT(prev_end == size_t(-1) || range.first > prev_end);
prev_end = range.second;
}
#endif
} }

View File

@ -25,18 +25,115 @@
#include <stddef.h> #include <stddef.h>
namespace realm { namespace realm {
class IndexSet { class IndexSet;
namespace _impl {
template<typename OuterIterator>
class MutableChunkedRangeVectorIterator;
// An iterator for ChunkedRangeVector, templated on the vector iterator/const_iterator
//
// Iterates over the individual ranges stored in the chunks as if they were a
// single flat sequence. Dereferencing always yields a const reference, even
// when OuterIterator is the mutable vector iterator.
template<typename OuterIterator>
class ChunkedRangeVectorIterator {
public:
using iterator_category = std::bidirectional_iterator_tag;
// The element type of the chunk's `data` vector as seen through
// OuterIterator
using value_type = typename std::remove_reference<decltype(*OuterIterator()->data.begin())>::type;
using difference_type = ptrdiff_t;
using pointer = const value_type*;
using reference = const value_type&;
// outer: the chunk being pointed into
// end:   the end of the chunk vector, used to detect running off the end
// inner: the range within *outer being pointed at
ChunkedRangeVectorIterator(OuterIterator outer, OuterIterator end, value_type* inner)
: m_outer(outer), m_end(end), m_inner(inner) { }
reference operator*() const { return *m_inner; }
pointer operator->() const { return m_inner; }
// Comparison is templated so that iterators with different OuterIterator
// types can be compared with each other
template<typename Other> bool operator==(Other const& it) const;
template<typename Other> bool operator!=(Other const& it) const;
ChunkedRangeVectorIterator& operator++();
ChunkedRangeVectorIterator operator++(int);
ChunkedRangeVectorIterator& operator--();
ChunkedRangeVectorIterator operator--(int);
// Advance directly to the next outer block
void next_chunk();
// The chunk currently being pointed into
OuterIterator outer() const { return m_outer; }
// Index of the current range within its chunk's `data` vector
size_t offset() const { return m_inner - &m_outer->data[0]; }
private:
OuterIterator m_outer;
OuterIterator m_end;
// Set to nullptr by next_chunk() once m_outer reaches m_end
value_type* m_inner;
friend struct ChunkedRangeVector;
friend class MutableChunkedRangeVectorIterator<OuterIterator>;
};
// A mutable iterator that adds some invariant-preserving mutation methods
//
// Ranges are only exposed as const through operator* and operator->, so the
// methods declared here are the way to modify a range through an iterator.
template<typename OuterIterator>
class MutableChunkedRangeVectorIterator : public ChunkedRangeVectorIterator<OuterIterator> {
public:
// Inherit the (outer, end, inner) constructor
using ChunkedRangeVectorIterator<OuterIterator>::ChunkedRangeVectorIterator;
// Set this iterator to the given range and update the parent if needed
void set(size_t begin, size_t end);
// Adjust the begin and end of this iterator by the given amounts and
// update the parent if needed
// Either amount may be negative.
void adjust(ptrdiff_t front, ptrdiff_t back);
// Shift this iterator by the given amount and update the parent if needed
// Both ends of the range move by the same (possibly negative) distance.
void shift(ptrdiff_t distance);
};
// A vector which stores ranges in chunks with a maximum size
struct ChunkedRangeVector {
// One chunk of ranges plus a cached summary of its contents
struct Chunk {
// The ranges held by this chunk
std::vector<std::pair<size_t, size_t>> data;
// Cached `first` of the first range in `data`
size_t begin;
// Cached `second` of the last range in `data`
size_t end;
// Cached sum of (second - first) over all ranges in `data`
size_t count;
};
std::vector<Chunk> m_data;
using value_type = std::pair<size_t, size_t>;
using iterator = MutableChunkedRangeVectorIterator<typename decltype(m_data)::iterator>;
using const_iterator = ChunkedRangeVectorIterator<typename decltype(m_data)::const_iterator>;
// Maximum number of ranges per chunk. Debug builds use a much smaller limit
// than release builds, where a chunk's ranges add up to 4096 bytes.
#ifdef REALM_DEBUG
static const size_t max_size = 4;
#else
static const size_t max_size = 4096 / sizeof(std::pair<size_t, size_t>);
#endif
// begin() of an empty vector is end(); otherwise it points at the first range
// of the first chunk
iterator begin() { return empty() ? end() : iterator(m_data.begin(), m_data.end(), &m_data[0].data[0]); }
// The end iterator has its outer iterator at m_data.end() and a null inner pointer
iterator end() { return iterator(m_data.end(), m_data.end(), nullptr); }
const_iterator begin() const { return cbegin(); }
const_iterator end() const { return cend(); }
const_iterator cbegin() const { return empty() ? cend() : const_iterator(m_data.cbegin(), m_data.end(), &m_data[0].data[0]); }
const_iterator cend() const { return const_iterator(m_data.end(), m_data.end(), nullptr); }
// True when there are no chunks
bool empty() const noexcept { return m_data.empty(); }
// Insert `value` before `pos`
iterator insert(iterator pos, value_type value);
// Remove the range at `pos`
iterator erase(iterator pos);
// Append `value` after all existing ranges
void push_back(value_type value);
// Make room for one more range in the chunk containing `pos`
iterator ensure_space(iterator pos);
// Check internal consistency; the checks are only compiled in when
// REALM_DEBUG is defined
void verify() const noexcept;
};
} // namespace _impl
class IndexSet : private _impl::ChunkedRangeVector {
public: public:
static const size_t npos = -1; static const size_t npos = -1;
using value_type = std::pair<size_t, size_t>; using ChunkedRangeVector::value_type;
using iterator = std::vector<value_type>::iterator; using ChunkedRangeVector::iterator;
using const_iterator = std::vector<value_type>::const_iterator; using ChunkedRangeVector::const_iterator;
using ChunkedRangeVector::begin;
const_iterator begin() const { return m_ranges.begin(); } using ChunkedRangeVector::end;
const_iterator end() const { return m_ranges.end(); } using ChunkedRangeVector::empty;
bool empty() const { return m_ranges.empty(); } using ChunkedRangeVector::verify;
size_t size() const { return m_ranges.size(); }
IndexSet() = default; IndexSet() = default;
IndexSet(std::initializer_list<size_t>); IndexSet(std::initializer_list<size_t>);
@ -45,7 +142,7 @@ public:
bool contains(size_t index) const; bool contains(size_t index) const;
// Counts the number of indices in the set in the given range // Counts the number of indices in the set in the given range
size_t count(size_t start_index, size_t end_index) const; size_t count(size_t start_index=0, size_t end_index=-1) const;
// Add an index to the set, doing nothing if it's already present // Add an index to the set, doing nothing if it's already present
void add(size_t index); void add(size_t index);
@ -91,8 +188,6 @@ public:
// Remove all indexes from the set // Remove all indexes from the set
void clear(); void clear();
void verify() const noexcept;
// An iterator over the individual indices in the set rather than the ranges // An iterator over the individual indices in the set rather than the ranges
class IndexIterator : public std::iterator<std::forward_iterator_tag, size_t> { class IndexIterator : public std::iterator<std::forward_iterator_tag, size_t> {
public: public:
@ -140,8 +235,6 @@ public:
IndexIteratableAdaptor as_indexes() const { return *this; } IndexIteratableAdaptor as_indexes() const { return *this; }
private: private:
std::vector<value_type> m_ranges;
// Find the range which contains the index, or the first one after it if // Find the range which contains the index, or the first one after it if
// none do // none do
iterator find(size_t index); iterator find(size_t index);
@ -153,8 +246,7 @@ private:
void do_erase(iterator it, size_t index); void do_erase(iterator it, size_t index);
iterator do_remove(iterator it, size_t index, size_t count); iterator do_remove(iterator it, size_t index, size_t count);
// Add an index which must be greater than the largest index in the set void shift_until_end_by(iterator begin, ptrdiff_t shift);
void add_back(size_t index);
}; };
namespace util { namespace util {
@ -166,6 +258,68 @@ std::reverse_iterator<Iterator> make_reverse_iterator(Iterator it)
} }
} // namespace util } // namespace util
namespace _impl {
// Two iterators are equal when they refer to the same chunk and to the same
// range within that chunk.
template<typename T>
template<typename OtherIterator>
inline bool ChunkedRangeVectorIterator<T>::operator==(OtherIterator const& it) const
{
    if (!(m_outer == it.outer()))
        return false;
    return m_inner == it.operator->();
}
// Inequality comparison; defined as the exact negation of operator== so the
// two can never disagree.
template<typename T>
template<typename OtherIterator>
inline bool ChunkedRangeVectorIterator<T>::operator!=(OtherIterator const& it) const
{
    const bool same_position = (*this == it);
    return !same_position;
}
// Pre-increment: step to the next element, moving on to the following chunk
// via next_chunk() once the offset reaches the size of the current chunk's
// data.
template<typename T>
inline ChunkedRangeVectorIterator<T>& ChunkedRangeVectorIterator<T>::operator++()
{
    ++m_inner;

    const bool walked_off_chunk = (offset() == m_outer->data.size());
    if (walked_off_chunk) {
        next_chunk();
    }

    return *this;
}
// Post-increment: advance this iterator and return a copy of the position it
// held before the advance.
template<typename T>
inline ChunkedRangeVectorIterator<T> ChunkedRangeVectorIterator<T>::operator++(int)
{
    ChunkedRangeVectorIterator<T> previous(*this);
    ++(*this);
    return previous;
}
// Pre-decrement: step back one element. When there is no current element
// (m_inner is null) or the iterator sits on the first element of its chunk,
// move to the last element of the preceding chunk instead.
template<typename T>
inline ChunkedRangeVectorIterator<T>& ChunkedRangeVectorIterator<T>::operator--()
{
    // The null check must come first: m_outer is only dereferenced when
    // m_inner is non-null.
    const bool must_cross_chunk = !m_inner || m_inner == &m_outer->data.front();
    if (!must_cross_chunk) {
        --m_inner;
        return *this;
    }

    --m_outer;
    m_inner = &m_outer->data.back();
    return *this;
}
// Post-decrement: step this iterator back and return a copy of the position
// it held before stepping.
template<typename T>
inline ChunkedRangeVectorIterator<T> ChunkedRangeVectorIterator<T>::operator--(int)
{
    ChunkedRangeVectorIterator<T> previous(*this);
    --(*this);
    return previous;
}
// Advance to the following chunk and point at its first element. If that
// takes the outer position to m_end there is no element left, so the inner
// pointer is cleared to null.
template<typename T>
inline void ChunkedRangeVectorIterator<T>::next_chunk()
{
    ++m_outer;

    if (m_outer == m_end) {
        m_inner = nullptr;
        return;
    }
    m_inner = &m_outer->data[0];
}
} // namespace _impl
} // namespace realm } // namespace realm
#endif // REALM_INDEX_SET_HPP #endif // REALM_INDEX_SET_HPP

View File

@ -233,7 +233,8 @@ TEST_CASE("[collection_change] clear()") {
SECTION("sets deletions SIZE_T_MAX if that if the given previous size") { SECTION("sets deletions SIZE_T_MAX if that if the given previous size") {
c.insertions = {1, 2, 3}; c.insertions = {1, 2, 3};
c.clear(std::numeric_limits<size_t>::max()); c.clear(std::numeric_limits<size_t>::max());
REQUIRE(c.deletions.size() == 1); REQUIRE(!c.deletions.empty());
REQUIRE(++c.deletions.begin() == c.deletions.end());
REQUIRE(c.deletions.begin()->first == 0); REQUIRE(c.deletions.begin()->first == 0);
REQUIRE(c.deletions.begin()->second == std::numeric_limits<size_t>::max()); REQUIRE(c.deletions.begin()->second == std::numeric_limits<size_t>::max());
} }

View File

@ -57,6 +57,20 @@ TEST_CASE("[index_set] count()") {
realm::IndexSet set = {1, 2, 3, 5, 6, 7, 8, 9}; realm::IndexSet set = {1, 2, 3, 5, 6, 7, 8, 9};
REQUIRE(set.count(3, 9) == 5); REQUIRE(set.count(3, 9) == 5);
} }
SECTION("handles full chunks well") {
size_t count = realm::_impl::ChunkedRangeVector::max_size * 4;
realm::IndexSet set;
for (size_t i = 0; i < count; ++i) {
set.add(i * 3);
set.add(i * 3 + 1);
}
for (size_t i = 0; i < count * 3; ++i) {
REQUIRE(set.count(i) == 2 * count - (i + 1) * 2 / 3);
REQUIRE(set.count(0, i) == (i + 1) / 3 + (i + 2) / 3);
}
}
} }
TEST_CASE("[index_set] add()") { TEST_CASE("[index_set] add()") {
@ -100,6 +114,12 @@ TEST_CASE("[index_set] add()") {
set.add({1, 4, 5}); set.add({1, 4, 5});
REQUIRE_INDICES(set, 0, 1, 2, 4, 5, 6); REQUIRE_INDICES(set, 0, 1, 2, 4, 5, 6);
} }
SECTION("handles front additions of ranges") {
for (size_t i = 20; i > 0; i -= 2)
set.add(i);
REQUIRE_INDICES(set, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20);
}
} }
TEST_CASE("[index_set] add_shifted()") { TEST_CASE("[index_set] add_shifted()") {
@ -172,14 +192,14 @@ TEST_CASE("[index_set] add_shifted_by()") {
SECTION("adds the indices shifted by the old count when they are all after the old indices and the shifted-by set is empty") { SECTION("adds the indices shifted by the old count when they are all after the old indices and the shifted-by set is empty") {
set = {5, 6}; set = {5, 6};
set.add_shifted_by({}, {7, 8}); set.add_shifted_by({}, {7, 9, 11, 13});
REQUIRE_INDICES(set, 5, 6, 9, 10); REQUIRE_INDICES(set, 5, 6, 9, 11, 13, 15);
} }
SECTION("acts like bulk add_shifted() when shifted_by is empty") { SECTION("acts like bulk add_shifted() when shifted_by is empty") {
set = {5, 10}; set = {5, 10, 15, 20, 25};
set.add_shifted_by({}, {4, 5, 11}); set.add_shifted_by({}, {4, 5, 11});
REQUIRE_INDICES(set, 4, 5, 6, 10, 13); REQUIRE_INDICES(set, 4, 5, 6, 10, 13, 15, 20, 25);
} }
SECTION("shifts indices in values back by the number of indices in shifted_by before them") { SECTION("shifts indices in values back by the number of indices in shifted_by before them") {
@ -288,6 +308,18 @@ TEST_CASE("[index_set] shift_for_insert_at()") {
REQUIRE(set.empty()); REQUIRE(set.empty());
} }
SECTION("does nothing when the insertion points are all after the current indices") {
set = {10, 20};
set.shift_for_insert_at({30, 40});
REQUIRE_INDICES(set, 10, 20);
}
SECTION("does shift when the insertion points are all before the current indices") {
set = {10, 20};
set.shift_for_insert_at({2, 4});
REQUIRE_INDICES(set, 12, 22);
}
SECTION("shifts indices at or after the insertion points") { SECTION("shifts indices at or after the insertion points") {
set = {5}; set = {5};

View File

@ -366,7 +366,7 @@ TEST_CASE("Transaction log parsing") {
auto info = track_changes({false, false, false}, [&] { auto info = track_changes({false, false, false}, [&] {
table.set_int(0, 1, 2); table.set_int(0, 1, 2);
}); });
REQUIRE(info.tables.size() == 0); REQUIRE(info.tables.empty());
} }
SECTION("new row additions are reported") { SECTION("new row additions are reported") {
@ -678,7 +678,7 @@ TEST_CASE("Transaction log parsing") {
lv->remove(5); lv->remove(5);
} }
REQUIRE_INDICES(changes.deletions, 5); REQUIRE_INDICES(changes.deletions, 5);
REQUIRE(changes.modifications.size() == 0); REQUIRE(changes.modifications.empty());
VALIDATE_CHANGES(changes) { VALIDATE_CHANGES(changes) {
lv->set(5, 0); lv->set(5, 0);
@ -693,7 +693,7 @@ TEST_CASE("Transaction log parsing") {
lv->remove(4); lv->remove(4);
} }
REQUIRE_INDICES(changes.deletions, 4, 5); REQUIRE_INDICES(changes.deletions, 4, 5);
REQUIRE(changes.modifications.size() == 0); REQUIRE(changes.modifications.empty());
} }
SECTION("erase -> set") { SECTION("erase -> set") {
@ -711,7 +711,7 @@ TEST_CASE("Transaction log parsing") {
lv->clear(); lv->clear();
} }
REQUIRE_INDICES(changes.deletions, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); REQUIRE_INDICES(changes.deletions, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
REQUIRE(changes.insertions.size() == 0); REQUIRE(changes.insertions.empty());
} }
SECTION("set -> clear") { SECTION("set -> clear") {
@ -720,7 +720,7 @@ TEST_CASE("Transaction log parsing") {
lv->clear(); lv->clear();
} }
REQUIRE_INDICES(changes.deletions, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); REQUIRE_INDICES(changes.deletions, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9);
REQUIRE(changes.modifications.size() == 0); REQUIRE(changes.modifications.empty());
} }
SECTION("clear -> insert") { SECTION("clear -> insert") {
@ -737,8 +737,8 @@ TEST_CASE("Transaction log parsing") {
lv->add(0); lv->add(0);
lv->remove(10); lv->remove(10);
} }
REQUIRE(changes.insertions.size() == 0); REQUIRE(changes.insertions.empty());
REQUIRE(changes.deletions.size() == 0); REQUIRE(changes.deletions.empty());
VALIDATE_CHANGES(changes) { VALIDATE_CHANGES(changes) {
lv->add(0); lv->add(0);