From 7a75e2bae297ab025bd06643dd33c7fef806bd0f Mon Sep 17 00:00:00 2001 From: Thomas Goyne Date: Wed, 23 Mar 2016 10:01:14 -0700 Subject: [PATCH] Use a better data structure for IndexSet Switch to a chunked vector-of-vectors which makes mid-insertions on large sets much faster, and cache the begin/end/count for each chunk to make lookups much more cache-friendly. --- src/impl/transact_log_handler.cpp | 4 +- src/index_set.cpp | 545 ++++++++++++++++++++-------- src/index_set.hpp | 186 +++++++++- tests/collection_change_indices.cpp | 3 +- tests/index_set.cpp | 40 +- tests/transaction_log_parsing.cpp | 14 +- 6 files changed, 617 insertions(+), 175 deletions(-) diff --git a/src/impl/transact_log_handler.cpp b/src/impl/transact_log_handler.cpp index 87c18c25..19af17eb 100644 --- a/src/impl/transact_log_handler.cpp +++ b/src/impl/transact_log_handler.cpp @@ -382,9 +382,9 @@ public: } if (o->kind == ColumnInfo::Kind::Remove) - old_size += o->indices.size(); + old_size += o->indices.count(); else if (o->kind == ColumnInfo::Kind::Insert) - old_size -= o->indices.size(); + old_size -= o->indices.count(); o->indices.set(old_size); diff --git a/src/index_set.cpp b/src/index_set.cpp index 4edfe084..b9b9525c 100644 --- a/src/index_set.cpp +++ b/src/index_set.cpp @@ -23,9 +23,247 @@ #include using namespace realm; +using namespace realm::_impl; const size_t IndexSet::npos; +template +void MutableChunkedRangeVectorIterator::set(size_t front, size_t back) +{ + this->m_outer->count -= this->m_inner->second - this->m_inner->first; + if (this->offset() == 0) { + this->m_outer->begin = front; + } + if (this->m_inner == &this->m_outer->data.back()) { + this->m_outer->end = back; + } + this->m_outer->count += back - front; + this->m_inner->first = front; + this->m_inner->second = back; +} + +template +void MutableChunkedRangeVectorIterator::adjust(ptrdiff_t front, ptrdiff_t back) +{ + if (this->offset() == 0) { + this->m_outer->begin += front; + } + if (this->m_inner == &this->m_outer->data.back()) { + this->m_outer->end += back; + } + this->m_outer->count += -front + back; + this->m_inner->first += front; + this->m_inner->second += back; +} + +template +void MutableChunkedRangeVectorIterator::shift(ptrdiff_t distance) +{ + if (this->offset() == 0) { + this->m_outer->begin += distance; + } + if (this->m_inner == &this->m_outer->data.back()) { + this->m_outer->end += distance; + } + this->m_inner->first += distance; + this->m_inner->second += distance; +} + +void ChunkedRangeVector::push_back(value_type value) +{ + if (!empty() && m_data.back().data.size() < max_size) { + auto& range = m_data.back(); + REALM_ASSERT(range.end <= value.first); + + range.data.push_back(value); + range.count += value.second - value.first; + range.end = value.second; + } + else { + m_data.push_back({{std::move(value)}, value.first, value.second, value.second - value.first}); + } + verify(); +} + +ChunkedRangeVector::iterator ChunkedRangeVector::insert(iterator pos, value_type value) +{ + if (pos.m_outer == m_data.end()) { + push_back(std::move(value)); + return std::prev(end()); + } + + pos = ensure_space(pos); + auto& chunk = *pos.m_outer; + pos.m_inner = &*chunk.data.insert(pos.m_outer->data.begin() + pos.offset(), value); + chunk.count += value.second - value.first; + chunk.begin = std::min(chunk.begin, value.first); + chunk.end = std::max(chunk.end, value.second); + + verify(); + return pos; +} + +ChunkedRangeVector::iterator ChunkedRangeVector::ensure_space(iterator pos) +{ + if (pos.m_outer->data.size() + 1 <= max_size) + return pos; + + auto offset = pos.offset(); + + // Split the chunk in half to make space for the new insertion + auto new_pos = m_data.insert(pos.m_outer + 1, Chunk{}); + auto prev = new_pos - 1; + auto to_move = max_size / 2; + new_pos->data.reserve(to_move); + new_pos->data.assign(prev->data.end() - to_move, prev->data.end()); + prev->data.resize(prev->data.size() - to_move); + + size_t moved_count = 0; + for (auto range : new_pos->data) + moved_count += range.second - range.first; + + prev->end = prev->data.back().second; + prev->count -= moved_count; + new_pos->begin = new_pos->data.front().first; + new_pos->end = new_pos->data.back().second; + new_pos->count = moved_count; + + if (offset >= to_move) { + pos.m_outer = new_pos; + offset -= to_move; + } + else { + pos.m_outer = prev; + } + pos.m_end = m_data.end(); + pos.m_inner = &pos.m_outer->data[offset]; + verify(); + return pos; +} + +ChunkedRangeVector::iterator ChunkedRangeVector::erase(iterator pos) +{ + auto offset = pos.offset(); + auto& chunk = *pos.m_outer; + chunk.count -= pos->second - pos->first; + chunk.data.erase(chunk.data.begin() + offset); + + if (chunk.data.size() == 0) { + pos.m_outer = m_data.erase(pos.m_outer); + pos.m_end = m_data.end(); + pos.m_inner = pos.m_outer == m_data.end() ? nullptr : &pos.m_outer->data.back(); + verify(); + return pos; + } + + chunk.begin = chunk.data.front().first; + chunk.end = chunk.data.back().second; + if (offset < chunk.data.size()) + pos.m_inner = &chunk.data[offset]; + else { + ++pos.m_outer; + pos.m_inner = pos.m_outer == pos.m_end ? nullptr : &pos.m_outer->data.front(); + } + + verify(); + return pos; +} + +void ChunkedRangeVector::verify() const noexcept +{ +#ifdef REALM_DEBUG + size_t prev_end = -1; + for (auto range : *this) { + REALM_ASSERT(range.first < range.second); + REALM_ASSERT(prev_end == size_t(-1) || range.first > prev_end); + prev_end = range.second; + } + + for (auto& chunk : m_data) { + REALM_ASSERT(!chunk.data.empty()); + REALM_ASSERT(chunk.data.front().first == chunk.begin); + REALM_ASSERT(chunk.data.back().second == chunk.end); + REALM_ASSERT(chunk.count <= chunk.end - chunk.begin); + size_t count = 0; + for (auto range : chunk.data) + count += range.second - range.first; + REALM_ASSERT(count == chunk.count); + } +#endif +} + +namespace { +class ChunkedRangeVectorBuilder { +public: + using value_type = std::pair; + + ChunkedRangeVectorBuilder(ChunkedRangeVector const& expected); + void push_back(size_t index); + void push_back(std::pair range); + std::vector finalize(); +private: + std::vector m_data; + size_t m_outer_pos = 0; +}; + +ChunkedRangeVectorBuilder::ChunkedRangeVectorBuilder(ChunkedRangeVector const& expected) +{ + size_t size = 0; + for (auto const& chunk : expected.m_data) + size += chunk.data.size(); + m_data.resize(size / ChunkedRangeVector::max_size + 1); + for (size_t i = 0; i < m_data.size() - 1; ++i) + m_data[i].data.reserve(ChunkedRangeVector::max_size); +} + +void ChunkedRangeVectorBuilder::push_back(size_t index) +{ + push_back({index, index + 1}); +} + +void ChunkedRangeVectorBuilder::push_back(std::pair range) +{ + auto& chunk = m_data[m_outer_pos]; + if (chunk.data.empty()) { + chunk.data.push_back(range); + chunk.count = range.second - range.first; + chunk.begin = range.first; + } + else if (range.first == chunk.data.back().second) { + chunk.data.back().second = range.second; + chunk.count += range.second - range.first; + } + else if (chunk.data.size() < ChunkedRangeVector::max_size) { + chunk.data.push_back(range); + chunk.count += range.second - range.first; + } + else { + chunk.end = chunk.data.back().second; + ++m_outer_pos; + if (m_outer_pos >= m_data.size()) + m_data.push_back({{range}, range.first, 0, 1}); + else { + auto& chunk = m_data[m_outer_pos]; + chunk.data.push_back(range); + chunk.begin = range.first; + chunk.count = range.second - range.first; + } + } +} + +std::vector ChunkedRangeVectorBuilder::finalize() +{ + if (!m_data.empty()) { + m_data.resize(m_outer_pos + 1); + if (m_data.back().data.empty()) + m_data.pop_back(); + else + m_data.back().end = m_data.back().data.back().second; + } + return std::move(m_data); +} +} + IndexSet::IndexSet(std::initializer_list values) { for (size_t v : values) @@ -35,24 +273,48 @@ IndexSet::IndexSet(std::initializer_list values) bool IndexSet::contains(size_t index) const { auto it = const_cast(this)->find(index); - return it != m_ranges.end() && it->first <= index; + return it != end() && it->first <= index; } size_t IndexSet::count(size_t start_index, size_t end_index) const { auto it = const_cast(this)->find(start_index); - const auto end = m_ranges.end(); + const auto end = this->end(); if (it == end || it->first >= end_index) { return 0; } if (it->second >= end_index) return std::min(it->second, end_index) - std::max(it->first, start_index); - // These checks are somewhat redundant, but this loop is hot so pulling instructions out of it helps - size_t ret = it->second - std::max(it->first, start_index); - for (++it; it != end && it->second < end_index; ++it) { - ret += it->second - it->first; + size_t ret = 0; + + if (start_index > it->first || it.offset() != 0) { + // Start index is in the middle of a chunk, so start by counting the + // rest of that chunk + ret = it->second - std::max(it->first, start_index); + for (++it; it != end && it->second < end_index && it.offset() != 0; ++it) { + ret += it->second - it->first; + } + if (it != end && it->first < end_index && it.offset() != 0) + ret += end_index - it->first; + if (it == end || it->second >= end_index) + return ret; } + + // Now count all complete chunks that fall within the range + while (it != end && it.outer()->end <= end_index) { + REALM_ASSERT_DEBUG(it.offset() == 0); + ret += it.outer()->count; + it.next_chunk(); + } + + // Cound all complete ranges within the last chunk + while (it != end && it->second <= end_index) { + ret += it->second - it->first; + ++it; + } + + // And finally add in the partial last range if (it != end && it->first < end_index) ret += end_index - it->first; return ret; @@ -60,13 +322,25 @@ size_t IndexSet::count(size_t start_index, size_t end_index) const IndexSet::iterator IndexSet::find(size_t index) { - return find(index, m_ranges.begin()); + return find(index, begin()); } -IndexSet::iterator IndexSet::find(size_t index, iterator it) +IndexSet::iterator IndexSet::find(size_t index, iterator begin) { - return std::lower_bound(it, m_ranges.end(), std::make_pair(size_t(0), index + 1), - [&](auto const& a, auto const& b) { return a.second < b.second; }); + auto it = std::find_if(begin.outer(), m_data.end(), + [&](auto const& lft) { return lft.end > index; }); + if (it == m_data.end()) + return end(); + if (index < it->begin) + return iterator(it, m_data.end(), &it->data[0]); + auto inner_begin = it->data.begin(); + if (it == begin.outer()) + inner_begin += begin.offset(); + auto inner = std::lower_bound(inner_begin, it->data.end(), index, + [&](auto const& lft, auto) { return lft.second <= index; }); + REALM_ASSERT_DEBUG(inner != it->data.end()); + + return iterator(it, m_data.end(), &*inner); } void IndexSet::add(size_t index) @@ -76,7 +350,7 @@ void IndexSet::add(size_t index) void IndexSet::add(IndexSet const& other) { - auto it = m_ranges.begin(); + auto it = begin(); for (size_t index : other.as_indexes()) { it = do_add(find(index, it), index); } @@ -84,10 +358,16 @@ void IndexSet::add(IndexSet const& other) size_t IndexSet::add_shifted(size_t index) { - auto it = m_ranges.begin(); - for (auto end = m_ranges.end(); it != end && it->first <= index; ++it) { + iterator it = begin(), end = this->end(); + + // Shift for any complete chunks before the target + for (; it != end && it.outer()->end <= index; it.next_chunk()) + index += it.outer()->count; + + // And any ranges within the last partial chunk + for (; it != end && it->first <= index; ++it) index += it->second - it->first; - } + do_add(it, index); return index; } @@ -105,11 +385,10 @@ void IndexSet::add_shifted_by(IndexSet const& shifted_by, IndexSet const& values } #endif - auto old_ranges = move(m_ranges); - m_ranges.reserve(std::max(old_ranges.size(), values.size())); + ChunkedRangeVectorBuilder builder(*this); - auto old_it = old_ranges.cbegin(), old_end = old_ranges.cend(); - auto shift_it = shifted_by.m_ranges.cbegin(), shift_end = shifted_by.m_ranges.cend(); + auto old_it = cbegin(), old_end = cend(); + auto shift_it = shifted_by.cbegin(), shift_end = shifted_by.cend(); size_t skip_until = 0; size_t old_shift = 0; @@ -124,30 +403,25 @@ void IndexSet::add_shifted_by(IndexSet const& shifted_by, IndexSet const& values for (; old_it != old_end && old_it->first <= index - new_shift + old_shift; ++old_it) { for (size_t i = old_it->first; i < old_it->second; ++i) - add_back(i); + builder.push_back(i); old_shift += old_it->second - old_it->first; } REALM_ASSERT(index >= new_shift); - add_back(index - new_shift + old_shift); + builder.push_back(index - new_shift + old_shift); } - if (old_it != old_end) { - if (!empty() && old_it->first == m_ranges.back().second) { - m_ranges.back().second = old_it->second; - ++old_it; - } - copy(old_it, old_end, back_inserter(m_ranges)); - } + copy(old_it, old_end, std::back_inserter(builder)); + m_data = builder.finalize(); REALM_ASSERT_DEBUG((size_t)std::distance(as_indexes().begin(), as_indexes().end()) == expected); } void IndexSet::set(size_t len) { - m_ranges.clear(); + clear(); if (len) { - m_ranges.push_back({0, len}); + push_back({0, len}); } } @@ -156,22 +430,25 @@ void IndexSet::insert_at(size_t index, size_t count) REALM_ASSERT(count > 0); auto pos = find(index); + auto end = this->end(); bool in_existing = false; - if (pos != m_ranges.end()) { - if (pos->first <= index) + if (pos != end) { + if (pos->first <= index) { in_existing = true; - else - pos->first += count; - pos->second += count; - for (auto it = pos + 1; it != m_ranges.end(); ++it) { - it->first += count; - it->second += count; + pos.adjust(0, count); } + else { + pos.shift(count); + } + for (auto it = std::next(pos); it != end; ++it) + it.shift(count); } if (!in_existing) { for (size_t i = 0; i < count; ++i) - pos = do_add(pos, index + i) + 1; + pos = std::next(do_add(pos, index + i)); } + + verify(); } void IndexSet::insert_at(IndexSet const& positions) @@ -179,30 +456,30 @@ void IndexSet::insert_at(IndexSet const& positions) if (positions.empty()) return; if (empty()) { - m_ranges = positions.m_ranges; + *this = positions; return; } - auto old_ranges = move(m_ranges); - m_ranges.reserve(std::max(m_ranges.size(), positions.m_ranges.size())); - - IndexIterator begin1 = old_ranges.cbegin(), begin2 = positions.m_ranges.cbegin(); - IndexIterator end1 = old_ranges.cend(), end2 = positions.m_ranges.cend(); + IndexIterator begin1 = cbegin(), begin2 = positions.cbegin(); + IndexIterator end1 = cend(), end2 = positions.cend(); + ChunkedRangeVectorBuilder builder(*this); size_t shift = 0; while (begin1 != end1 && begin2 != end2) { if (*begin1 + shift < *begin2) { - add_back(*begin1++ + shift); + builder.push_back(*begin1++ + shift); } else { ++shift; - add_back(*begin2++); + builder.push_back(*begin2++); } } for (; begin1 != end1; ++begin1) - add_back(*begin1 + shift); + builder.push_back(*begin1 + shift); for (; begin2 != end2; ++begin2) - add_back(*begin2); + builder.push_back(*begin2); + + m_data = builder.finalize(); } void IndexSet::shift_for_insert_at(size_t index, size_t count) @@ -210,58 +487,53 @@ void IndexSet::shift_for_insert_at(size_t index, size_t count) REALM_ASSERT(count > 0); auto it = find(index); - if (it == m_ranges.end()) + if (it == end()) return; - if (it->first < index) { - // split the range so that we can exclude `index` - auto old_second = it->second; - it->second = index; - it = m_ranges.insert(it + 1, {index, old_second}); - } + for (auto pos = it, end = this->end(); pos != end; ++pos) + pos.shift(count); - for (; it != m_ranges.end(); ++it) { - it->first += count; - it->second += count; + // If the range contained the insertion point, split the range and move + // the part of it before the insertion point back + if (it->first < index + count) { + auto old_second = it->second; + it.set(it->first - count, index); + insert(std::next(it), {index + count, old_second}); } + verify(); } void IndexSet::shift_for_insert_at(realm::IndexSet const& values) { - if (values.empty()) + if (empty() || values.empty()) + return; + if (values.m_data.front().begin >= m_data.back().end) return; + IndexIterator begin1 = cbegin(), begin2 = values.cbegin(); + IndexIterator end1 = cend(), end2 = values.cend(); + + ChunkedRangeVectorBuilder builder(*this); size_t shift = 0; - auto it = find(values.begin()->first); - for (auto range : values) { - for (; it != m_ranges.end() && it->second + shift <= range.first; ++it) { - it->first += shift; - it->second += shift; + while (begin1 != end1 && begin2 != end2) { + if (*begin1 + shift < *begin2) { + builder.push_back(*begin1++ + shift); } - if (it == m_ranges.end()) - return; - - if (it->first + shift < range.first) { - // split the range so that we can exclude `index` - auto old_second = it->second; - it->first += shift; - it->second = range.first; - it = m_ranges.insert(it + 1, {range.first - shift, old_second}); + else { + ++shift; + begin2++; } - - shift += range.second - range.first; } + for (; begin1 != end1; ++begin1) + builder.push_back(*begin1 + shift); - for (; it != m_ranges.end(); ++it) { - it->first += shift; - it->second += shift; - } + m_data = builder.finalize(); } void IndexSet::erase_at(size_t index) { auto it = find(index); - if (it != m_ranges.end()) + if (it != end()) do_erase(it, index); } @@ -270,16 +542,15 @@ void IndexSet::erase_at(IndexSet const& positions) if (empty() || positions.empty()) return; - auto old_ranges = move(m_ranges); - m_ranges.reserve(std::max(m_ranges.size(), positions.m_ranges.size())); + ChunkedRangeVectorBuilder builder(*this); - IndexIterator begin1 = old_ranges.cbegin(), begin2 = positions.m_ranges.cbegin(); - IndexIterator end1 = old_ranges.cend(), end2 = positions.m_ranges.cend(); + IndexIterator begin1 = cbegin(), begin2 = positions.cbegin(); + IndexIterator end1 = cend(), end2 = positions.cend(); size_t shift = 0; while (begin1 != end1 && begin2 != end2) { if (*begin1 < *begin2) { - add_back(*begin1++ - shift); + builder.push_back(*begin1++ - shift); } else if (*begin1 == *begin2) { ++shift; @@ -292,16 +563,24 @@ void IndexSet::erase_at(IndexSet const& positions) } } for (; begin1 != end1; ++begin1) - add_back(*begin1 - shift); + builder.push_back(*begin1 - shift); + + m_data = builder.finalize(); } size_t IndexSet::erase_or_unshift(size_t index) { auto shifted = index; - auto it = m_ranges.begin(), end = m_ranges.end(); - for (; it != end && it->second <= index; ++it) { + iterator it = begin(), end = this->end(); + + // Shift for any complete chunks before the target + for (; it != end && it.outer()->end <= index; it.next_chunk()) + shifted -= it.outer()->count; + + // And any ranges within the last partial chunk + for (; it != end && it->second <= index; ++it) shifted -= it->second - it->first; - } + if (it == end) return shifted; @@ -316,45 +595,43 @@ size_t IndexSet::erase_or_unshift(size_t index) void IndexSet::do_erase(iterator it, size_t index) { if (it->first <= index) { - --it->second; - if (it->first == it->second) { - it = m_ranges.erase(it); + if (it->first + 1 == it->second) { + it = erase(it); } else { + it.adjust(0, -1); ++it; } } - else if (it != m_ranges.begin() && (it - 1)->second + 1 == it->first) { - (it - 1)->second = it->second - 1; - it = m_ranges.erase(it); + else if (it != begin() && std::prev(it)->second + 1 == it->first) { + std::prev(it).adjust(0, it->second - it->first); + it = erase(it); } - for (; it != m_ranges.end(); ++it) { - --it->first; - --it->second; - } + for (; it != end(); ++it) + it.shift(-1); } IndexSet::iterator IndexSet::do_remove(iterator it, size_t begin, size_t end) { - for (it = find(begin, it); it != m_ranges.end() && it->first < end; it = find(begin, it)) { + for (it = find(begin, it); it != this->end() && it->first < end; it = find(begin, it)) { // Trim off any part of the range to remove that's before the matching range begin = std::max(it->first, begin); // If the matching range extends to both sides of the range to remove, // split it on the range to remove if (it->first < begin && it->second > end) { - it = m_ranges.insert(it + 1, {end, it->second}) - 1; - it->second = begin; + auto old_second = it->second; + it.set(it->first, begin); + it = std::prev(insert(std::next(it), {end, old_second})); } - // Range to delete now coverages (at least) one end of the matching range - if (begin == it->first && end >= it->second) - it = m_ranges.erase(it); + else if (begin == it->first && end >= it->second) + it = erase(it); else if (begin == it->first) - it->first = end; + it.set(end, it->second); else - it->second = begin; + it.set(it->first, begin); } return it; } @@ -366,17 +643,18 @@ void IndexSet::remove(size_t index, size_t count) void IndexSet::remove(realm::IndexSet const& values) { - auto it = m_ranges.begin(); + auto it = begin(); for (auto range : values) { it = do_remove(it, range.first, range.second); - if (it == m_ranges.end()) + if (it == end()) return; } } size_t IndexSet::shift(size_t index) const { - for (auto range : m_ranges) { + // FIXME: optimize + for (auto range : *this) { if (range.first > index) break; index += range.second - range.first; @@ -392,59 +670,36 @@ size_t IndexSet::unshift(size_t index) const void IndexSet::clear() { - m_ranges.clear(); -} - -void IndexSet::add_back(size_t index) -{ - if (m_ranges.empty()) - m_ranges.push_back({index, index + 1}); - else if (m_ranges.back().second == index) - ++m_ranges.back().second; - else { - REALM_ASSERT_DEBUG(m_ranges.back().second < index); - m_ranges.push_back({index, index + 1}); - } + m_data.clear(); } IndexSet::iterator IndexSet::do_add(iterator it, size_t index) { verify(); - bool more_before = it != m_ranges.begin(), valid = it != m_ranges.end(); - REALM_ASSERT(!more_before || index >= (it - 1)->second); + bool more_before = it != begin(), valid = it != end(); + REALM_ASSERT(!more_before || index >= std::prev(it)->second); if (valid && it->first <= index && it->second > index) { // index is already in set return it; } - if (more_before && (it - 1)->second == index) { + if (more_before && std::prev(it)->second == index) { + auto prev = std::prev(it); // index is immediately after an existing range - ++(it - 1)->second; + prev.adjust(0, 1); - if (valid && (it - 1)->second == it->first) { + if (valid && prev->second == it->first) { // index joins two existing ranges - (it - 1)->second = it->second; - return m_ranges.erase(it) - 1; + prev.adjust(0, it->second - it->first); + return std::prev(erase(it)); } - return it - 1; + return prev; } if (valid && it->first == index + 1) { // index is immediately before an existing range - --it->first; + it.adjust(-1, 0); return it; } // index is not next to an existing range - return m_ranges.insert(it, {index, index + 1}); -} - -void IndexSet::verify() const noexcept -{ -#ifdef REALM_DEBUG - size_t prev_end = -1; - for (auto range : m_ranges) { - REALM_ASSERT(range.first < range.second); - REALM_ASSERT(prev_end == size_t(-1) || range.first > prev_end); - prev_end = range.second; - } -#endif + return insert(it, {index, index + 1}); } diff --git a/src/index_set.hpp b/src/index_set.hpp index 924f35e6..3a4ee1ea 100644 --- a/src/index_set.hpp +++ b/src/index_set.hpp @@ -25,18 +25,115 @@ #include namespace realm { -class IndexSet { +class IndexSet; + +namespace _impl { +template +class MutableChunkedRangeVectorIterator; + +// An iterator for ChunkedRangeVector, templated on the vector iterator/const_iterator +template +class ChunkedRangeVectorIterator { +public: + using iterator_category = std::bidirectional_iterator_tag; + using value_type = typename std::remove_referencedata.begin())>::type; + using difference_type = ptrdiff_t; + using pointer = const value_type*; + using reference = const value_type&; + + ChunkedRangeVectorIterator(OuterIterator outer, OuterIterator end, value_type* inner) + : m_outer(outer), m_end(end), m_inner(inner) { } + + reference operator*() const { return *m_inner; } + pointer operator->() const { return m_inner; } + + template bool operator==(Other const& it) const; + template bool operator!=(Other const& it) const; + + ChunkedRangeVectorIterator& operator++(); + ChunkedRangeVectorIterator operator++(int); + + ChunkedRangeVectorIterator& operator--(); + ChunkedRangeVectorIterator operator--(int); + + // Advance directly to the next outer block + void next_chunk(); + + OuterIterator outer() const { return m_outer; } + size_t offset() const { return m_inner - &m_outer->data[0]; } + +private: + OuterIterator m_outer; + OuterIterator m_end; + value_type* m_inner; + friend struct ChunkedRangeVector; + friend class MutableChunkedRangeVectorIterator; +}; + +// A mutable iterator that adds some invariant-preserving mutation methods +template +class MutableChunkedRangeVectorIterator : public ChunkedRangeVectorIterator { +public: + using ChunkedRangeVectorIterator::ChunkedRangeVectorIterator; + + // Set this iterator to the given range and update the parent if needed + void set(size_t begin, size_t end); + // Adjust the begin and end of this iterator by the given amounts and + // update the parent if needed + void adjust(ptrdiff_t front, ptrdiff_t back); + // Shift this iterator by the given amount and update the parent if needed + void shift(ptrdiff_t distance); +}; + +// A vector which stores ranges in chunks with a maximum size +struct ChunkedRangeVector { + struct Chunk { + std::vector> data; + size_t begin; + size_t end; + size_t count; + }; + std::vector m_data; + + using value_type = std::pair; + using iterator = MutableChunkedRangeVectorIterator; + using const_iterator = ChunkedRangeVectorIterator; + +#ifdef REALM_DEBUG + static const size_t max_size = 4; +#else + static const size_t max_size = 4096 / sizeof(std::pair); +#endif + + iterator begin() { return empty() ? end() : iterator(m_data.begin(), m_data.end(), &m_data[0].data[0]); } + iterator end() { return iterator(m_data.end(), m_data.end(), nullptr); } + const_iterator begin() const { return cbegin(); } + const_iterator end() const { return cend(); } + const_iterator cbegin() const { return empty() ? cend() : const_iterator(m_data.cbegin(), m_data.end(), &m_data[0].data[0]); } + const_iterator cend() const { return const_iterator(m_data.end(), m_data.end(), nullptr); } + + bool empty() const noexcept { return m_data.empty(); } + + iterator insert(iterator pos, value_type value); + iterator erase(iterator pos); + void push_back(value_type value); + iterator ensure_space(iterator pos); + + void verify() const noexcept; +}; +} // namespace _impl + +class IndexSet : private _impl::ChunkedRangeVector { public: static const size_t npos = -1; - using value_type = std::pair; - using iterator = std::vector::iterator; - using const_iterator = std::vector::const_iterator; - - const_iterator begin() const { return m_ranges.begin(); } - const_iterator end() const { return m_ranges.end(); } - bool empty() const { return m_ranges.empty(); } - size_t size() const { return m_ranges.size(); } + using ChunkedRangeVector::value_type; + using ChunkedRangeVector::iterator; + using ChunkedRangeVector::const_iterator; + using ChunkedRangeVector::begin; + using ChunkedRangeVector::end; + using ChunkedRangeVector::empty; + using ChunkedRangeVector::verify; IndexSet() = default; IndexSet(std::initializer_list); @@ -45,7 +142,7 @@ public: bool contains(size_t index) const; // Counts the number of indices in the set in the given range - size_t count(size_t start_index, size_t end_index) const; + size_t count(size_t start_index=0, size_t end_index=-1) const; // Add an index to the set, doing nothing if it's already present void add(size_t index); @@ -91,8 +188,6 @@ public: // Remove all indexes from the set void clear(); - void verify() const noexcept; - // An iterator over the indivual indices in the set rather than the ranges class IndexIterator : public std::iterator { public: @@ -140,8 +235,6 @@ public: IndexIteratableAdaptor as_indexes() const { return *this; } private: - std::vector m_ranges; - // Find the range which contains the index, or the first one after it if // none do iterator find(size_t index); @@ -153,8 +246,7 @@ private: void do_erase(iterator it, size_t index); iterator do_remove(iterator it, size_t index, size_t count); - // Add an index which must be greater than the largest index in the set - void add_back(size_t index); + void shift_until_end_by(iterator begin, ptrdiff_t shift); }; namespace util { @@ -166,6 +258,68 @@ std::reverse_iterator make_reverse_iterator(Iterator it) } } // namespace util + +namespace _impl { +template +template +inline bool ChunkedRangeVectorIterator::operator==(OtherIterator const& it) const +{ + return m_outer == it.outer() && m_inner == it.operator->(); +} + +template +template +inline bool ChunkedRangeVectorIterator::operator!=(OtherIterator const& it) const +{ + return !(*this == it); +} + +template +inline ChunkedRangeVectorIterator& ChunkedRangeVectorIterator::operator++() +{ + ++m_inner; + if (offset() == m_outer->data.size()) + next_chunk(); + return *this; +} + +template +inline ChunkedRangeVectorIterator ChunkedRangeVectorIterator::operator++(int) +{ + auto value = *this; + ++*this; + return value; +} + +template +inline ChunkedRangeVectorIterator& ChunkedRangeVectorIterator::operator--() +{ + if (!m_inner || m_inner == &m_outer->data.front()) { + --m_outer; + m_inner = &m_outer->data.back(); + } + else { + --m_inner; + } + return *this; +} + +template +inline ChunkedRangeVectorIterator ChunkedRangeVectorIterator::operator--(int) +{ + auto value = *this; + --*this; + return value; +} + +template +inline void ChunkedRangeVectorIterator::next_chunk() +{ + ++m_outer; + m_inner = m_outer != m_end ? &m_outer->data[0] : nullptr; +} +} // namespace _impl + } // namespace realm #endif // REALM_INDEX_SET_HPP diff --git a/tests/collection_change_indices.cpp b/tests/collection_change_indices.cpp index 5d67a5e2..a15b7b18 100644 --- a/tests/collection_change_indices.cpp +++ b/tests/collection_change_indices.cpp @@ -233,7 +233,8 @@ TEST_CASE("[collection_change] clear()") { SECTION("sets deletions SIZE_T_MAX if that if the given previous size") { c.insertions = {1, 2, 3}; c.clear(std::numeric_limits::max()); - REQUIRE(c.deletions.size() == 1); + REQUIRE(!c.deletions.empty()); + REQUIRE(++c.deletions.begin() == c.deletions.end()); REQUIRE(c.deletions.begin()->first == 0); REQUIRE(c.deletions.begin()->second == std::numeric_limits::max()); } diff --git a/tests/index_set.cpp b/tests/index_set.cpp index 05c4bbf2..df5af2f1 100644 --- a/tests/index_set.cpp +++ b/tests/index_set.cpp @@ -57,6 +57,20 @@ TEST_CASE("[index_set] count()") { realm::IndexSet set = {1, 2, 3, 5, 6, 7, 8, 9}; REQUIRE(set.count(3, 9) == 5); } + + SECTION("handles full chunks well") { + size_t count = realm::_impl::ChunkedRangeVector::max_size * 4; + realm::IndexSet set; + for (size_t i = 0; i < count; ++i) { + set.add(i * 3); + set.add(i * 3 + 1); + } + + for (size_t i = 0; i < count * 3; ++i) { + REQUIRE(set.count(i) == 2 * count - (i + 1) * 2 / 3); + REQUIRE(set.count(0, i) == (i + 1) / 3 + (i + 2) / 3); + } + } } TEST_CASE("[index_set] add()") { @@ -100,6 +114,12 @@ TEST_CASE("[index_set] add()") { set.add({1, 4, 5}); REQUIRE_INDICES(set, 0, 1, 2, 4, 5, 6); } + + SECTION("handles front additions of ranges") { + for (size_t i = 20; i > 0; i -= 2) + set.add(i); + REQUIRE_INDICES(set, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20); + } } TEST_CASE("[index_set] add_shifted()") { @@ -172,14 +192,14 @@ TEST_CASE("[index_set] add_shifted_by()") { SECTION("adds the indices shifted by the old count when they are all after the old indices and the shifted-by set is empty") { set = {5, 6}; - set.add_shifted_by({}, {7, 8}); - REQUIRE_INDICES(set, 5, 6, 9, 10); + set.add_shifted_by({}, {7, 9, 11, 13}); + REQUIRE_INDICES(set, 5, 6, 9, 11, 13, 15); } SECTION("acts like bulk add_shifted() when shifted_by is empty") { - set = {5, 10}; + set = {5, 10, 15, 20, 25}; set.add_shifted_by({}, {4, 5, 11}); - REQUIRE_INDICES(set, 4, 5, 6, 10, 13); + REQUIRE_INDICES(set, 4, 5, 6, 10, 13, 15, 20, 25); } SECTION("shifts indices in values back by the number of indices in shifted_by before them") { @@ -288,6 +308,18 @@ TEST_CASE("[index_set] shift_for_insert_at()") { REQUIRE(set.empty()); } + SECTION("does nothing when the insertion points are all after the current indices") { + set = {10, 20}; + set.shift_for_insert_at({30, 40}); + REQUIRE_INDICES(set, 10, 20); + } + + SECTION("does shift when the insertion points are all before the current indices") { + set = {10, 20}; + set.shift_for_insert_at({2, 4}); + REQUIRE_INDICES(set, 12, 22); + } + SECTION("shifts indices at or after the insertion points") { set = {5}; diff --git a/tests/transaction_log_parsing.cpp b/tests/transaction_log_parsing.cpp index 76a20f46..7363c6c2 100644 --- a/tests/transaction_log_parsing.cpp +++ b/tests/transaction_log_parsing.cpp @@ -366,7 +366,7 @@ TEST_CASE("Transaction log parsing") { auto info = track_changes({false, false, false}, [&] { table.set_int(0, 1, 2); }); - REQUIRE(info.tables.size() == 0); + REQUIRE(info.tables.empty()); } SECTION("new row additions are reported") { @@ -678,7 +678,7 @@ TEST_CASE("Transaction log parsing") { lv->remove(5); } REQUIRE_INDICES(changes.deletions, 5); - REQUIRE(changes.modifications.size() == 0); + REQUIRE(changes.modifications.empty()); VALIDATE_CHANGES(changes) { lv->set(5, 0); @@ -693,7 +693,7 @@ TEST_CASE("Transaction log parsing") { lv->remove(4); } REQUIRE_INDICES(changes.deletions, 4, 5); - REQUIRE(changes.modifications.size() == 0); + REQUIRE(changes.modifications.empty()); } SECTION("erase -> set") { @@ -711,7 +711,7 @@ TEST_CASE("Transaction log parsing") { lv->clear(); } REQUIRE_INDICES(changes.deletions, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); - REQUIRE(changes.insertions.size() == 0); + REQUIRE(changes.insertions.empty()); } SECTION("set -> clear") { @@ -720,7 +720,7 @@ TEST_CASE("Transaction log parsing") { lv->clear(); } REQUIRE_INDICES(changes.deletions, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9); - REQUIRE(changes.modifications.size() == 0); + REQUIRE(changes.modifications.empty()); } SECTION("clear -> insert") { @@ -737,8 +737,8 @@ TEST_CASE("Transaction log parsing") { lv->add(0); lv->remove(10); } - REQUIRE(changes.insertions.size() == 0); - REQUIRE(changes.deletions.size() == 0); + REQUIRE(changes.insertions.empty()); + REQUIRE(changes.deletions.empty()); VALIDATE_CHANGES(changes) { lv->add(0);