Speed up unsorted changeset calculations by eliminating most calls to count()

This commit is contained in:
Thomas Goyne 2016-03-11 16:51:35 -08:00
parent c6def6b814
commit f1f0327146
1 changed files with 36 additions and 17 deletions

View File

@ -323,22 +323,34 @@ struct RowInfo {
size_t shifted_row_index;
size_t prev_tv_index;
size_t tv_index;
size_t shifted_tv_index;
};
// Walks `new_rows` and records every row that is not where it is expected to
// be as a move in `changeset`: an entry in `changeset.moves` plus a matching
// index in `changeset.insertions` and `changeset.deletions`.
//
// NOTE(review): this listing appears to be a diff rendered without +/-
// markers, so the previous signature (without `removed`) and the previous
// count()-based body are shown interleaved with their replacements. Confirm
// against the real file before editing any of the statements below.
void calculate_moves_unsorted(std::vector<RowInfo>& new_rows, CollectionChangeIndices& changeset)
void calculate_moves_unsorted(std::vector<RowInfo>& new_rows, IndexSet const& removed, CollectionChangeIndices& changeset)
{
// Running value compared against each row's shifted_tv_index; it is advanced
// past every row that turns out not to be a move.
size_t expected = 0;
for (auto& row : new_rows) {
// Calculate where this row would be with only previous insertions
// and deletions. We can ignore future insertions/deletions from moves
// because move_last_over() can only move rows to lower indices
// NOTE(review): the per-row `expected` below, with its two count() calls,
// looks like the pre-change computation this commit replaces — confirm.
size_t expected = row.prev_tv_index
- changeset.deletions.count(0, row.prev_tv_index)
+ changeset.insertions.count(0, row.tv_index);
if (row.tv_index != expected) {
changeset.moves.push_back({row.prev_tv_index, row.tv_index});
changeset.insertions.add(row.tv_index);
changeset.deletions.add(row.prev_tv_index);
// With unsorted queries rows only move due to move_last_over(), which
// inherently can only move a row to earlier in the table.
REALM_ASSERT(row.shifted_tv_index >= expected);
// Fast path: the row is exactly the one expected next, so no count() call is
// needed to decide that it has not moved.
if (row.shifted_tv_index == expected) {
++expected;
continue;
}
// This row isn't just the row after the previous one, but it still may
// not be a move if there were rows deleted between the two, so next
// calculate what row should be here taking those into account
size_t calc_expected = row.tv_index - changeset.insertions.count(0, row.tv_index) + removed.count(0, row.prev_tv_index);
if (row.shifted_tv_index == calc_expected) {
// Not a move after all; resume the cheap sequential check from the next row.
expected = calc_expected + 1;
continue;
}
// The row still isn't the expected one, so it's a move
changeset.moves.push_back({row.prev_tv_index, row.tv_index});
changeset.insertions.add(row.tv_index);
changeset.deletions.add(row.prev_tv_index);
}
}
@ -471,12 +483,15 @@ CollectionChangeBuilder CollectionChangeBuilder::calculate(std::vector<size_t> c
{
CollectionChangeBuilder ret;
size_t deleted = 0;
std::vector<RowInfo> old_rows;
for (size_t i = 0; i < prev_rows.size(); ++i) {
if (prev_rows[i] == npos)
if (prev_rows[i] == npos) {
++deleted;
ret.deletions.add(i);
}
else
old_rows.push_back({prev_rows[i], npos, i});
old_rows.push_back({prev_rows[i], npos, i, i - deleted});
}
std::stable_sort(begin(old_rows), end(old_rows), [](auto& lft, auto& rgt) {
return lft.shifted_row_index < rgt.shifted_row_index;
@ -484,23 +499,26 @@ CollectionChangeBuilder CollectionChangeBuilder::calculate(std::vector<size_t> c
std::vector<RowInfo> new_rows;
for (size_t i = 0; i < next_rows.size(); ++i) {
new_rows.push_back({next_rows[i], npos, i});
new_rows.push_back({next_rows[i], npos, i, 0});
}
std::stable_sort(begin(new_rows), end(new_rows), [](auto& lft, auto& rgt) {
return lft.shifted_row_index < rgt.shifted_row_index;
});
IndexSet removed;
size_t i = 0, j = 0;
while (i < old_rows.size() && j < new_rows.size()) {
auto old_index = old_rows[i];
auto new_index = new_rows[j];
if (old_index.shifted_row_index == new_index.shifted_row_index) {
new_rows[j].prev_tv_index = old_rows[i].tv_index;
new_rows[j].shifted_tv_index = old_rows[i].shifted_tv_index;
++i;
++j;
}
else if (old_index.shifted_row_index < new_index.shifted_row_index) {
ret.deletions.add(old_index.tv_index);
removed.add(old_index.tv_index);
++i;
}
else {
@ -510,7 +528,7 @@ CollectionChangeBuilder CollectionChangeBuilder::calculate(std::vector<size_t> c
}
for (; i < old_rows.size(); ++i)
ret.deletions.add(old_rows[i].tv_index);
removed.add(old_rows[i].tv_index);
for (; j < new_rows.size(); ++j)
ret.insertions.add(new_rows[j].tv_index);
@ -532,8 +550,9 @@ CollectionChangeBuilder CollectionChangeBuilder::calculate(std::vector<size_t> c
calculate_moves_sorted(new_rows, ret);
}
else {
calculate_moves_unsorted(new_rows, ret);
calculate_moves_unsorted(new_rows, removed, ret);
}
ret.deletions.add(removed);
ret.verify();
#ifdef REALM_DEBUG