mirror of
https://github.com/status-im/realm-js.git
synced 2025-02-24 20:28:23 +00:00
Refactor SortedMoveCalculator and add more of an explanation
This commit is contained in:
parent
95900f5e17
commit
eee6e55eb0
@ -352,78 +352,65 @@ void calculate_moves_unsorted(std::vector<RowInfo>& new_rows, IndexSet& removed,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
class SortedMoveCalculator {
|
class LongestCommonSubsequenceCalculator {
|
||||||
public:
|
public:
|
||||||
SortedMoveCalculator(std::vector<RowInfo>& new_rows, CollectionChangeSet& changeset)
|
// A pair of an index in the table and an index in the table view
|
||||||
: m_modified(changeset.modifications)
|
|
||||||
{
|
|
||||||
std::vector<Row> old_candidates;
|
|
||||||
old_candidates.reserve(new_rows.size());
|
|
||||||
for (auto& row : new_rows) {
|
|
||||||
old_candidates.push_back({row.row_index, row.prev_tv_index});
|
|
||||||
}
|
|
||||||
std::sort(begin(old_candidates), end(old_candidates), [](auto a, auto b) {
|
|
||||||
return std::tie(a.tv_index, a.row_index) < std::tie(b.tv_index, b.row_index);
|
|
||||||
});
|
|
||||||
|
|
||||||
// First check if the order of any of the rows actually changed
|
|
||||||
size_t first_difference = IndexSet::npos;
|
|
||||||
for (size_t i = 0; i < old_candidates.size(); ++i) {
|
|
||||||
if (old_candidates[i].row_index != new_rows[i].row_index) {
|
|
||||||
first_difference = i;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (first_difference == IndexSet::npos)
|
|
||||||
return;
|
|
||||||
|
|
||||||
// A map from row index -> tv index in new results
|
|
||||||
b.reserve(new_rows.size());
|
|
||||||
for (size_t i = 0; i < new_rows.size(); ++i)
|
|
||||||
b.push_back({new_rows[i].row_index, i});
|
|
||||||
std::sort(begin(b), end(b), [](auto a, auto b) {
|
|
||||||
return std::tie(a.row_index, a.tv_index) < std::tie(b.row_index, b.tv_index);
|
|
||||||
});
|
|
||||||
|
|
||||||
a = std::move(old_candidates);
|
|
||||||
|
|
||||||
find_longest_matches(first_difference, a.size(),
|
|
||||||
first_difference, new_rows.size());
|
|
||||||
m_longest_matches.push_back({a.size(), new_rows.size(), 0});
|
|
||||||
|
|
||||||
size_t i = first_difference, j = first_difference;
|
|
||||||
for (auto match : m_longest_matches) {
|
|
||||||
for (; i < match.i; ++i)
|
|
||||||
changeset.deletions.add(a[i].tv_index);
|
|
||||||
for (; j < match.j; ++j)
|
|
||||||
changeset.insertions.add(new_rows[j].tv_index);
|
|
||||||
i += match.size;
|
|
||||||
j += match.size;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
struct Match {
|
|
||||||
size_t i, j, size, modified;
|
|
||||||
};
|
|
||||||
struct Row {
|
struct Row {
|
||||||
size_t row_index;
|
size_t row_index;
|
||||||
size_t tv_index;
|
size_t tv_index;
|
||||||
};
|
};
|
||||||
|
|
||||||
IndexSet const& m_modified;
|
struct Match {
|
||||||
|
// The index in `a` at which this match begins
|
||||||
|
size_t i;
|
||||||
|
// The index in `b` at which this match begins
|
||||||
|
size_t j;
|
||||||
|
// The length of this match
|
||||||
|
size_t size;
|
||||||
|
// The number of rows in this block which were modified
|
||||||
|
size_t modified;
|
||||||
|
};
|
||||||
std::vector<Match> m_longest_matches;
|
std::vector<Match> m_longest_matches;
|
||||||
|
|
||||||
std::vector<Row> a, b;
|
LongestCommonSubsequenceCalculator(std::vector<Row>& a, std::vector<Row>& b,
|
||||||
|
size_t start_index,
|
||||||
|
IndexSet const& modifications)
|
||||||
|
: m_modified(modifications)
|
||||||
|
, a(a), b(b)
|
||||||
|
{
|
||||||
|
find_longest_matches(start_index, a.size(),
|
||||||
|
start_index, b.size());
|
||||||
|
m_longest_matches.push_back({a.size(), b.size(), 0});
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
IndexSet const& m_modified;
|
||||||
|
|
||||||
|
// The two arrays of rows being diffed
|
||||||
|
// a is sorted by tv_index, b is sorted by row_index
|
||||||
|
std::vector<Row> &a, &b;
|
||||||
|
|
||||||
|
// Find the longest matching range in (a + begin1, a + end1) and (b + begin2, b + end2)
|
||||||
|
// "Matching" is defined as "has the same row index"; the TV index is just
|
||||||
|
// there to let us turn an index in a/b into an index which can be reported
|
||||||
|
// in the output changeset.
|
||||||
|
//
|
||||||
|
// This is done with the O(N) space variant of the dynamic programming
|
||||||
|
// algorithm for longest common subsequence, where N is the maximum number
|
||||||
|
// of occurrences of any single row index (which for everything but linkview-derived
|
||||||
|
// TVs will be 1).
|
||||||
Match find_longest_match(size_t begin1, size_t end1, size_t begin2, size_t end2)
|
Match find_longest_match(size_t begin1, size_t end1, size_t begin2, size_t end2)
|
||||||
{
|
{
|
||||||
struct Length {
|
struct Length {
|
||||||
size_t j, len;
|
size_t j, len;
|
||||||
};
|
};
|
||||||
std::vector<Length> cur;
|
// The length of the matching block for each `j` for the previously checked row
|
||||||
std::vector<Length> prev;
|
std::vector<Length> prev;
|
||||||
|
// The length of the matching block for each `j` for the row currently being checked
|
||||||
|
std::vector<Length> cur;
|
||||||
|
|
||||||
|
// Calculate the length of the matching block *ending* at b[j], which
|
||||||
|
// is 1 if b[j - 1] did not match, and (the length of the block ending at b[j - 1]) + 1 otherwise.
|
||||||
auto length = [&](size_t j) -> size_t {
|
auto length = [&](size_t j) -> size_t {
|
||||||
for (auto const& pair : prev) {
|
for (auto const& pair : prev) {
|
||||||
if (pair.j + 1 == j)
|
if (pair.j + 1 == j)
|
||||||
@ -432,17 +419,15 @@ private:
|
|||||||
return 1;
|
return 1;
|
||||||
};
|
};
|
||||||
|
|
||||||
Match best = {begin1, begin2, 0, 0};
|
// Iterate over each `j` which has the same row index as a[i] and falls
|
||||||
|
// within the range begin2 <= j < end2
|
||||||
for (size_t i = begin1; i < end1; ++i) {
|
auto for_each_b_match = [&](size_t i, auto&& f) {
|
||||||
cur.clear();
|
|
||||||
|
|
||||||
size_t ai = a[i].row_index;
|
size_t ai = a[i].row_index;
|
||||||
// Find the TV indices at which this row appears in the new results
|
// Find the TV indices at which this row appears in the new results
|
||||||
// There should always be at least one (or it would have been filtered out earlier),
|
// There should always be at least one (or it would have been
|
||||||
// but can be multiple if there are dupes
|
// filtered out earlier), but there can be multiple if there are dupes
|
||||||
auto it = lower_bound(begin(b), end(b), Row{ai, 0},
|
auto it = lower_bound(begin(b), end(b), ai,
|
||||||
[](auto a, auto b) { return a.row_index < b.row_index; });
|
[](auto lft, auto rgt) { return lft.row_index < rgt; });
|
||||||
REALM_ASSERT(it != end(b) && it->row_index == ai);
|
REALM_ASSERT(it != end(b) && it->row_index == ai);
|
||||||
for (; it != end(b) && it->row_index == ai; ++it) {
|
for (; it != end(b) && it->row_index == ai; ++it) {
|
||||||
size_t j = it->tv_index;
|
size_t j = it->tv_index;
|
||||||
@ -450,9 +435,23 @@ private:
|
|||||||
continue;
|
continue;
|
||||||
if (j >= end2)
|
if (j >= end2)
|
||||||
break; // entries of b with the same row_index are sorted by tv_index, so every later j is also >= end2
|
break; // entries of b with the same row_index are sorted by tv_index, so every later j is also >= end2
|
||||||
|
f(j);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
Match best = {begin1, begin2, 0, 0};
|
||||||
|
for (size_t i = begin1; i < end1; ++i) {
|
||||||
|
// prev = std::move(cur), but avoids discarding prev's heap allocation
|
||||||
|
cur.swap(prev);
|
||||||
|
cur.clear();
|
||||||
|
|
||||||
|
for_each_b_match(i, [&](size_t j) {
|
||||||
size_t size = length(j);
|
size_t size = length(j);
|
||||||
|
|
||||||
cur.push_back({j, size});
|
cur.push_back({j, size});
|
||||||
|
|
||||||
|
// If the matching block ending at a[i] and b[j] is longer than
|
||||||
|
// the previous one, select it as the best
|
||||||
if (size > best.size)
|
if (size > best.size)
|
||||||
best = {i - size + 1, j - size + 1, size, IndexSet::npos};
|
best = {i - size + 1, j - size + 1, size, IndexSet::npos};
|
||||||
// Given two equal-length matches, prefer the one with fewer modified rows
|
// Given two equal-length matches, prefer the one with fewer modified rows
|
||||||
@ -463,10 +462,11 @@ private:
|
|||||||
if (count < best.modified)
|
if (count < best.modified)
|
||||||
best = {i - size + 1, j - size + 1, size, count};
|
best = {i - size + 1, j - size + 1, size, count};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// The best block should always fall within the range being searched
|
||||||
REALM_ASSERT(best.i >= begin1 && best.i + best.size <= end1);
|
REALM_ASSERT(best.i >= begin1 && best.i + best.size <= end1);
|
||||||
REALM_ASSERT(best.j >= begin2 && best.j + best.size <= end2);
|
REALM_ASSERT(best.j >= begin2 && best.j + best.size <= end2);
|
||||||
}
|
});
|
||||||
cur.swap(prev);
|
|
||||||
}
|
}
|
||||||
return best;
|
return best;
|
||||||
}
|
}
|
||||||
@ -489,6 +489,57 @@ private:
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
void calculate_moves_sorted(std::vector<RowInfo>& rows, CollectionChangeSet& changeset)
|
||||||
|
{
|
||||||
|
// The RowInfo array contains information about the old and new TV indices of
|
||||||
|
// each row, which we need to turn into two sequences of rows, which we'll
|
||||||
|
// then find matches in
|
||||||
|
std::vector<LongestCommonSubsequenceCalculator::Row> a, b;
|
||||||
|
|
||||||
|
a.reserve(rows.size());
|
||||||
|
for (auto& row : rows) {
|
||||||
|
a.push_back({row.row_index, row.prev_tv_index});
|
||||||
|
}
|
||||||
|
std::sort(begin(a), end(a), [](auto lft, auto rgt) {
|
||||||
|
return std::tie(lft.tv_index, lft.row_index) < std::tie(rgt.tv_index, rgt.row_index);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Before constructing `b`, first find the first index in `a` which will
|
||||||
|
// actually differ in `b`, and skip everything else if there aren't any
|
||||||
|
size_t first_difference = IndexSet::npos;
|
||||||
|
for (size_t i = 0; i < a.size(); ++i) {
|
||||||
|
if (a[i].row_index != rows[i].row_index) {
|
||||||
|
first_difference = i;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (first_difference == IndexSet::npos)
|
||||||
|
return;
|
||||||
|
|
||||||
|
// Note that `b` is sorted by row_index, while `a` is sorted by tv_index
|
||||||
|
b.reserve(rows.size());
|
||||||
|
for (size_t i = 0; i < rows.size(); ++i)
|
||||||
|
b.push_back({rows[i].row_index, i});
|
||||||
|
std::sort(begin(b), end(b), [](auto lft, auto rgt) {
|
||||||
|
return std::tie(lft.row_index, lft.tv_index) < std::tie(rgt.row_index, rgt.tv_index);
|
||||||
|
});
|
||||||
|
|
||||||
|
// Calculate the LCS of the two sequences
|
||||||
|
auto matches = LongestCommonSubsequenceCalculator(a, b, first_difference,
|
||||||
|
changeset.modifications).m_longest_matches;
|
||||||
|
|
||||||
|
// And then insert and delete rows as needed to align them
|
||||||
|
size_t i = first_difference, j = first_difference;
|
||||||
|
for (auto match : matches) {
|
||||||
|
for (; i < match.i; ++i)
|
||||||
|
changeset.deletions.add(a[i].tv_index);
|
||||||
|
for (; j < match.j; ++j)
|
||||||
|
changeset.insertions.add(rows[j].tv_index);
|
||||||
|
i += match.size;
|
||||||
|
j += match.size;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
} // Anonymous namespace
|
} // Anonymous namespace
|
||||||
|
|
||||||
CollectionChangeBuilder CollectionChangeBuilder::calculate(std::vector<size_t> const& prev_rows,
|
CollectionChangeBuilder CollectionChangeBuilder::calculate(std::vector<size_t> const& prev_rows,
|
||||||
@ -524,8 +575,14 @@ CollectionChangeBuilder CollectionChangeBuilder::calculate(std::vector<size_t> c
|
|||||||
return lft.row_index < rgt.row_index;
|
return lft.row_index < rgt.row_index;
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Don't add rows which were modified to not match the query to `deletions`
|
||||||
|
// immediately because the unsorted move logic needs to be able to distinguish
|
||||||
|
// them from rows which were outright deleted
|
||||||
IndexSet removed;
|
IndexSet removed;
|
||||||
|
|
||||||
|
// Now that our old and new sets of rows are sorted by row index, we can
|
||||||
|
// iterate over them and either record old+new TV indices for rows present
|
||||||
|
// in both, or mark them as inserted/deleted if they appear only in one
|
||||||
size_t i = 0, j = 0;
|
size_t i = 0, j = 0;
|
||||||
while (i < old_rows.size() && j < new_rows.size()) {
|
while (i < old_rows.size() && j < new_rows.size()) {
|
||||||
auto old_index = old_rows[i];
|
auto old_index = old_rows[i];
|
||||||
@ -566,7 +623,7 @@ CollectionChangeBuilder CollectionChangeBuilder::calculate(std::vector<size_t> c
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (sort) {
|
if (sort) {
|
||||||
SortedMoveCalculator(new_rows, ret);
|
calculate_moves_sorted(new_rows, ret);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
calculate_moves_unsorted(new_rows, removed, ret);
|
calculate_moves_unsorted(new_rows, removed, ret);
|
||||||
|
@ -37,6 +37,8 @@ public:
|
|||||||
IndexSet modification = {},
|
IndexSet modification = {},
|
||||||
std::vector<Move> moves = {});
|
std::vector<Move> moves = {});
|
||||||
|
|
||||||
|
// Calculate where rows need to be inserted or deleted from old_rows to turn
|
||||||
|
// it into new_rows, and check all matching rows for modifications
|
||||||
static CollectionChangeBuilder calculate(std::vector<size_t> const& old_rows,
|
static CollectionChangeBuilder calculate(std::vector<size_t> const& old_rows,
|
||||||
std::vector<size_t> const& new_rows,
|
std::vector<size_t> const& new_rows,
|
||||||
std::function<bool (size_t)> row_did_change,
|
std::function<bool (size_t)> row_did_change,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user