trie: add difference iterator (#3637)

This PR implements a differenceIterator, which allows iterating over trie nodes
that exist in one trie but not in another. This is a prerequisite for most GC
strategies, in order to find obsolete nodes.
This commit is contained in:
Nick Johnson 2017-02-22 22:49:34 +00:00 committed by Felix Lange
parent 024d41d0c2
commit 555273495b
5 changed files with 327 additions and 130 deletions

View File

@ -31,15 +31,14 @@ import (
type NodeIterator struct { type NodeIterator struct {
state *StateDB // State being iterated state *StateDB // State being iterated
stateIt *trie.NodeIterator // Primary iterator for the global state trie stateIt trie.NodeIterator // Primary iterator for the global state trie
dataIt *trie.NodeIterator // Secondary iterator for the data trie of a contract dataIt trie.NodeIterator // Secondary iterator for the data trie of a contract
accountHash common.Hash // Hash of the node containing the account accountHash common.Hash // Hash of the node containing the account
codeHash common.Hash // Hash of the contract source code codeHash common.Hash // Hash of the contract source code
code []byte // Source code associated with a contract code []byte // Source code associated with a contract
Hash common.Hash // Hash of the current entry being iterated (nil if not standalone) Hash common.Hash // Hash of the current entry being iterated (nil if not standalone)
Entry interface{} // Current state entry being iterated (internal representation)
Parent common.Hash // Hash of the first full ancestor node (nil if current is the root) Parent common.Hash // Hash of the first full ancestor node (nil if current is the root)
Error error // Failure set in case of an internal error in the iterator Error error // Failure set in case of an internal error in the iterator
@ -80,9 +79,9 @@ func (it *NodeIterator) step() error {
} }
// If we had data nodes previously, we surely have at least state nodes // If we had data nodes previously, we surely have at least state nodes
if it.dataIt != nil { if it.dataIt != nil {
if cont := it.dataIt.Next(); !cont { if cont := it.dataIt.Next(true); !cont {
if it.dataIt.Error != nil { if it.dataIt.Error() != nil {
return it.dataIt.Error return it.dataIt.Error()
} }
it.dataIt = nil it.dataIt = nil
} }
@ -94,15 +93,15 @@ func (it *NodeIterator) step() error {
return nil return nil
} }
// Step to the next state trie node, terminating if we're out of nodes // Step to the next state trie node, terminating if we're out of nodes
if cont := it.stateIt.Next(); !cont { if cont := it.stateIt.Next(true); !cont {
if it.stateIt.Error != nil { if it.stateIt.Error() != nil {
return it.stateIt.Error return it.stateIt.Error()
} }
it.state, it.stateIt = nil, nil it.state, it.stateIt = nil, nil
return nil return nil
} }
// If the state trie node is an internal entry, leave as is // If the state trie node is an internal entry, leave as is
if !it.stateIt.Leaf { if !it.stateIt.Leaf() {
return nil return nil
} }
// Otherwise we've reached an account node, initiate data iteration // Otherwise we've reached an account node, initiate data iteration
@ -112,7 +111,7 @@ func (it *NodeIterator) step() error {
Root common.Hash Root common.Hash
CodeHash []byte CodeHash []byte
} }
if err := rlp.Decode(bytes.NewReader(it.stateIt.LeafBlob), &account); err != nil { if err := rlp.Decode(bytes.NewReader(it.stateIt.LeafBlob()), &account); err != nil {
return err return err
} }
dataTrie, err := trie.New(account.Root, it.state.db) dataTrie, err := trie.New(account.Root, it.state.db)
@ -120,7 +119,7 @@ func (it *NodeIterator) step() error {
return err return err
} }
it.dataIt = trie.NewNodeIterator(dataTrie) it.dataIt = trie.NewNodeIterator(dataTrie)
if !it.dataIt.Next() { if !it.dataIt.Next(true) {
it.dataIt = nil it.dataIt = nil
} }
if !bytes.Equal(account.CodeHash, emptyCodeHash) { if !bytes.Equal(account.CodeHash, emptyCodeHash) {
@ -130,7 +129,7 @@ func (it *NodeIterator) step() error {
return fmt.Errorf("code %x: %v", account.CodeHash, err) return fmt.Errorf("code %x: %v", account.CodeHash, err)
} }
} }
it.accountHash = it.stateIt.Parent it.accountHash = it.stateIt.Parent()
return nil return nil
} }
@ -138,7 +137,7 @@ func (it *NodeIterator) step() error {
// The method returns whether there are any more data left for inspection. // The method returns whether there are any more data left for inspection.
func (it *NodeIterator) retrieve() bool { func (it *NodeIterator) retrieve() bool {
// Clear out any previously set values // Clear out any previously set values
it.Hash, it.Entry = common.Hash{}, nil it.Hash = common.Hash{}
// If the iteration's done, return no available data // If the iteration's done, return no available data
if it.state == nil { if it.state == nil {
@ -147,14 +146,14 @@ func (it *NodeIterator) retrieve() bool {
// Otherwise retrieve the current entry // Otherwise retrieve the current entry
switch { switch {
case it.dataIt != nil: case it.dataIt != nil:
it.Hash, it.Entry, it.Parent = it.dataIt.Hash, it.dataIt.Node, it.dataIt.Parent it.Hash, it.Parent = it.dataIt.Hash(), it.dataIt.Parent()
if it.Parent == (common.Hash{}) { if it.Parent == (common.Hash{}) {
it.Parent = it.accountHash it.Parent = it.accountHash
} }
case it.code != nil: case it.code != nil:
it.Hash, it.Entry, it.Parent = it.codeHash, it.code, it.accountHash it.Hash, it.Parent = it.codeHash, it.accountHash
case it.stateIt != nil: case it.stateIt != nil:
it.Hash, it.Entry, it.Parent = it.stateIt.Hash, it.stateIt.Node, it.stateIt.Parent it.Hash, it.Parent = it.stateIt.Hash(), it.stateIt.Parent()
} }
return true return true
} }

View File

@ -16,13 +16,14 @@
package trie package trie
import "github.com/ethereum/go-ethereum/common" import (
"bytes"
"github.com/ethereum/go-ethereum/common"
)
// Iterator is a key-value trie iterator that traverses a Trie. // Iterator is a key-value trie iterator that traverses a Trie.
type Iterator struct { type Iterator struct {
trie *Trie nodeIt NodeIterator
nodeIt *NodeIterator
keyBuf []byte
Key []byte // Current data key on which the iterator is positioned on Key []byte // Current data key on which the iterator is positioned on
Value []byte // Current data value on which the iterator is positioned on Value []byte // Current data value on which the iterator is positioned on
@ -31,19 +32,23 @@ type Iterator struct {
// NewIterator creates a new key-value iterator. // NewIterator creates a new key-value iterator.
func NewIterator(trie *Trie) *Iterator { func NewIterator(trie *Trie) *Iterator {
return &Iterator{ return &Iterator{
trie: trie,
nodeIt: NewNodeIterator(trie), nodeIt: NewNodeIterator(trie),
keyBuf: make([]byte, 0, 64), }
Key: nil, }
// FromNodeIterator creates a new key-value iterator from a node iterator
func NewIteratorFromNodeIterator(it NodeIterator) *Iterator {
return &Iterator{
nodeIt: it,
} }
} }
// Next moves the iterator forward one key-value entry. // Next moves the iterator forward one key-value entry.
func (it *Iterator) Next() bool { func (it *Iterator) Next() bool {
for it.nodeIt.Next() { for it.nodeIt.Next(true) {
if it.nodeIt.Leaf { if it.nodeIt.Leaf() {
it.Key = it.makeKey() it.Key = decodeCompact(it.nodeIt.Path())
it.Value = it.nodeIt.LeafBlob it.Value = it.nodeIt.LeafBlob()
return true return true
} }
} }
@ -52,23 +57,25 @@ func (it *Iterator) Next() bool {
return false return false
} }
func (it *Iterator) makeKey() []byte { // NodeIterator is an iterator to traverse the trie pre-order.
key := it.keyBuf[:0] type NodeIterator interface {
for _, se := range it.nodeIt.stack { // Hash returns the hash of the current node
switch node := se.node.(type) { Hash() common.Hash
case *fullNode: // Parent returns the hash of the parent of the current node
if se.child <= 16 { Parent() common.Hash
key = append(key, byte(se.child)) // Leaf returns true iff the current node is a leaf node.
} Leaf() bool
case *shortNode: // LeafBlob returns the contents of the node, if it is a leaf.
if hasTerm(node.Key) { // Callers must not retain references to the return value after calling Next()
key = append(key, node.Key[:len(node.Key)-1]...) LeafBlob() []byte
} else { // Path returns the hex-encoded path to the current node.
key = append(key, node.Key...) // Callers must not retain references to the return value after calling Next()
} Path() []byte
} // Next moves the iterator to the next node. If the parameter is false, any child
} // nodes will be skipped.
return decodeCompact(key) Next(bool) bool
// Error returns the error status of the iterator.
Error() error
} }
// nodeIteratorState represents the iteration state at one particular node of the // nodeIteratorState represents the iteration state at one particular node of the
@ -78,48 +85,95 @@ type nodeIteratorState struct {
node node // Trie node being iterated node node // Trie node being iterated
parent common.Hash // Hash of the first full ancestor node (nil if current is the root) parent common.Hash // Hash of the first full ancestor node (nil if current is the root)
child int // Child to be processed next child int // Child to be processed next
pathlen int // Length of the path to this node
} }
// NodeIterator is an iterator to traverse the trie post-order. type nodeIterator struct {
type NodeIterator struct {
trie *Trie // Trie being iterated trie *Trie // Trie being iterated
stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state stack []*nodeIteratorState // Hierarchy of trie nodes persisting the iteration state
Hash common.Hash // Hash of the current node being iterated (nil if not standalone) err error // Failure set in case of an internal error in the iterator
Node node // Current node being iterated (internal representation)
Parent common.Hash // Hash of the first full ancestor node (nil if current is the root)
Leaf bool // Flag whether the current node is a value (data) node
LeafBlob []byte // Data blob contained within a leaf (otherwise nil)
Error error // Failure set in case of an internal error in the iterator path []byte // Path to the current node
} }
// NewNodeIterator creates an post-order trie iterator. // NewNodeIterator creates an post-order trie iterator.
func NewNodeIterator(trie *Trie) *NodeIterator { func NewNodeIterator(trie *Trie) NodeIterator {
if trie.Hash() == emptyState { if trie.Hash() == emptyState {
return new(NodeIterator) return new(nodeIterator)
} }
return &NodeIterator{trie: trie} return &nodeIterator{trie: trie}
}
// Hash returns the hash of the current node
func (it *nodeIterator) Hash() common.Hash {
if len(it.stack) == 0 {
return common.Hash{}
}
return it.stack[len(it.stack)-1].hash
}
// Parent returns the hash of the parent node
func (it *nodeIterator) Parent() common.Hash {
if len(it.stack) == 0 {
return common.Hash{}
}
return it.stack[len(it.stack)-1].parent
}
// Leaf returns true if the current node is a leaf
func (it *nodeIterator) Leaf() bool {
if len(it.stack) == 0 {
return false
}
_, ok := it.stack[len(it.stack)-1].node.(valueNode)
return ok
}
// LeafBlob returns the data for the current node, if it is a leaf
func (it *nodeIterator) LeafBlob() []byte {
if len(it.stack) == 0 {
return nil
}
if node, ok := it.stack[len(it.stack)-1].node.(valueNode); ok {
return []byte(node)
}
return nil
}
// Path returns the hex-encoded path to the current node
func (it *nodeIterator) Path() []byte {
return it.path
}
// Error returns the error set in case of an internal error in the iterator
func (it *nodeIterator) Error() error {
return it.err
} }
// Next moves the iterator to the next node, returning whether there are any // Next moves the iterator to the next node, returning whether there are any
// further nodes. In case of an internal error this method returns false and // further nodes. In case of an internal error this method returns false and
// sets the Error field to the encountered failure. // sets the Error field to the encountered failure. If `descend` is false,
func (it *NodeIterator) Next() bool { // skips iterating over any subnodes of the current node.
func (it *nodeIterator) Next(descend bool) bool {
// If the iterator failed previously, don't do anything // If the iterator failed previously, don't do anything
if it.Error != nil { if it.err != nil {
return false return false
} }
// Otherwise step forward with the iterator and report any errors // Otherwise step forward with the iterator and report any errors
if err := it.step(); err != nil { if err := it.step(descend); err != nil {
it.Error = err it.err = err
return false return false
} }
return it.retrieve() return it.trie != nil
} }
// step moves the iterator to the next node of the trie. // step moves the iterator to the next node of the trie.
func (it *NodeIterator) step() error { func (it *nodeIterator) step(descend bool) error {
if it.trie == nil { if it.trie == nil {
// Abort if we reached the end of the iteration // Abort if we reached the end of the iteration
return nil return nil
@ -132,57 +186,67 @@ func (it *NodeIterator) step() error {
state.hash = root state.hash = root
} }
it.stack = append(it.stack, state) it.stack = append(it.stack, state)
} else { return nil
// Continue iterating at the previous node otherwise. }
if !descend {
// If we're skipping children, pop the current node first
it.path = it.path[:it.stack[len(it.stack)-1].pathlen]
it.stack = it.stack[:len(it.stack)-1] it.stack = it.stack[:len(it.stack)-1]
}
// Continue iteration to the next child
outer:
for {
if len(it.stack) == 0 { if len(it.stack) == 0 {
it.trie = nil it.trie = nil
return nil return nil
} }
}
// Continue iteration to the next child
for {
parent := it.stack[len(it.stack)-1] parent := it.stack[len(it.stack)-1]
ancestor := parent.hash ancestor := parent.hash
if (ancestor == common.Hash{}) { if (ancestor == common.Hash{}) {
ancestor = parent.parent ancestor = parent.parent
} }
if node, ok := parent.node.(*fullNode); ok { if node, ok := parent.node.(*fullNode); ok {
// Full node, traverse all children, then the node itself // Full node, iterate over children
if parent.child >= len(node.Children) {
break
}
for parent.child++; parent.child < len(node.Children); parent.child++ { for parent.child++; parent.child < len(node.Children); parent.child++ {
if current := node.Children[parent.child]; current != nil { child := node.Children[parent.child]
if child != nil {
hash, _ := child.cache()
it.stack = append(it.stack, &nodeIteratorState{ it.stack = append(it.stack, &nodeIteratorState{
hash: common.BytesToHash(node.flags.hash), hash: common.BytesToHash(hash),
node: current, node: child,
parent: ancestor, parent: ancestor,
child: -1, child: -1,
pathlen: len(it.path),
}) })
break it.path = append(it.path, byte(parent.child))
break outer
} }
} }
} else if node, ok := parent.node.(*shortNode); ok { } else if node, ok := parent.node.(*shortNode); ok {
// Short node, traverse the pointer singleton child, then the node itself // Short node, return the pointer singleton child
if parent.child >= 0 { if parent.child < 0 {
break
}
parent.child++ parent.child++
hash, _ := node.Val.cache()
it.stack = append(it.stack, &nodeIteratorState{ it.stack = append(it.stack, &nodeIteratorState{
hash: common.BytesToHash(node.flags.hash), hash: common.BytesToHash(hash),
node: node.Val, node: node.Val,
parent: ancestor, parent: ancestor,
child: -1, child: -1,
pathlen: len(it.path),
}) })
} else if hash, ok := parent.node.(hashNode); ok { if hasTerm(node.Key) {
// Hash node, resolve the hash child from the database, then the node itself it.path = append(it.path, node.Key[:len(node.Key)-1]...)
if parent.child >= 0 { } else {
it.path = append(it.path, node.Key...)
}
break break
} }
} else if hash, ok := parent.node.(hashNode); ok {
// Hash node, resolve the hash child from the database
if parent.child < 0 {
parent.child++ parent.child++
node, err := it.trie.resolveHash(hash, nil, nil) node, err := it.trie.resolveHash(hash, nil, nil)
if err != nil { if err != nil {
return err return err
@ -192,33 +256,110 @@ func (it *NodeIterator) step() error {
node: node, node: node,
parent: ancestor, parent: ancestor,
child: -1, child: -1,
pathlen: len(it.path),
}) })
} else {
break break
} }
} }
it.path = it.path[:parent.pathlen]
it.stack = it.stack[:len(it.stack)-1]
}
return nil return nil
} }
// retrieve pulls and caches the current trie node the iterator is traversing. type differenceIterator struct {
// In case of a value node, the additional leaf blob is also populated with the a, b NodeIterator // Nodes returned are those in b - a.
// data contents for external interpretation. eof bool // Indicates a has run out of elements
// count int // Number of nodes scanned on either trie
// The method returns whether there are any more data left for inspection. }
func (it *NodeIterator) retrieve() bool {
// Clear out any previously set values
it.Hash, it.Node, it.Parent, it.Leaf, it.LeafBlob = common.Hash{}, nil, common.Hash{}, false, nil
// If the iteration's done, return no available data // NewDifferenceIterator constructs a NodeIterator that iterates over elements in b that
if it.trie == nil { // are not in a. Returns the iterator, and a pointer to an integer recording the number
// of nodes seen.
func NewDifferenceIterator(a, b NodeIterator) (NodeIterator, *int) {
a.Next(true)
it := &differenceIterator{
a: a,
b: b,
}
return it, &it.count
}
func (it *differenceIterator) Hash() common.Hash {
return it.b.Hash()
}
func (it *differenceIterator) Parent() common.Hash {
return it.b.Parent()
}
func (it *differenceIterator) Leaf() bool {
return it.b.Leaf()
}
func (it *differenceIterator) LeafBlob() []byte {
return it.b.LeafBlob()
}
func (it *differenceIterator) Path() []byte {
return it.b.Path()
}
func (it *differenceIterator) Next(bool) bool {
// Invariants:
// - We always advance at least one element in b.
// - At the start of this function, a's path is lexically greater than b's.
if !it.b.Next(true) {
return false return false
} }
// Otherwise retrieve the current node and resolve leaf accessors it.count += 1
state := it.stack[len(it.stack)-1]
it.Hash, it.Node, it.Parent = state.hash, state.node, state.parent if it.eof {
if value, ok := it.Node.(valueNode); ok { // a has reached eof, so we just return all elements from b
it.Leaf, it.LeafBlob = true, []byte(value)
}
return true return true
} }
for {
apath, bpath := it.a.Path(), it.b.Path()
switch bytes.Compare(apath, bpath) {
case -1:
// b jumped past a; advance a
if !it.a.Next(true) {
it.eof = true
return true
}
it.count += 1
case 1:
// b is before a
return true
case 0:
if it.a.Hash() != it.b.Hash() || it.a.Leaf() != it.b.Leaf() {
// Keys are identical, but hashes or leaf status differs
return true
}
if it.a.Leaf() && it.b.Leaf() && !bytes.Equal(it.a.LeafBlob(), it.b.LeafBlob()) {
// Both are leaf nodes, but with different values
return true
}
// a and b are identical; skip this whole subtree if the nodes have hashes
hasHash := it.a.Hash() == common.Hash{}
if !it.b.Next(hasHash) {
return false
}
it.count += 1
if !it.a.Next(hasHash) {
it.eof = true
return true
}
it.count += 1
}
}
}
func (it *differenceIterator) Error() error {
if err := it.a.Error(); err != nil {
return err
}
return it.b.Error()
}

View File

@ -99,9 +99,9 @@ func TestNodeIteratorCoverage(t *testing.T) {
// Gather all the node hashes found by the iterator // Gather all the node hashes found by the iterator
hashes := make(map[common.Hash]struct{}) hashes := make(map[common.Hash]struct{})
for it := NewNodeIterator(trie); it.Next(); { for it := NewNodeIterator(trie); it.Next(true); {
if it.Hash != (common.Hash{}) { if it.Hash() != (common.Hash{}) {
hashes[it.Hash] = struct{}{} hashes[it.Hash()] = struct{}{}
} }
} }
// Cross check the hashes and the database itself // Cross check the hashes and the database itself
@ -116,3 +116,60 @@ func TestNodeIteratorCoverage(t *testing.T) {
} }
} }
} }
func TestDifferenceIterator(t *testing.T) {
triea := newEmpty()
valsa := []struct{ k, v string }{
{"bar", "b"},
{"barb", "ba"},
{"bars", "bb"},
{"bard", "bc"},
{"fab", "z"},
{"foo", "a"},
{"food", "ab"},
{"foos", "aa"},
}
for _, val := range valsa {
triea.Update([]byte(val.k), []byte(val.v))
}
triea.Commit()
trieb := newEmpty()
valsb := []struct{ k, v string }{
{"aardvark", "c"},
{"bar", "b"},
{"barb", "bd"},
{"bars", "be"},
{"fab", "z"},
{"foo", "a"},
{"foos", "aa"},
{"food", "ab"},
{"jars", "d"},
}
for _, val := range valsb {
trieb.Update([]byte(val.k), []byte(val.v))
}
trieb.Commit()
found := make(map[string]string)
di, _ := NewDifferenceIterator(NewNodeIterator(triea), NewNodeIterator(trieb))
it := NewIteratorFromNodeIterator(di)
for it.Next() {
found[string(it.Key)] = string(it.Value)
}
all := []struct{ k, v string }{
{"aardvark", "c"},
{"barb", "bd"},
{"bars", "be"},
{"jars", "d"},
}
for _, item := range all {
if found[item.k] != item.v {
t.Errorf("iterator value mismatch for %s: got %q want %q", item.k, found[item.k], item.v)
}
}
if len(found) != len(all) {
t.Errorf("iterator count mismatch: got %d values, want %d", len(found), len(all))
}
}

View File

@ -159,7 +159,7 @@ func (t *SecureTrie) Iterator() *Iterator {
return t.trie.Iterator() return t.trie.Iterator()
} }
func (t *SecureTrie) NodeIterator() *NodeIterator { func (t *SecureTrie) NodeIterator() NodeIterator {
return NewNodeIterator(&t.trie) return NewNodeIterator(&t.trie)
} }

View File

@ -81,9 +81,9 @@ func checkTrieConsistency(db Database, root common.Hash) error {
return nil // // Consider a non existent state consistent return nil // // Consider a non existent state consistent
} }
it := NewNodeIterator(trie) it := NewNodeIterator(trie)
for it.Next() { for it.Next(true) {
} }
return it.Error return it.Error()
} }
// Tests that an empty trie is not scheduled for syncing. // Tests that an empty trie is not scheduled for syncing.