2018-02-14 13:49:11 +01:00
// Copyright 2018 The go-ethereum Authors
2018-02-05 18:40:32 +02:00
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
package trie
import (
2019-03-22 16:13:28 +01:00
"encoding/binary"
2018-06-21 12:28:05 +03:00
"fmt"
"io"
2018-02-05 18:40:32 +02:00
"sync"
"time"
2018-11-12 18:47:34 +02:00
"github.com/allegro/bigcache"
2018-02-05 18:40:32 +02:00
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/ethdb"
"github.com/ethereum/go-ethereum/log"
2018-06-04 10:47:43 +03:00
"github.com/ethereum/go-ethereum/metrics"
2018-06-21 12:28:05 +03:00
"github.com/ethereum/go-ethereum/rlp"
2018-06-04 10:47:43 +03:00
)
var (
2018-11-12 18:47:34 +02:00
memcacheCleanHitMeter = metrics . NewRegisteredMeter ( "trie/memcache/clean/hit" , nil )
memcacheCleanMissMeter = metrics . NewRegisteredMeter ( "trie/memcache/clean/miss" , nil )
memcacheCleanReadMeter = metrics . NewRegisteredMeter ( "trie/memcache/clean/read" , nil )
memcacheCleanWriteMeter = metrics . NewRegisteredMeter ( "trie/memcache/clean/write" , nil )
2018-06-04 10:47:43 +03:00
memcacheFlushTimeTimer = metrics . NewRegisteredResettingTimer ( "trie/memcache/flush/time" , nil )
memcacheFlushNodesMeter = metrics . NewRegisteredMeter ( "trie/memcache/flush/nodes" , nil )
memcacheFlushSizeMeter = metrics . NewRegisteredMeter ( "trie/memcache/flush/size" , nil )
memcacheGCTimeTimer = metrics . NewRegisteredResettingTimer ( "trie/memcache/gc/time" , nil )
memcacheGCNodesMeter = metrics . NewRegisteredMeter ( "trie/memcache/gc/nodes" , nil )
memcacheGCSizeMeter = metrics . NewRegisteredMeter ( "trie/memcache/gc/size" , nil )
memcacheCommitTimeTimer = metrics . NewRegisteredResettingTimer ( "trie/memcache/commit/time" , nil )
memcacheCommitNodesMeter = metrics . NewRegisteredMeter ( "trie/memcache/commit/nodes" , nil )
memcacheCommitSizeMeter = metrics . NewRegisteredMeter ( "trie/memcache/commit/size" , nil )
2018-02-05 18:40:32 +02:00
)
// secureKeyPrefix is prepended to a 32 byte hash to form the database key
// under which that hash's trie node preimage is stored.
var secureKeyPrefix = []byte("secure-key-")

// secureKeyLength is the full length of a preimage database key: the prefix
// above followed by a 32 byte hash.
const secureKeyLength = len("secure-key-") + 32
// Database is an intermediate write layer between the trie data structures and
// the disk database. The aim is to accumulate trie writes in-memory and only
// periodically flush a couple tries to disk, garbage collecting the remainder.
2019-03-26 15:48:31 +01:00
//
// Note, the trie Database is **not** thread safe in its mutations, but it **is**
// thread safe in providing individual, independent node access. The rationale
// behind this split design is to provide read access to RPC handlers and sync
// servers even while the trie is executing expensive garbage collection.
2018-02-05 18:40:32 +02:00
type Database struct {
2018-09-24 15:57:49 +03:00
diskdb ethdb . KeyValueStore // Persistent storage for matured trie nodes
2018-02-05 18:40:32 +02:00
2018-11-12 18:47:34 +02:00
cleans * bigcache . BigCache // GC friendly memory cache of clean node RLPs
dirties map [ common . Hash ] * cachedNode // Data and references relationships of dirty nodes
oldest common . Hash // Oldest tracked node, flush-list head
newest common . Hash // Newest tracked node, flush-list tail
2018-06-04 10:47:43 +03:00
preimages map [ common . Hash ] [ ] byte // Preimages of nodes from the secure trie
seckeybuf [ secureKeyLength ] byte // Ephemeral buffer for calculating preimage keys
2018-02-05 18:40:32 +02:00
gctime time . Duration // Time spent on garbage collection since last commit
gcnodes uint64 // Nodes garbage collected since last commit
gcsize common . StorageSize // Data storage garbage collected since last commit
2018-06-04 10:47:43 +03:00
flushtime time . Duration // Time spent on data flushing since last commit
flushnodes uint64 // Nodes flushed since last commit
flushsize common . StorageSize // Data storage flushed since last commit
2018-11-12 18:47:34 +02:00
dirtiesSize common . StorageSize // Storage size of the dirty node cache (exc. flushlist)
2018-02-05 18:40:32 +02:00
preimagesSize common . StorageSize // Storage size of the preimages cache
lock sync . RWMutex
}
2018-06-21 12:28:05 +03:00
// rawNode wraps an already RLP encoded binary blob. Having a dedicated type lets
// encoded blobs and collapsed trie nodes share the same cache fields while still
// being distinguishable from each other.
type rawNode []byte

// canUnload panics: a rawNode must never be part of a live trie.
func (n rawNode) canUnload(uint16, uint16) bool {
	panic("this should never end up in a live trie")
}

// cache panics: a rawNode must never be part of a live trie.
func (n rawNode) cache() (hashNode, bool) {
	panic("this should never end up in a live trie")
}

// fstring panics: a rawNode must never be part of a live trie.
func (n rawNode) fstring(ind string) string {
	panic("this should never end up in a live trie")
}
// rawFullNode is a full node reduced to its useful data content: the caches and
// flags are dropped to keep the stored footprint small. It RLP encodes the same
// way as the full node it was derived from.
type rawFullNode [17]node

// canUnload panics: a rawFullNode must never be part of a live trie.
func (n rawFullNode) canUnload(uint16, uint16) bool {
	panic("this should never end up in a live trie")
}

// cache panics: a rawFullNode must never be part of a live trie.
func (n rawFullNode) cache() (hashNode, bool) {
	panic("this should never end up in a live trie")
}

// fstring panics: a rawFullNode must never be part of a live trie.
func (n rawFullNode) fstring(ind string) string {
	panic("this should never end up in a live trie")
}

// EncodeRLP writes the node to w as a 17 item list, substituting nilValueNode
// for every empty child slot.
func (n rawFullNode) EncodeRLP(w io.Writer) error {
	// Work on a plain array copy so the receiver's own slots stay untouched
	encoded := [17]node(n)
	for i := range encoded {
		if encoded[i] == nil {
			encoded[i] = nilValueNode
		}
	}
	return rlp.Encode(w, encoded)
}
// rawShortNode is a short node reduced to its useful data content: the caches
// and flags are dropped to keep the stored footprint small. It RLP encodes the
// same way as the short node it was derived from.
type rawShortNode struct {
	Key []byte
	Val node
}

// canUnload panics: a rawShortNode must never be part of a live trie.
func (n rawShortNode) canUnload(uint16, uint16) bool {
	panic("this should never end up in a live trie")
}

// cache panics: a rawShortNode must never be part of a live trie.
func (n rawShortNode) cache() (hashNode, bool) {
	panic("this should never end up in a live trie")
}

// fstring panics: a rawShortNode must never be part of a live trie.
func (n rawShortNode) fstring(ind string) string {
	panic("this should never end up in a live trie")
}
2018-02-05 18:40:32 +02:00
// cachedNode is all the information we know about a single cached node in the
// memory database write layer.
type cachedNode struct {
2018-06-21 12:28:05 +03:00
node node // Cached collapsed trie node, or raw rlp data
size uint16 // Byte size of the useful cached data
2018-11-22 14:14:31 +01:00
parents uint32 // Number of live nodes referencing this one
2018-06-21 12:28:05 +03:00
children map [ common . Hash ] uint16 // External children referenced by this node
2018-06-04 10:47:43 +03:00
flushPrev common . Hash // Previous node in the flush-list
flushNext common . Hash // Next node in the flush-list
2018-02-05 18:40:32 +02:00
}
2018-06-21 12:28:05 +03:00
// rlp returns the RLP encoding of the cached node. A raw blob is handed back as
// is, whereas a collapsed node is encoded on the fly. An encoding failure
// results in a panic.
func (n *cachedNode) rlp() []byte {
	switch raw := n.node.(type) {
	case rawNode:
		// Already encoded, nothing to regenerate
		return raw
	}
	encoded, err := rlp.EncodeToBytes(n.node)
	if err != nil {
		panic(err)
	}
	return encoded
}
// obj returns the decoded and expanded trie node, either directly from the cache,
// or by regenerating it from the rlp encoded blob.
2019-03-14 15:25:12 +02:00
func ( n * cachedNode ) obj ( hash common . Hash ) node {
2018-06-21 12:28:05 +03:00
if node , ok := n . node . ( rawNode ) ; ok {
2019-03-14 15:25:12 +02:00
return mustDecodeNode ( hash [ : ] , node )
2018-06-21 12:28:05 +03:00
}
2019-03-14 15:25:12 +02:00
return expandNode ( hash [ : ] , n . node )
2018-06-21 12:28:05 +03:00
}
// childs lists the hashes of every child tracked for this node: the explicit
// external ones recorded in the children map, followed by the implicit ones
// embedded in the collapsed node itself (raw blobs have no implicit children).
func (n *cachedNode) childs() []common.Hash {
	hashes := make([]common.Hash, 0, 16)
	for hash := range n.children {
		hashes = append(hashes, hash)
	}
	if _, isRaw := n.node.(rawNode); isRaw {
		return hashes
	}
	gatherChildren(n.node, &hashes)
	return hashes
}
// gatherChildren walks a collapsed storage node and appends the hash of every
// hashNode found inside it to children. Node types other than the collapsed
// ones, value nodes and nil cause a panic.
func gatherChildren(n node, children *[]common.Hash) {
	switch n := n.(type) {
	case hashNode:
		*children = append(*children, common.BytesToHash(n))

	case *rawShortNode:
		gatherChildren(n.Val, children)

	case rawFullNode:
		// Only the first 16 slots are traversed; the 17th is left out
		for _, child := range n[:16] {
			gatherChildren(child, children)
		}

	case valueNode, nil:
		// Nothing to collect from values or empty slots

	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}
// simplifyNode converts an expanded memory node into its raw counterpart,
// recursively dropping all internal caches and flags so that only the data
// remains. Unknown node types cause a panic.
func simplifyNode(n node) node {
	switch n := n.(type) {
	case valueNode, hashNode, rawNode:
		// Already in their simplest form
		return n

	case *shortNode:
		// Keep the key, drop the flags and simplify the value
		return &rawShortNode{
			Key: n.Key,
			Val: simplifyNode(n.Val),
		}

	case *fullNode:
		// Copy the children, drop the flags and simplify every occupied slot
		simplified := rawFullNode(n.Children)
		for i, child := range simplified {
			if child == nil {
				continue
			}
			simplified[i] = simplifyNode(child)
		}
		return simplified

	default:
		panic(fmt.Sprintf("unknown node type: %T", n))
	}
}
// expandNode traverses the node hierarchy of a collapsed storage node and converts
// all fields and keys into expanded memory form.
2019-03-14 15:25:12 +02:00
func expandNode ( hash hashNode , n node ) node {
2018-06-21 12:28:05 +03:00
switch n := n . ( type ) {
case * rawShortNode :
// Short nodes need key and child expansion
return & shortNode {
Key : compactToHex ( n . Key ) ,
2019-03-14 15:25:12 +02:00
Val : expandNode ( nil , n . Val ) ,
2018-06-21 12:28:05 +03:00
flags : nodeFlag {
hash : hash ,
} ,
}
case rawFullNode :
// Full nodes need child expansion
node := & fullNode {
flags : nodeFlag {
hash : hash ,
} ,
}
for i := 0 ; i < len ( node . Children ) ; i ++ {
if n [ i ] != nil {
2019-03-14 15:25:12 +02:00
node . Children [ i ] = expandNode ( nil , n [ i ] )
2018-06-21 12:28:05 +03:00
}
}
return node
case valueNode , hashNode :
return n
default :
panic ( fmt . Sprintf ( "unknown node type: %T" , n ) )
}
}
2019-03-22 16:13:28 +01:00
// trienodeHasher is a struct to be used with BigCache, which uses a Hasher to
// determine which shard to place an entry into. It's not a cryptographic hash,
// just to provide a bit of anti-collision (default is FNV64a).
//
// Since trie keys are already hashes, we can just use the key directly to
// map shard id.
type trienodeHasher struct{}

// Sum64 implements the bigcache.Hasher interface. The result is the first 8
// bytes of key interpreted as a big endian integer.
//
// Keys shorter than 8 bytes are zero padded on the right instead of causing an
// out-of-range panic, so the hasher stays safe for arbitrary keys.
func (t trienodeHasher) Sum64(key string) uint64 {
	if len(key) >= 8 {
		return binary.BigEndian.Uint64([]byte(key[:8]))
	}
	var padded [8]byte
	copy(padded[:], key)
	return binary.BigEndian.Uint64(padded[:])
}
2018-02-05 18:40:32 +02:00
// NewDatabase creates a new trie database to store ephemeral trie content before
2018-11-12 18:47:34 +02:00
// its written out to disk or garbage collected. No read cache is created, so all
// data retrievals will hit the underlying disk database.
2018-09-24 15:57:49 +03:00
func NewDatabase ( diskdb ethdb . KeyValueStore ) * Database {
2018-11-12 18:47:34 +02:00
return NewDatabaseWithCache ( diskdb , 0 )
}
// NewDatabaseWithCache creates a new trie database to store ephemeral trie content
// before its written out to disk or garbage collected. It also acts as a read cache
// for nodes loaded from disk.
2018-09-24 15:57:49 +03:00
func NewDatabaseWithCache ( diskdb ethdb . KeyValueStore , cache int ) * Database {
2018-11-12 18:47:34 +02:00
var cleans * bigcache . BigCache
if cache > 0 {
cleans , _ = bigcache . NewBigCache ( bigcache . Config {
Shards : 1024 ,
LifeWindow : time . Hour ,
MaxEntriesInWindow : cache * 1024 ,
MaxEntrySize : 512 ,
HardMaxCacheSize : cache ,
2019-03-22 16:13:28 +01:00
Hasher : trienodeHasher { } ,
2018-11-12 18:47:34 +02:00
} )
}
2018-02-05 18:40:32 +02:00
return & Database {
2018-06-21 12:28:05 +03:00
diskdb : diskdb ,
2018-11-12 18:47:34 +02:00
cleans : cleans ,
dirties : map [ common . Hash ] * cachedNode { { } : { } } ,
2018-02-05 18:40:32 +02:00
preimages : make ( map [ common . Hash ] [ ] byte ) ,
}
}
// DiskDB retrieves the persistent storage backing the trie database.
2018-09-24 15:57:49 +03:00
func ( db * Database ) DiskDB ( ) ethdb . Reader {
2018-02-05 18:40:32 +02:00
return db . diskdb
}
2018-06-21 12:28:05 +03:00
// InsertBlob writes a new reference tracked blob to the memory database if it's
// yet unknown. This method should only be used for non-trie nodes that require
// reference counting, since trie nodes are garbage collected directly through
// their embedded children.
func ( db * Database ) InsertBlob ( hash common . Hash , blob [ ] byte ) {
2018-02-05 18:40:32 +02:00
db . lock . Lock ( )
defer db . lock . Unlock ( )
2018-06-21 12:28:05 +03:00
db . insert ( hash , blob , rawNode ( blob ) )
2018-02-05 18:40:32 +02:00
}
2018-06-21 12:28:05 +03:00
// insert inserts a collapsed trie node into the memory database. This method is
// a more generic version of InsertBlob, supporting both raw blob insertions as
// well ex trie node insertions. The blob must always be specified to allow proper
// size tracking.
func ( db * Database ) insert ( hash common . Hash , blob [ ] byte , node node ) {
2018-06-04 10:47:43 +03:00
// If the node's already cached, skip
2018-11-12 18:47:34 +02:00
if _ , ok := db . dirties [ hash ] ; ok {
2018-02-05 18:40:32 +02:00
return
}
2018-06-21 12:28:05 +03:00
// Create the cached entry for this node
entry := & cachedNode {
node : simplifyNode ( node ) ,
size : uint16 ( len ( blob ) ) ,
2018-06-04 10:47:43 +03:00
flushPrev : db . newest ,
}
2018-06-21 12:28:05 +03:00
for _ , child := range entry . childs ( ) {
2018-11-12 18:47:34 +02:00
if c := db . dirties [ child ] ; c != nil {
2018-06-21 12:28:05 +03:00
c . parents ++
}
}
2018-11-12 18:47:34 +02:00
db . dirties [ hash ] = entry
2018-06-21 12:28:05 +03:00
2018-06-04 10:47:43 +03:00
// Update the flush-list endpoints
if db . oldest == ( common . Hash { } ) {
db . oldest , db . newest = hash , hash
} else {
2018-11-12 18:47:34 +02:00
db . dirties [ db . newest ] . flushNext , db . newest = hash , hash
2018-02-05 18:40:32 +02:00
}
2018-11-12 18:47:34 +02:00
db . dirtiesSize += common . StorageSize ( common . HashLength + entry . size )
2018-02-05 18:40:32 +02:00
}
// insertPreimage records a new trie node pre-image in the memory database unless
// one is already stored for the hash. The slice is copied before being retained.
//
// Note, this method assumes that the database's lock is held!
func (db *Database) insertPreimage(hash common.Hash, preimage []byte) {
	if _, known := db.preimages[hash]; !known {
		db.preimages[hash] = common.CopyBytes(preimage)
		db.preimagesSize += common.StorageSize(common.HashLength + len(preimage))
	}
}
2018-06-21 12:28:05 +03:00
// node retrieves a cached trie node from memory, or returns nil if none can be
// found in the memory cache.
2019-03-14 15:25:12 +02:00
func ( db * Database ) node ( hash common . Hash ) node {
2018-11-12 18:47:34 +02:00
// Retrieve the node from the clean cache if available
if db . cleans != nil {
if enc , err := db . cleans . Get ( string ( hash [ : ] ) ) ; err == nil && enc != nil {
memcacheCleanHitMeter . Mark ( 1 )
memcacheCleanReadMeter . Mark ( int64 ( len ( enc ) ) )
2019-03-14 15:25:12 +02:00
return mustDecodeNode ( hash [ : ] , enc )
2018-11-12 18:47:34 +02:00
}
}
// Retrieve the node from the dirty cache if available
2018-06-21 12:28:05 +03:00
db . lock . RLock ( )
2018-11-12 18:47:34 +02:00
dirty := db . dirties [ hash ]
2018-06-21 12:28:05 +03:00
db . lock . RUnlock ( )
2018-11-12 18:47:34 +02:00
if dirty != nil {
2019-03-14 15:25:12 +02:00
return dirty . obj ( hash )
2018-06-21 12:28:05 +03:00
}
// Content unavailable in memory, attempt to retrieve from disk
enc , err := db . diskdb . Get ( hash [ : ] )
if err != nil || enc == nil {
return nil
}
2018-11-12 18:47:34 +02:00
if db . cleans != nil {
db . cleans . Set ( string ( hash [ : ] ) , enc )
memcacheCleanMissMeter . Mark ( 1 )
memcacheCleanWriteMeter . Mark ( int64 ( len ( enc ) ) )
}
2019-03-14 15:25:12 +02:00
return mustDecodeNode ( hash [ : ] , enc )
2018-06-21 12:28:05 +03:00
}
// Node retrieves an encoded cached trie node from memory. If it cannot be found
// cached, the method queries the persistent database for the content.
2018-02-05 18:40:32 +02:00
func ( db * Database ) Node ( hash common . Hash ) ( [ ] byte , error ) {
2018-11-12 18:47:34 +02:00
// Retrieve the node from the clean cache if available
if db . cleans != nil {
if enc , err := db . cleans . Get ( string ( hash [ : ] ) ) ; err == nil && enc != nil {
memcacheCleanHitMeter . Mark ( 1 )
memcacheCleanReadMeter . Mark ( int64 ( len ( enc ) ) )
return enc , nil
}
}
// Retrieve the node from the dirty cache if available
2018-02-05 18:40:32 +02:00
db . lock . RLock ( )
2018-11-12 18:47:34 +02:00
dirty := db . dirties [ hash ]
2018-02-05 18:40:32 +02:00
db . lock . RUnlock ( )
2018-11-12 18:47:34 +02:00
if dirty != nil {
return dirty . rlp ( ) , nil
2018-02-05 18:40:32 +02:00
}
// Content unavailable in memory, attempt to retrieve from disk
2018-11-12 18:47:34 +02:00
enc , err := db . diskdb . Get ( hash [ : ] )
if err == nil && enc != nil {
if db . cleans != nil {
db . cleans . Set ( string ( hash [ : ] ) , enc )
memcacheCleanMissMeter . Mark ( 1 )
memcacheCleanWriteMeter . Mark ( int64 ( len ( enc ) ) )
}
}
return enc , err
2018-02-05 18:40:32 +02:00
}
// preimage retrieves a cached trie node pre-image from memory. If it cannot be
// found cached, the method queries the persistent database for the content,
// returning whatever value and error the database lookup yields.
func (db *Database) preimage(hash common.Hash) ([]byte, error) {
	// Retrieve the preimage from cache if available
	db.lock.RLock()
	preimage := db.preimages[hash]
	db.lock.RUnlock()

	if preimage != nil {
		return preimage, nil
	}
	// Content unavailable in memory, attempt to retrieve from disk. The lookup
	// key is assembled in a private buffer: the lock is no longer held at this
	// point, so going through the shared db.seckeybuf scratch space would let
	// concurrent readers overwrite each other's keys.
	key := make([]byte, 0, secureKeyLength)
	key = append(key, secureKeyPrefix...)
	key = append(key, hash[:]...)

	return db.diskdb.Get(key)
}
// secureKey assembles the database key for the preimage of key by writing the
// prefix followed by key into the database's scratch buffer. The result is
// ephemeral: callers must not retain it, since the next call invalidates it.
func (db *Database) secureKey(key []byte) []byte {
	scratch := db.seckeybuf[:0]
	scratch = append(scratch, secureKeyPrefix...)
	return append(scratch, key...)
}
// Nodes retrieves the hashes of all the nodes cached within the memory database.
// This method is extremely expensive and should only be used to validate internal
// states in test code.
func ( db * Database ) Nodes ( ) [ ] common . Hash {
db . lock . RLock ( )
defer db . lock . RUnlock ( )
2018-11-12 18:47:34 +02:00
var hashes = make ( [ ] common . Hash , 0 , len ( db . dirties ) )
for hash := range db . dirties {
2018-02-05 18:40:32 +02:00
if hash != ( common . Hash { } ) { // Special case for "root" references/nodes
hashes = append ( hashes , hash )
}
}
return hashes
}
// Reference adds a new reference from a parent node to a child node.
func ( db * Database ) Reference ( child common . Hash , parent common . Hash ) {
2019-03-26 15:48:31 +01:00
db . lock . Lock ( )
defer db . lock . Unlock ( )
2018-02-05 18:40:32 +02:00
db . reference ( child , parent )
}
// reference is the private locked version of Reference.
func ( db * Database ) reference ( child common . Hash , parent common . Hash ) {
// If the node does not exist, it's a node pulled from disk, skip
2018-11-12 18:47:34 +02:00
node , ok := db . dirties [ child ]
2018-02-05 18:40:32 +02:00
if ! ok {
return
}
// If the reference already exists, only duplicate for roots
2018-11-12 18:47:34 +02:00
if db . dirties [ parent ] . children == nil {
db . dirties [ parent ] . children = make ( map [ common . Hash ] uint16 )
} else if _ , ok = db . dirties [ parent ] . children [ child ] ; ok && parent != ( common . Hash { } ) {
2018-02-05 18:40:32 +02:00
return
}
node . parents ++
2018-11-12 18:47:34 +02:00
db . dirties [ parent ] . children [ child ] ++
2018-02-05 18:40:32 +02:00
}
2018-06-21 12:28:05 +03:00
// Dereference removes an existing reference from a root node.
func ( db * Database ) Dereference ( root common . Hash ) {
2018-08-08 17:16:38 +03:00
// Sanity check to ensure that the meta-root is not removed
if root == ( common . Hash { } ) {
log . Error ( "Attempted to dereference the trie cache meta root" )
return
}
2018-02-05 18:40:32 +02:00
db . lock . Lock ( )
defer db . lock . Unlock ( )
2018-11-12 18:47:34 +02:00
nodes , storage , start := len ( db . dirties ) , db . dirtiesSize , time . Now ( )
2018-06-21 12:28:05 +03:00
db . dereference ( root , common . Hash { } )
2018-02-05 18:40:32 +02:00
2018-11-12 18:47:34 +02:00
db . gcnodes += uint64 ( nodes - len ( db . dirties ) )
db . gcsize += storage - db . dirtiesSize
2018-02-05 18:40:32 +02:00
db . gctime += time . Since ( start )
2018-06-04 10:47:43 +03:00
memcacheGCTimeTimer . Update ( time . Since ( start ) )
2018-11-12 18:47:34 +02:00
memcacheGCSizeMeter . Mark ( int64 ( storage - db . dirtiesSize ) )
memcacheGCNodesMeter . Mark ( int64 ( nodes - len ( db . dirties ) ) )
2018-06-04 10:47:43 +03:00
2018-11-12 18:47:34 +02:00
log . Debug ( "Dereferenced trie from memory database" , "nodes" , nodes - len ( db . dirties ) , "size" , storage - db . dirtiesSize , "time" , time . Since ( start ) ,
"gcnodes" , db . gcnodes , "gcsize" , db . gcsize , "gctime" , db . gctime , "livenodes" , len ( db . dirties ) , "livesize" , db . dirtiesSize )
2018-02-05 18:40:32 +02:00
}
// dereference is the private locked version of Dereference.
func ( db * Database ) dereference ( child common . Hash , parent common . Hash ) {
// Dereference the parent-child
2018-11-12 18:47:34 +02:00
node := db . dirties [ parent ]
2018-02-05 18:40:32 +02:00
2018-06-21 12:28:05 +03:00
if node . children != nil && node . children [ child ] > 0 {
node . children [ child ] --
if node . children [ child ] == 0 {
delete ( node . children , child )
}
2018-02-05 18:40:32 +02:00
}
2018-06-04 10:47:43 +03:00
// If the child does not exist, it's a previously committed node.
2018-11-12 18:47:34 +02:00
node , ok := db . dirties [ child ]
2018-02-05 18:40:32 +02:00
if ! ok {
return
}
// If there are no more references to the child, delete it and cascade
2018-07-02 12:19:41 +03:00
if node . parents > 0 {
// This is a special cornercase where a node loaded from disk (i.e. not in the
// memcache any more) gets reinjected as a new node (short node split into full,
// then reverted into short), causing a cached node to have no parents. That is
// no problem in itself, but don't make maxint parents out of it.
node . parents --
}
2018-02-05 18:40:32 +02:00
if node . parents == 0 {
2018-06-04 10:47:43 +03:00
// Remove the node from the flush-list
2018-07-30 16:31:17 +03:00
switch child {
case db . oldest :
2018-06-04 10:47:43 +03:00
db . oldest = node . flushNext
2018-11-12 18:47:34 +02:00
db . dirties [ node . flushNext ] . flushPrev = common . Hash { }
2018-07-30 16:31:17 +03:00
case db . newest :
db . newest = node . flushPrev
2018-11-12 18:47:34 +02:00
db . dirties [ node . flushPrev ] . flushNext = common . Hash { }
2018-07-30 16:31:17 +03:00
default :
2018-11-12 18:47:34 +02:00
db . dirties [ node . flushPrev ] . flushNext = node . flushNext
db . dirties [ node . flushNext ] . flushPrev = node . flushPrev
2018-06-04 10:47:43 +03:00
}
// Dereference all children and delete the node
2018-06-21 12:28:05 +03:00
for _ , hash := range node . childs ( ) {
2018-02-05 18:40:32 +02:00
db . dereference ( hash , child )
}
2018-11-12 18:47:34 +02:00
delete ( db . dirties , child )
db . dirtiesSize -= common . StorageSize ( common . HashLength + int ( node . size ) )
2018-02-05 18:40:32 +02:00
}
}
2018-06-04 10:47:43 +03:00
// Cap iteratively flushes old but still referenced trie nodes until the total
// memory usage goes below the given threshold.
2019-03-26 15:48:31 +01:00
//
// Note, this method is a non-synchronized mutator. It is unsafe to call this
// concurrently with other mutators.
2018-06-04 10:47:43 +03:00
func ( db * Database ) Cap ( limit common . StorageSize ) error {
// Create a database batch to flush persistent data out. It is important that
// outside code doesn't see an inconsistent state (referenced data removed from
// memory cache during commit but not yet in persistent storage). This is ensured
// by only uncaching existing data when the database write finalizes.
2018-11-12 18:47:34 +02:00
nodes , storage , start := len ( db . dirties ) , db . dirtiesSize , time . Now ( )
2018-06-04 10:47:43 +03:00
batch := db . diskdb . NewBatch ( )
2018-11-12 18:47:34 +02:00
// db.dirtiesSize only contains the useful data in the cache, but when reporting
2018-06-04 10:47:43 +03:00
// the total memory consumption, the maintenance metadata is also needed to be
// counted. For every useful node, we track 2 extra hashes as the flushlist.
2018-11-12 18:47:34 +02:00
size := db . dirtiesSize + common . StorageSize ( ( len ( db . dirties ) - 1 ) * 2 * common . HashLength )
2018-06-04 10:47:43 +03:00
// If the preimage cache got large enough, push to disk. If it's still small
// leave for later to deduplicate writes.
flushPreimages := db . preimagesSize > 4 * 1024 * 1024
if flushPreimages {
for hash , preimage := range db . preimages {
if err := batch . Put ( db . secureKey ( hash [ : ] ) , preimage ) ; err != nil {
log . Error ( "Failed to commit preimage from trie database" , "err" , err )
return err
}
if batch . ValueSize ( ) > ethdb . IdealBatchSize {
if err := batch . Write ( ) ; err != nil {
return err
}
batch . Reset ( )
}
}
}
// Keep committing nodes from the flush-list until we're below allowance
oldest := db . oldest
for size > limit && oldest != ( common . Hash { } ) {
// Fetch the oldest referenced node and push into the batch
2018-11-12 18:47:34 +02:00
node := db . dirties [ oldest ]
2018-06-21 12:28:05 +03:00
if err := batch . Put ( oldest [ : ] , node . rlp ( ) ) ; err != nil {
2018-06-04 10:47:43 +03:00
return err
}
// If we exceeded the ideal batch size, commit and reset
if batch . ValueSize ( ) >= ethdb . IdealBatchSize {
if err := batch . Write ( ) ; err != nil {
log . Error ( "Failed to write flush list to disk" , "err" , err )
return err
}
batch . Reset ( )
}
// Iterate to the next flush item, or abort if the size cap was achieved. Size
// is the total size, including both the useful cached data (hash -> blob), as
// well as the flushlist metadata (2*hash). When flushing items from the cache,
// we need to reduce both.
2018-06-21 12:28:05 +03:00
size -= common . StorageSize ( 3 * common . HashLength + int ( node . size ) )
2018-06-04 10:47:43 +03:00
oldest = node . flushNext
}
// Flush out any remainder data from the last batch
if err := batch . Write ( ) ; err != nil {
log . Error ( "Failed to write flush list to disk" , "err" , err )
return err
}
// Write successful, clear out the flushed data
db . lock . Lock ( )
defer db . lock . Unlock ( )
if flushPreimages {
db . preimages = make ( map [ common . Hash ] [ ] byte )
db . preimagesSize = 0
}
for db . oldest != oldest {
2018-11-12 18:47:34 +02:00
node := db . dirties [ db . oldest ]
delete ( db . dirties , db . oldest )
2018-06-04 10:47:43 +03:00
db . oldest = node . flushNext
2018-11-12 18:47:34 +02:00
db . dirtiesSize -= common . StorageSize ( common . HashLength + int ( node . size ) )
2018-06-04 10:47:43 +03:00
}
if db . oldest != ( common . Hash { } ) {
2018-11-12 18:47:34 +02:00
db . dirties [ db . oldest ] . flushPrev = common . Hash { }
2018-06-04 10:47:43 +03:00
}
2018-11-12 18:47:34 +02:00
db . flushnodes += uint64 ( nodes - len ( db . dirties ) )
db . flushsize += storage - db . dirtiesSize
2018-06-04 10:47:43 +03:00
db . flushtime += time . Since ( start )
memcacheFlushTimeTimer . Update ( time . Since ( start ) )
2018-11-12 18:47:34 +02:00
memcacheFlushSizeMeter . Mark ( int64 ( storage - db . dirtiesSize ) )
memcacheFlushNodesMeter . Mark ( int64 ( nodes - len ( db . dirties ) ) )
2018-06-04 10:47:43 +03:00
2018-11-12 18:47:34 +02:00
log . Debug ( "Persisted nodes from memory database" , "nodes" , nodes - len ( db . dirties ) , "size" , storage - db . dirtiesSize , "time" , time . Since ( start ) ,
"flushnodes" , db . flushnodes , "flushsize" , db . flushsize , "flushtime" , db . flushtime , "livenodes" , len ( db . dirties ) , "livesize" , db . dirtiesSize )
2018-06-04 10:47:43 +03:00
return nil
}
2018-02-05 18:40:32 +02:00
// Commit iterates over all the children of a particular node, writes them out
2019-03-26 15:48:31 +01:00
// to disk, forcefully tearing down all references in both directions. As a side
// effect, all pre-images accumulated up to this point are also written.
2018-02-05 18:40:32 +02:00
//
2019-03-26 15:48:31 +01:00
// Note, this method is a non-synchronized mutator. It is unsafe to call this
// concurrently with other mutators.
2018-02-05 18:40:32 +02:00
func ( db * Database ) Commit ( node common . Hash , report bool ) error {
// Create a database batch to flush persistent data out. It is important that
// outside code doesn't see an inconsistent state (referenced data removed from
// memory cache during commit but not yet in persistent storage). This is ensured
// by only uncaching existing data when the database write finalizes.
start := time . Now ( )
batch := db . diskdb . NewBatch ( )
// Move all of the accumulated preimages into a write batch
for hash , preimage := range db . preimages {
if err := batch . Put ( db . secureKey ( hash [ : ] ) , preimage ) ; err != nil {
log . Error ( "Failed to commit preimage from trie database" , "err" , err )
return err
}
2019-03-26 15:48:31 +01:00
// If the batch is too large, flush to disk
2018-02-05 18:40:32 +02:00
if batch . ValueSize ( ) > ethdb . IdealBatchSize {
if err := batch . Write ( ) ; err != nil {
return err
}
batch . Reset ( )
}
}
2019-03-26 15:48:31 +01:00
// Since we're going to replay trie node writes into the clean cache, flush out
// any batched pre-images before continuing.
if err := batch . Write ( ) ; err != nil {
return err
}
batch . Reset ( )
2018-02-05 18:40:32 +02:00
// Move the trie itself into the batch, flushing if enough data is accumulated
2018-11-12 18:47:34 +02:00
nodes , storage := len ( db . dirties ) , db . dirtiesSize
2019-03-26 15:48:31 +01:00
uncacher := & cleaner { db }
if err := db . commit ( node , batch , uncacher ) ; err != nil {
2018-02-05 18:40:32 +02:00
log . Error ( "Failed to commit trie from trie database" , "err" , err )
return err
}
2019-03-26 15:48:31 +01:00
// Trie mostly committed to disk, flush any batch leftovers
2018-02-05 18:40:32 +02:00
if err := batch . Write ( ) ; err != nil {
log . Error ( "Failed to write trie to disk" , "err" , err )
return err
}
2019-03-26 15:48:31 +01:00
// Uncache any leftovers in the last batch
2018-02-05 18:40:32 +02:00
db . lock . Lock ( )
defer db . lock . Unlock ( )
2019-03-26 15:48:31 +01:00
batch . Replay ( uncacher )
batch . Reset ( )
// Reset the storage counters and bumpd metrics
2018-02-05 18:40:32 +02:00
db . preimages = make ( map [ common . Hash ] [ ] byte )
db . preimagesSize = 0
2018-06-04 10:47:43 +03:00
memcacheCommitTimeTimer . Update ( time . Since ( start ) )
2018-11-12 18:47:34 +02:00
memcacheCommitSizeMeter . Mark ( int64 ( storage - db . dirtiesSize ) )
memcacheCommitNodesMeter . Mark ( int64 ( nodes - len ( db . dirties ) ) )
2018-06-04 10:47:43 +03:00
2018-02-05 18:40:32 +02:00
logger := log . Info
if ! report {
logger = log . Debug
}
2018-11-12 18:47:34 +02:00
logger ( "Persisted trie from memory database" , "nodes" , nodes - len ( db . dirties ) + int ( db . flushnodes ) , "size" , storage - db . dirtiesSize + db . flushsize , "time" , time . Since ( start ) + db . flushtime ,
"gcnodes" , db . gcnodes , "gcsize" , db . gcsize , "gctime" , db . gctime , "livenodes" , len ( db . dirties ) , "livesize" , db . dirtiesSize )
2018-02-05 18:40:32 +02:00
// Reset the garbage collection statistics
db . gcnodes , db . gcsize , db . gctime = 0 , 0 , 0
2018-06-04 10:47:43 +03:00
db . flushnodes , db . flushsize , db . flushtime = 0 , 0 , 0
2018-02-05 18:40:32 +02:00
return nil
}
// commit is the private locked version of Commit.
2019-03-26 15:48:31 +01:00
func ( db * Database ) commit ( hash common . Hash , batch ethdb . Batch , uncacher * cleaner ) error {
2018-02-05 18:40:32 +02:00
// If the node does not exist, it's a previously committed node
2018-11-12 18:47:34 +02:00
node , ok := db . dirties [ hash ]
2018-02-05 18:40:32 +02:00
if ! ok {
return nil
}
2018-06-21 12:28:05 +03:00
for _ , child := range node . childs ( ) {
2019-03-26 15:48:31 +01:00
if err := db . commit ( child , batch , uncacher ) ; err != nil {
2018-02-05 18:40:32 +02:00
return err
}
}
2018-06-21 12:28:05 +03:00
if err := batch . Put ( hash [ : ] , node . rlp ( ) ) ; err != nil {
2018-02-05 18:40:32 +02:00
return err
}
2018-06-04 10:47:43 +03:00
// If we've reached an optimal batch size, commit and start over
2018-02-05 18:40:32 +02:00
if batch . ValueSize ( ) >= ethdb . IdealBatchSize {
if err := batch . Write ( ) ; err != nil {
return err
}
2019-03-26 15:48:31 +01:00
db . lock . Lock ( )
batch . Replay ( uncacher )
2018-02-05 18:40:32 +02:00
batch . Reset ( )
2019-03-26 15:48:31 +01:00
db . lock . Unlock ( )
2018-02-05 18:40:32 +02:00
}
return nil
}
2019-03-26 15:48:31 +01:00
// cleaner is a database batch replayer that takes a batch of write operations
// and cleans up the trie database from anything written to disk. Its Put
// method is called once per write when a batch is replayed into it.
type cleaner struct {
// db is the trie database whose dirty and clean caches are updated.
db * Database
}
// Put reacts to database writes and implements dirty data uncaching. This is the
// post-processing step of a commit operation where the already persisted trie is
// removed from the dirty cache and moved into the clean cache. The reason behind
// the two-phase commit is to ensure ensure data availability while moving from
// memory to disk.
func ( c * cleaner ) Put ( key [ ] byte , rlp [ ] byte ) error {
hash := common . BytesToHash ( key )
2018-02-05 18:40:32 +02:00
// If the node does not exist, we're done on this path
2019-03-26 15:48:31 +01:00
node , ok := c . db . dirties [ hash ]
2018-02-05 18:40:32 +02:00
if ! ok {
2019-03-26 15:48:31 +01:00
return nil
2018-02-05 18:40:32 +02:00
}
2018-06-04 10:47:43 +03:00
// Node still exists, remove it from the flush-list
2018-07-30 16:31:17 +03:00
switch hash {
2019-03-26 15:48:31 +01:00
case c . db . oldest :
c . db . oldest = node . flushNext
c . db . dirties [ node . flushNext ] . flushPrev = common . Hash { }
case c . db . newest :
c . db . newest = node . flushPrev
c . db . dirties [ node . flushPrev ] . flushNext = common . Hash { }
2018-07-30 16:31:17 +03:00
default :
2019-03-26 15:48:31 +01:00
c . db . dirties [ node . flushPrev ] . flushNext = node . flushNext
c . db . dirties [ node . flushNext ] . flushPrev = node . flushPrev
2018-06-04 10:47:43 +03:00
}
2019-03-26 15:48:31 +01:00
// Remove the node from the dirty cache
delete ( c . db . dirties , hash )
c . db . dirtiesSize -= common . StorageSize ( common . HashLength + int ( node . size ) )
// Move the flushed node into the clean cache to prevent insta-reloads
if c . db . cleans != nil {
c . db . cleans . Set ( string ( hash [ : ] ) , rlp )
2018-02-05 18:40:32 +02:00
}
2019-03-26 15:48:31 +01:00
return nil
}
func ( c * cleaner ) Delete ( key [ ] byte ) error {
panic ( "Not implemented" )
2018-02-05 18:40:32 +02:00
}
// Size returns the current storage size of the memory cache in front of the
// persistent database layer.
2018-06-04 10:47:43 +03:00
func ( db * Database ) Size ( ) ( common . StorageSize , common . StorageSize ) {
2018-02-05 18:40:32 +02:00
db . lock . RLock ( )
defer db . lock . RUnlock ( )
2018-11-12 18:47:34 +02:00
// db.dirtiesSize only contains the useful data in the cache, but when reporting
2018-06-04 10:47:43 +03:00
// the total memory consumption, the maintenance metadata is also needed to be
// counted. For every useful node, we track 2 extra hashes as the flushlist.
2018-11-12 18:47:34 +02:00
var flushlistSize = common . StorageSize ( ( len ( db . dirties ) - 1 ) * 2 * common . HashLength )
return db . dirtiesSize + flushlistSize , db . preimagesSize
2018-02-05 18:40:32 +02:00
}
2018-07-02 12:19:41 +03:00
// verifyIntegrity is a debug method to iterate over the entire trie stored in
// memory and check whether every node is reachable from the meta root. The goal
// is to find any errors that might cause memory leaks and or trie nodes to go
// missing.
//
// This method is extremely CPU and memory intensive, only use when must.
func ( db * Database ) verifyIntegrity ( ) {
// Iterate over all the cached nodes and accumulate them into a set
reachable := map [ common . Hash ] struct { } { { } : { } }
2018-11-12 18:47:34 +02:00
for child := range db . dirties [ common . Hash { } ] . children {
2018-07-02 12:19:41 +03:00
db . accumulate ( child , reachable )
}
// Find any unreachable but cached nodes
2019-02-19 05:50:11 -08:00
var unreachable [ ] string
2018-11-12 18:47:34 +02:00
for hash , node := range db . dirties {
2018-07-02 12:19:41 +03:00
if _ , ok := reachable [ hash ] ; ! ok {
unreachable = append ( unreachable , fmt . Sprintf ( "%x: {Node: %v, Parents: %d, Prev: %x, Next: %x}" ,
hash , node . node , node . parents , node . flushPrev , node . flushNext ) )
}
}
if len ( unreachable ) != 0 {
panic ( fmt . Sprintf ( "trie cache memory leak: %v" , unreachable ) )
}
}
// accumulate iterates over the trie defined by hash and accumulates all the
// cached children found in memory.
func ( db * Database ) accumulate ( hash common . Hash , reachable map [ common . Hash ] struct { } ) {
// Mark the node reachable if present in the memory cache
2018-11-12 18:47:34 +02:00
node , ok := db . dirties [ hash ]
2018-07-02 12:19:41 +03:00
if ! ok {
return
}
reachable [ hash ] = struct { } { }
// Iterate over all the children and accumulate them too
for _ , child := range node . childs ( ) {
db . accumulate ( child , reachable )
}
}