Added node aggregation PoC

This commit is contained in:
Vitalik Buterin 2017-08-03 03:20:03 -04:00
parent b82db5d868
commit b6ab900dc5
3 changed files with 172 additions and 0 deletions

View File

@ -11,6 +11,9 @@ class EphemDB():
def put(self, k, v):
self.kv[k] = v
def delete(self, k):
del self.kv[k]
KV_TYPE = 0
BRANCH_TYPE = 1
LEAF_TYPE = 2

View File

@ -0,0 +1,117 @@
from new_bintrie import b0, b1, KV_TYPE, BRANCH_TYPE, LEAF_TYPE, parse_node, encode_kv_node, encode_branch_node, encode_leaf_node
from bin_utils import encode_bin_path, decode_bin_path, common_prefix_length, encode_bin, decode_bin
from ethereum.utils import sha3 as _sha3, encode_hex
sha3_cache = {}
def sha3(x):
if x not in sha3_cache:
sha3_cache[x] = _sha3(x)
return sha3_cache[x]
def quick_encode(nodes):
o = b''
for node in nodes:
o += bytes([len(node) // 65536, len(node) // 256, len(node)]) + node
return o
def quick_decode(nodedata):
o = []
pos = 0
while pos < len(nodedata):
L = nodedata[pos] * 65536 + nodedata[pos+1] * 256 + nodedata[pos+2]
o.append(nodedata[pos+3: pos+3+L])
pos += 3+L
return o
class WrapperDB():
def __init__(self, parent_db):
self.parent_db = parent_db
self.substores = {}
self.node_to_substore = {}
self.new_nodes = {}
self.parent_db_reads = 0
self.parent_db_writes = 0
self.printing_mode = False
# Loads a substore (RLP-encoded list of closeby trie nodes) from the DB
def fetch_substore(self, key):
substore_values = self.parent_db.get(key)
assert substore_values is not None
children = quick_decode(substore_values)
self.parent_db_reads += 1
self.substores[key] = {sha3(n): n for n in children}
self.node_to_substore.update({sha3(n): key for n in children})
assert key in self.node_to_substore and key in self.substores
def get(self, k):
if k in self.new_nodes:
return self.new_nodes[k]
if k not in self.node_to_substore:
self.fetch_substore(k)
o = self.substores[self.node_to_substore[k]][k]
assert sha3(o) == k
return o
def put(self, k, v):
if k not in self.new_nodes and k not in self.node_to_substore:
self.new_nodes[k] = v
# Given a key, returns a collection of candidate nodes to form
# a substore, as well as the children of that substore
def get_substore_candidate_and_children(self, key, depth=5):
if depth == 0:
return [], [key]
elif self.parent_db.get(key) is not None:
return [], [key]
else:
node = self.get(key)
L, R, nodetype = parse_node(node)
if nodetype == BRANCH_TYPE:
Ln, Lc = self.get_substore_candidate_and_children(L, depth-1)
Rn, Rc = self.get_substore_candidate_and_children(R, depth-1)
return [node] + Ln + Rn, Lc + Rc
elif nodetype == KV_TYPE:
Rn, Rc = self.get_substore_candidate_and_children(R, depth-1)
return [node] + Rn, Rc
elif nodetype == LEAF_TYPE:
return [node], []
# Commits to the parent DB
def commit(self):
processed = {}
assert_exists = {}
for k, v in self.new_nodes.items():
if k in processed:
continue
nodes, children = self.get_substore_candidate_and_children(k)
if not nodes:
continue
assert k == sha3(nodes[0])
for c in children:
assert_exists[c] = True
if c not in self.substores:
self.fetch_substore(c)
cvalues = list(self.substores[c].values())
if len(quick_encode(cvalues + nodes)) < 3072:
del self.substores[c]
nodes.extend(cvalues)
self.parent_db.put(k, quick_encode(nodes))
self.parent_db_writes += 1
self.substores[k] = {}
for n in nodes:
h = sha3(n)
self.substores[k][h] = n
self.node_to_substore[h] = k
processed[h] = k
for c in assert_exists:
assert self.parent_db.get(c) is not None
print('reads', self.parent_db_reads, 'writes', self.parent_db_writes)
self.parent_db_reads = self.parent_db_writes = 0
self.new_nodes = {}
def clear_cache(self):
assert len(self.new_nodes) == 0
self.substores = {}
self.node_to_substore = {}

View File

@ -0,0 +1,52 @@
from new_bintrie import Trie, EphemDB, encode_bin, encode_bin_path, decode_bin_path
from new_bintrie_aggregate import WrapperDB
from ethereum.utils import sha3, encode_hex
import random
import rlp
def shuffle_in_place(x):
y = x[::]
random.shuffle(y)
return y
kvpairs = [(sha3(str(i))[12:], str(i).encode('utf-8') * 5) for i in range(2000)]
for path in ([], [1,0,1], [0,0,1,0], [1,0,0,1,0], [1,0,0,1,0,0,1,0], [1,0] * 8):
assert decode_bin_path(encode_bin_path(bytes(path))) == bytes(path)
r1 = None
t = Trie(WrapperDB(EphemDB()), b'')
for i, (k, v) in enumerate(shuffle_in_place(kvpairs)):
#print(t.to_dict())
t.update(k, v)
assert t.get(k) == v
if not i % 50:
t.db.commit()
assert t.db.parent_db.get(t.root) is not None
if not i % 250:
t.to_dict()
print("Length of branch at %d nodes: %d" % (i, len(rlp.encode(t.get_branch(k)))))
assert r1 is None or t.root == r1
r1 = t.root
t.update(kvpairs[0][0], kvpairs[0][1])
assert t.root == r1
print(t.get_branch(kvpairs[0][0]))
print(t.get_branch(kvpairs[0][0][::-1]))
print(encode_hex(t.root))
t.db.commit()
assert t.db.parent_db.get(t.root) is not None
t.db.clear_cache()
t.db.printing_mode = True
for k, v in shuffle_in_place(kvpairs):
t.get(k)
t.db.clear_cache()
print('Average DB reads: %.3f' % (t.db.parent_db_reads / len(kvpairs)))
for k, v in shuffle_in_place(kvpairs):
t.update(k, b'')
if not random.randrange(100):
t.to_dict()
t.db.commit()
#t.print_nodes()
assert t.root == b''