Added node aggregation PoC
This commit is contained in:
parent
b82db5d868
commit
b6ab900dc5
|
@ -11,6 +11,9 @@ class EphemDB():
|
|||
def put(self, k, v):
|
||||
self.kv[k] = v
|
||||
|
||||
def delete(self, k):
|
||||
del self.kv[k]
|
||||
|
||||
KV_TYPE = 0
|
||||
BRANCH_TYPE = 1
|
||||
LEAF_TYPE = 2
|
||||
|
|
|
@ -0,0 +1,117 @@
|
|||
from new_bintrie import b0, b1, KV_TYPE, BRANCH_TYPE, LEAF_TYPE, parse_node, encode_kv_node, encode_branch_node, encode_leaf_node
|
||||
from bin_utils import encode_bin_path, decode_bin_path, common_prefix_length, encode_bin, decode_bin
|
||||
from ethereum.utils import sha3 as _sha3, encode_hex
|
||||
|
||||
sha3_cache = {}
|
||||
|
||||
def sha3(x):
|
||||
if x not in sha3_cache:
|
||||
sha3_cache[x] = _sha3(x)
|
||||
return sha3_cache[x]
|
||||
|
||||
def quick_encode(nodes):
|
||||
o = b''
|
||||
for node in nodes:
|
||||
o += bytes([len(node) // 65536, len(node) // 256, len(node)]) + node
|
||||
return o
|
||||
|
||||
def quick_decode(nodedata):
|
||||
o = []
|
||||
pos = 0
|
||||
while pos < len(nodedata):
|
||||
L = nodedata[pos] * 65536 + nodedata[pos+1] * 256 + nodedata[pos+2]
|
||||
o.append(nodedata[pos+3: pos+3+L])
|
||||
pos += 3+L
|
||||
return o
|
||||
|
||||
|
||||
class WrapperDB():
|
||||
def __init__(self, parent_db):
|
||||
self.parent_db = parent_db
|
||||
self.substores = {}
|
||||
self.node_to_substore = {}
|
||||
self.new_nodes = {}
|
||||
self.parent_db_reads = 0
|
||||
self.parent_db_writes = 0
|
||||
self.printing_mode = False
|
||||
|
||||
# Loads a substore (RLP-encoded list of closeby trie nodes) from the DB
|
||||
def fetch_substore(self, key):
|
||||
substore_values = self.parent_db.get(key)
|
||||
assert substore_values is not None
|
||||
children = quick_decode(substore_values)
|
||||
self.parent_db_reads += 1
|
||||
self.substores[key] = {sha3(n): n for n in children}
|
||||
self.node_to_substore.update({sha3(n): key for n in children})
|
||||
assert key in self.node_to_substore and key in self.substores
|
||||
|
||||
def get(self, k):
|
||||
if k in self.new_nodes:
|
||||
return self.new_nodes[k]
|
||||
if k not in self.node_to_substore:
|
||||
self.fetch_substore(k)
|
||||
o = self.substores[self.node_to_substore[k]][k]
|
||||
assert sha3(o) == k
|
||||
return o
|
||||
|
||||
def put(self, k, v):
|
||||
if k not in self.new_nodes and k not in self.node_to_substore:
|
||||
self.new_nodes[k] = v
|
||||
|
||||
# Given a key, returns a collection of candidate nodes to form
|
||||
# a substore, as well as the children of that substore
|
||||
def get_substore_candidate_and_children(self, key, depth=5):
|
||||
if depth == 0:
|
||||
return [], [key]
|
||||
elif self.parent_db.get(key) is not None:
|
||||
return [], [key]
|
||||
else:
|
||||
node = self.get(key)
|
||||
L, R, nodetype = parse_node(node)
|
||||
if nodetype == BRANCH_TYPE:
|
||||
Ln, Lc = self.get_substore_candidate_and_children(L, depth-1)
|
||||
Rn, Rc = self.get_substore_candidate_and_children(R, depth-1)
|
||||
return [node] + Ln + Rn, Lc + Rc
|
||||
elif nodetype == KV_TYPE:
|
||||
Rn, Rc = self.get_substore_candidate_and_children(R, depth-1)
|
||||
return [node] + Rn, Rc
|
||||
elif nodetype == LEAF_TYPE:
|
||||
return [node], []
|
||||
|
||||
# Commits to the parent DB
|
||||
def commit(self):
|
||||
processed = {}
|
||||
assert_exists = {}
|
||||
for k, v in self.new_nodes.items():
|
||||
if k in processed:
|
||||
continue
|
||||
nodes, children = self.get_substore_candidate_and_children(k)
|
||||
if not nodes:
|
||||
continue
|
||||
assert k == sha3(nodes[0])
|
||||
for c in children:
|
||||
assert_exists[c] = True
|
||||
if c not in self.substores:
|
||||
self.fetch_substore(c)
|
||||
cvalues = list(self.substores[c].values())
|
||||
if len(quick_encode(cvalues + nodes)) < 3072:
|
||||
del self.substores[c]
|
||||
nodes.extend(cvalues)
|
||||
self.parent_db.put(k, quick_encode(nodes))
|
||||
self.parent_db_writes += 1
|
||||
self.substores[k] = {}
|
||||
for n in nodes:
|
||||
h = sha3(n)
|
||||
self.substores[k][h] = n
|
||||
self.node_to_substore[h] = k
|
||||
processed[h] = k
|
||||
for c in assert_exists:
|
||||
assert self.parent_db.get(c) is not None
|
||||
print('reads', self.parent_db_reads, 'writes', self.parent_db_writes)
|
||||
self.parent_db_reads = self.parent_db_writes = 0
|
||||
self.new_nodes = {}
|
||||
|
||||
def clear_cache(self):
|
||||
assert len(self.new_nodes) == 0
|
||||
self.substores = {}
|
||||
self.node_to_substore = {}
|
|
@ -0,0 +1,52 @@
|
|||
from new_bintrie import Trie, EphemDB, encode_bin, encode_bin_path, decode_bin_path
|
||||
from new_bintrie_aggregate import WrapperDB
|
||||
from ethereum.utils import sha3, encode_hex
|
||||
import random
|
||||
import rlp
|
||||
|
||||
def shuffle_in_place(x):
|
||||
y = x[::]
|
||||
random.shuffle(y)
|
||||
return y
|
||||
|
||||
kvpairs = [(sha3(str(i))[12:], str(i).encode('utf-8') * 5) for i in range(2000)]
|
||||
|
||||
|
||||
for path in ([], [1,0,1], [0,0,1,0], [1,0,0,1,0], [1,0,0,1,0,0,1,0], [1,0] * 8):
|
||||
assert decode_bin_path(encode_bin_path(bytes(path))) == bytes(path)
|
||||
|
||||
r1 = None
|
||||
|
||||
t = Trie(WrapperDB(EphemDB()), b'')
|
||||
for i, (k, v) in enumerate(shuffle_in_place(kvpairs)):
|
||||
#print(t.to_dict())
|
||||
t.update(k, v)
|
||||
assert t.get(k) == v
|
||||
if not i % 50:
|
||||
t.db.commit()
|
||||
assert t.db.parent_db.get(t.root) is not None
|
||||
if not i % 250:
|
||||
t.to_dict()
|
||||
print("Length of branch at %d nodes: %d" % (i, len(rlp.encode(t.get_branch(k)))))
|
||||
assert r1 is None or t.root == r1
|
||||
r1 = t.root
|
||||
t.update(kvpairs[0][0], kvpairs[0][1])
|
||||
assert t.root == r1
|
||||
print(t.get_branch(kvpairs[0][0]))
|
||||
print(t.get_branch(kvpairs[0][0][::-1]))
|
||||
print(encode_hex(t.root))
|
||||
t.db.commit()
|
||||
assert t.db.parent_db.get(t.root) is not None
|
||||
t.db.clear_cache()
|
||||
t.db.printing_mode = True
|
||||
for k, v in shuffle_in_place(kvpairs):
|
||||
t.get(k)
|
||||
t.db.clear_cache()
|
||||
print('Average DB reads: %.3f' % (t.db.parent_db_reads / len(kvpairs)))
|
||||
for k, v in shuffle_in_place(kvpairs):
|
||||
t.update(k, b'')
|
||||
if not random.randrange(100):
|
||||
t.to_dict()
|
||||
t.db.commit()
|
||||
#t.print_nodes()
|
||||
assert t.root == b''
|
Loading…
Reference in New Issue