mirror of
https://github.com/status-im/research.git
synced 2025-01-13 00:24:08 +00:00
Added node aggregation PoC
This commit is contained in:
parent
b82db5d868
commit
b6ab900dc5
@ -11,6 +11,9 @@ class EphemDB():
|
|||||||
def put(self, k, v):
|
def put(self, k, v):
|
||||||
self.kv[k] = v
|
self.kv[k] = v
|
||||||
|
|
||||||
|
def delete(self, k):
|
||||||
|
del self.kv[k]
|
||||||
|
|
||||||
KV_TYPE = 0
|
KV_TYPE = 0
|
||||||
BRANCH_TYPE = 1
|
BRANCH_TYPE = 1
|
||||||
LEAF_TYPE = 2
|
LEAF_TYPE = 2
|
||||||
|
117
trie_research/new_bintrie_aggregate.py
Normal file
117
trie_research/new_bintrie_aggregate.py
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
from new_bintrie import b0, b1, KV_TYPE, BRANCH_TYPE, LEAF_TYPE, parse_node, encode_kv_node, encode_branch_node, encode_leaf_node
|
||||||
|
from bin_utils import encode_bin_path, decode_bin_path, common_prefix_length, encode_bin, decode_bin
|
||||||
|
from ethereum.utils import sha3 as _sha3, encode_hex
|
||||||
|
|
||||||
|
sha3_cache = {}
|
||||||
|
|
||||||
|
def sha3(x):
|
||||||
|
if x not in sha3_cache:
|
||||||
|
sha3_cache[x] = _sha3(x)
|
||||||
|
return sha3_cache[x]
|
||||||
|
|
||||||
|
def quick_encode(nodes):
|
||||||
|
o = b''
|
||||||
|
for node in nodes:
|
||||||
|
o += bytes([len(node) // 65536, len(node) // 256, len(node)]) + node
|
||||||
|
return o
|
||||||
|
|
||||||
|
def quick_decode(nodedata):
|
||||||
|
o = []
|
||||||
|
pos = 0
|
||||||
|
while pos < len(nodedata):
|
||||||
|
L = nodedata[pos] * 65536 + nodedata[pos+1] * 256 + nodedata[pos+2]
|
||||||
|
o.append(nodedata[pos+3: pos+3+L])
|
||||||
|
pos += 3+L
|
||||||
|
return o
|
||||||
|
|
||||||
|
|
||||||
|
class WrapperDB():
|
||||||
|
def __init__(self, parent_db):
|
||||||
|
self.parent_db = parent_db
|
||||||
|
self.substores = {}
|
||||||
|
self.node_to_substore = {}
|
||||||
|
self.new_nodes = {}
|
||||||
|
self.parent_db_reads = 0
|
||||||
|
self.parent_db_writes = 0
|
||||||
|
self.printing_mode = False
|
||||||
|
|
||||||
|
# Loads a substore (RLP-encoded list of closeby trie nodes) from the DB
|
||||||
|
def fetch_substore(self, key):
|
||||||
|
substore_values = self.parent_db.get(key)
|
||||||
|
assert substore_values is not None
|
||||||
|
children = quick_decode(substore_values)
|
||||||
|
self.parent_db_reads += 1
|
||||||
|
self.substores[key] = {sha3(n): n for n in children}
|
||||||
|
self.node_to_substore.update({sha3(n): key for n in children})
|
||||||
|
assert key in self.node_to_substore and key in self.substores
|
||||||
|
|
||||||
|
def get(self, k):
|
||||||
|
if k in self.new_nodes:
|
||||||
|
return self.new_nodes[k]
|
||||||
|
if k not in self.node_to_substore:
|
||||||
|
self.fetch_substore(k)
|
||||||
|
o = self.substores[self.node_to_substore[k]][k]
|
||||||
|
assert sha3(o) == k
|
||||||
|
return o
|
||||||
|
|
||||||
|
def put(self, k, v):
|
||||||
|
if k not in self.new_nodes and k not in self.node_to_substore:
|
||||||
|
self.new_nodes[k] = v
|
||||||
|
|
||||||
|
# Given a key, returns a collection of candidate nodes to form
|
||||||
|
# a substore, as well as the children of that substore
|
||||||
|
def get_substore_candidate_and_children(self, key, depth=5):
|
||||||
|
if depth == 0:
|
||||||
|
return [], [key]
|
||||||
|
elif self.parent_db.get(key) is not None:
|
||||||
|
return [], [key]
|
||||||
|
else:
|
||||||
|
node = self.get(key)
|
||||||
|
L, R, nodetype = parse_node(node)
|
||||||
|
if nodetype == BRANCH_TYPE:
|
||||||
|
Ln, Lc = self.get_substore_candidate_and_children(L, depth-1)
|
||||||
|
Rn, Rc = self.get_substore_candidate_and_children(R, depth-1)
|
||||||
|
return [node] + Ln + Rn, Lc + Rc
|
||||||
|
elif nodetype == KV_TYPE:
|
||||||
|
Rn, Rc = self.get_substore_candidate_and_children(R, depth-1)
|
||||||
|
return [node] + Rn, Rc
|
||||||
|
elif nodetype == LEAF_TYPE:
|
||||||
|
return [node], []
|
||||||
|
|
||||||
|
# Commits to the parent DB
|
||||||
|
def commit(self):
|
||||||
|
processed = {}
|
||||||
|
assert_exists = {}
|
||||||
|
for k, v in self.new_nodes.items():
|
||||||
|
if k in processed:
|
||||||
|
continue
|
||||||
|
nodes, children = self.get_substore_candidate_and_children(k)
|
||||||
|
if not nodes:
|
||||||
|
continue
|
||||||
|
assert k == sha3(nodes[0])
|
||||||
|
for c in children:
|
||||||
|
assert_exists[c] = True
|
||||||
|
if c not in self.substores:
|
||||||
|
self.fetch_substore(c)
|
||||||
|
cvalues = list(self.substores[c].values())
|
||||||
|
if len(quick_encode(cvalues + nodes)) < 3072:
|
||||||
|
del self.substores[c]
|
||||||
|
nodes.extend(cvalues)
|
||||||
|
self.parent_db.put(k, quick_encode(nodes))
|
||||||
|
self.parent_db_writes += 1
|
||||||
|
self.substores[k] = {}
|
||||||
|
for n in nodes:
|
||||||
|
h = sha3(n)
|
||||||
|
self.substores[k][h] = n
|
||||||
|
self.node_to_substore[h] = k
|
||||||
|
processed[h] = k
|
||||||
|
for c in assert_exists:
|
||||||
|
assert self.parent_db.get(c) is not None
|
||||||
|
print('reads', self.parent_db_reads, 'writes', self.parent_db_writes)
|
||||||
|
self.parent_db_reads = self.parent_db_writes = 0
|
||||||
|
self.new_nodes = {}
|
||||||
|
|
||||||
|
def clear_cache(self):
|
||||||
|
assert len(self.new_nodes) == 0
|
||||||
|
self.substores = {}
|
||||||
|
self.node_to_substore = {}
|
52
trie_research/new_bintrie_aggregate_tests.py
Normal file
52
trie_research/new_bintrie_aggregate_tests.py
Normal file
@ -0,0 +1,52 @@
|
|||||||
|
from new_bintrie import Trie, EphemDB, encode_bin, encode_bin_path, decode_bin_path
|
||||||
|
from new_bintrie_aggregate import WrapperDB
|
||||||
|
from ethereum.utils import sha3, encode_hex
|
||||||
|
import random
|
||||||
|
import rlp
|
||||||
|
|
||||||
|
def shuffle_in_place(x):
|
||||||
|
y = x[::]
|
||||||
|
random.shuffle(y)
|
||||||
|
return y
|
||||||
|
|
||||||
|
kvpairs = [(sha3(str(i))[12:], str(i).encode('utf-8') * 5) for i in range(2000)]
|
||||||
|
|
||||||
|
|
||||||
|
for path in ([], [1,0,1], [0,0,1,0], [1,0,0,1,0], [1,0,0,1,0,0,1,0], [1,0] * 8):
|
||||||
|
assert decode_bin_path(encode_bin_path(bytes(path))) == bytes(path)
|
||||||
|
|
||||||
|
r1 = None
|
||||||
|
|
||||||
|
t = Trie(WrapperDB(EphemDB()), b'')
|
||||||
|
for i, (k, v) in enumerate(shuffle_in_place(kvpairs)):
|
||||||
|
#print(t.to_dict())
|
||||||
|
t.update(k, v)
|
||||||
|
assert t.get(k) == v
|
||||||
|
if not i % 50:
|
||||||
|
t.db.commit()
|
||||||
|
assert t.db.parent_db.get(t.root) is not None
|
||||||
|
if not i % 250:
|
||||||
|
t.to_dict()
|
||||||
|
print("Length of branch at %d nodes: %d" % (i, len(rlp.encode(t.get_branch(k)))))
|
||||||
|
assert r1 is None or t.root == r1
|
||||||
|
r1 = t.root
|
||||||
|
t.update(kvpairs[0][0], kvpairs[0][1])
|
||||||
|
assert t.root == r1
|
||||||
|
print(t.get_branch(kvpairs[0][0]))
|
||||||
|
print(t.get_branch(kvpairs[0][0][::-1]))
|
||||||
|
print(encode_hex(t.root))
|
||||||
|
t.db.commit()
|
||||||
|
assert t.db.parent_db.get(t.root) is not None
|
||||||
|
t.db.clear_cache()
|
||||||
|
t.db.printing_mode = True
|
||||||
|
for k, v in shuffle_in_place(kvpairs):
|
||||||
|
t.get(k)
|
||||||
|
t.db.clear_cache()
|
||||||
|
print('Average DB reads: %.3f' % (t.db.parent_db_reads / len(kvpairs)))
|
||||||
|
for k, v in shuffle_in_place(kvpairs):
|
||||||
|
t.update(k, b'')
|
||||||
|
if not random.randrange(100):
|
||||||
|
t.to_dict()
|
||||||
|
t.db.commit()
|
||||||
|
#t.print_nodes()
|
||||||
|
assert t.root == b''
|
Loading…
x
Reference in New Issue
Block a user