# The MIT License (MIT)
#
# Copyright (c) 2015 Vitalik Buterin
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
r"""Simple serialization - an alternative to RLP that's much simpler.

The spec::

    encode(string) = enc3b(len(string)) + string

where ``enc3b(x)`` encodes ``x`` as three bytes, in big-endian format, and::

    encode(list) = enc3b(8388608 + sum(len(encode(x)) for x in list)) + \
                   b''.join(encode(x) for x in list)

And that's it!

Examples::

    cow          -> \x00\x00\x03cow
    [dog, horse] -> \x80\x00\x0e\x00\x00\x03dog\x00\x00\x05horse

This module bundles the byte helpers, the typed field serializers and the
``encode`` / ``decode`` entry points of the ``py_ssz`` package.
"""

# Every item starts with a three byte big-endian header.  The top bit of the
# header marks a list; the remaining 23 bits hold the payload length.
_LENGTH_BYTES = 3
_LIST_FLAG = 1 << 23                    # 8388608
_MAX_PAYLOAD_LENGTH = _LIST_FLAG - 1    # 8388607


class SSZError(ValueError, AssertionError):
    """Raised when a value cannot be serialized or a byte string is malformed.

    Validation used to be done with bare ``assert`` statements, which vanish
    under ``python -O``.  The class still derives from ``AssertionError`` so
    that callers written against the assert-based behaviour keep working.
    """


# ---------------------------------------------------------------------------
# utils
# ---------------------------------------------------------------------------

def int_to_big_endian(value):
    """Return *value* as a minimal big-endian byte string.

    Zero is encoded as the single byte ``b'\\x00'`` (never as the empty
    string).  Negative values are rejected by ``int.to_bytes`` with
    ``OverflowError``.
    """
    # Integer arithmetic instead of ceil(bits / 8): no float rounding involved.
    byte_length = max((value.bit_length() + 7) // 8, 1)
    return value.to_bytes(byte_length, byteorder='big')


def big_endian_to_int(value):
    """Interpret the byte string *value* as an unsigned big-endian integer."""
    return int.from_bytes(value, byteorder='big')


# ---------------------------------------------------------------------------
# serializers
# ---------------------------------------------------------------------------

class Serializable():
    """Base class for objects described by a ``fields`` list.

    Subclasses define ``fields`` as a sequence of ``(attribute_name,
    serializer)`` pairs, where a serializer is any class exposing the
    ``_s`` (to raw structure) and ``_d`` (from raw structure) classmethods.
    A ``Serializable`` subclass is itself a valid serializer.
    """

    @classmethod
    def _s(cls, obj):
        """Return the list of serialized field values of *obj*.

        Raises:
            SSZError: a field declared with a ``Serializable`` subclass holds
                an object that is not an instance of that subclass.
        """
        out = []
        for field, serializer in obj.__class__.fields:
            member = getattr(obj, field)
            # ``serializer`` is a class, so the check has to be issubclass();
            # isinstance(serializer, Serializable) is never true for a class.
            if isinstance(serializer, type) and issubclass(serializer, Serializable):
                if not isinstance(member, serializer):
                    raise SSZError(
                        'field %r must be a %s instance, got %s'
                        % (field, serializer.__name__, type(member).__name__))
            out.append(serializer._s(member))
        return out

    @classmethod
    def _d(cls, data, *args, **kwargs):
        """Build an instance of *cls* from the decoded list *data*.

        Extra positional and keyword arguments are forwarded to the
        constructor of *cls*.

        Raises:
            SSZError: *data* is not a list with one entry per declared field.
        """
        obj = cls(*args, **kwargs)
        if not isinstance(data, list) or len(data) != len(cls.fields):
            raise SSZError(
                '%s expects a list of %d fields' % (cls.__name__, len(cls.fields)))
        for datum, (field, serializer) in zip(data, cls.fields):
            setattr(obj, field, serializer._d(datum))
        return obj


def int_in_range(_min, _max):
    """Return a serializer for integers ``x`` with ``_min <= x <= _max``."""
    class c():
        @classmethod
        def _s(cls, x):
            if not (isinstance(x, int) and _min <= x <= _max):
                raise SSZError(
                    'expected an int in [%d, %d], got %r' % (_min, _max, x))
            return int_to_big_endian(x)

        @classmethod
        def _d(cls, x):
            # Leading zero bytes are non-canonical, with one exception: zero
            # itself is emitted as b'\x00' by int_to_big_endian and must be
            # accepted here, otherwise 0 cannot round-trip.  The empty string
            # is still accepted as zero, as before.
            if len(x) > 1 and x[0] == 0:
                raise SSZError('integer encoding has leading zero bytes')
            return big_endian_to_int(x)
    return c


big_endian_int = int_in_range(0, 2**256-1)
# NOTE(review): the name says 256 bits but the bound is 2**2048 - 1; the value
# is kept unchanged here - confirm which of the two was intended.
int256 = int_in_range(0, 2**2048-1)


def bytesn(n):
    """Return a serializer for byte strings of exactly *n* bytes.

    The length is enforced when serializing; decoding returns the raw bytes
    unchanged.
    """
    class c():
        @classmethod
        def _s(cls, x):
            if not (isinstance(x, bytes) and len(x) == n):
                raise SSZError('expected exactly %d bytes' % n)
            return x

        @classmethod
        def _d(cls, x):
            return x
    return c


hash32 = bytesn(32)


class binary():
    """Serializer for arbitrary byte strings."""

    @classmethod
    def _s(cls, x):
        if not isinstance(x, bytes):
            raise SSZError('expected bytes, got %s' % type(x).__name__)
        return x

    @classmethod
    def _d(cls, x):
        return x


def CountableList(ser):
    """Return a serializer for lists whose elements all use serializer *ser*."""
    class c():
        @classmethod
        def _s(cls, vals):
            if not isinstance(vals, list):
                raise SSZError('expected a list, got %s' % type(vals).__name__)
            return [ser._s(v) for v in vals]

        @classmethod
        def _d(cls, vals):
            return [ser._d(v) for v in vals]
    return c


# ---------------------------------------------------------------------------
# encode / decode
# ---------------------------------------------------------------------------

def add_length_prefix(obj, is_list=False):
    """Prepend the three byte header to the payload *obj*.

    Args:
        obj: the already serialized payload (bytes).
        is_list: set the list flag (top bit of the header) when true.

    Raises:
        SSZError: the payload is longer than 8388607 bytes and therefore does
            not fit in the 23 length bits.
    """
    length = len(obj)
    if length > _MAX_PAYLOAD_LENGTH:
        raise SSZError(
            'payload of %d bytes exceeds the %d byte limit'
            % (length, _MAX_PAYLOAD_LENGTH))
    header = length | (_LIST_FLAG if is_list else 0)
    return header.to_bytes(_LENGTH_BYTES, byteorder='big') + obj


def encode(obj):
    """Serialize *obj* to bytes.

    Supported inputs: ``bytes``; ``str`` (encoded as UTF-8); non-negative
    ``int`` (minimal big-endian); ``list`` of supported values; and
    ``Serializable`` instances (encoded as the list of their fields).

    Raises:
        SSZError: a negative integer or an oversized payload.
        TypeError: *obj* is of an unsupported type (previously this case
            silently returned ``None``).
    """
    if isinstance(obj, bytes):
        return add_length_prefix(obj)
    if isinstance(obj, str):
        return add_length_prefix(obj.encode('utf-8'))
    if isinstance(obj, int):
        if obj < 0:
            raise SSZError('cannot encode negative integer %d' % obj)
        return add_length_prefix(int_to_big_endian(obj))
    if isinstance(obj, list):
        # join once instead of growing a bytes object with += in a loop
        return add_length_prefix(b''.join(encode(item) for item in obj), True)
    if isinstance(obj, Serializable):
        return encode(obj._s(obj))
    raise TypeError('cannot encode object of type %s' % type(obj).__name__)


def decode_raw(obj, pos=0):
    """Decode the single item that starts at offset *pos* of *obj*.

    Returns:
        ``(value, end)`` where *value* is a byte string or a (possibly
        nested) list of byte strings and *end* is the offset just past the
        item.

    Raises:
        SSZError: the header or payload is truncated, or a list element runs
            past the end of the list that contains it.

    The decoder keeps its own stack of open lists instead of recursing, so
    deeply nested input cannot exhaust the interpreter's recursion limit.
    """
    if pos < 0:
        raise SSZError('negative offset %d' % pos)
    total = len(obj)
    open_lists = []  # (items collected so far, end offset) per unfinished list
    while True:
        # An element may use neither bytes beyond its enclosing list nor,
        # at the top level, bytes beyond the buffer.
        limit = open_lists[-1][1] if open_lists else total
        if pos + _LENGTH_BYTES > limit:
            raise SSZError('truncated length prefix at offset %d' % pos)
        first = obj[pos]
        length = ((first & 0x7F) << 16) | (obj[pos + 1] << 8) | obj[pos + 2]
        body = pos + _LENGTH_BYTES
        end = body + length
        if end > limit:
            raise SSZError(
                'item at offset %d claims %d bytes but only %d are available'
                % (pos, length, limit - body))
        if first & 0x80:
            open_lists.append(([], end))
            pos = body
        else:
            value = obj[body:end]
            pos = end
            if not open_lists:
                return value, pos
            open_lists[-1][0].append(value)
        # Close every list whose payload has now been consumed completely.
        while open_lists and pos == open_lists[-1][1]:
            value, _ = open_lists.pop()
            if not open_lists:
                return value, pos
            open_lists[-1][0].append(value)


def decode(obj, cls=None, *args, **kwargs):
    """Decode the byte string *obj*, which must hold exactly one item.

    Args:
        obj: the serialized bytes.
        cls: optional serializer class; when given, the raw structure is
            passed to ``cls._d`` together with ``*args`` / ``**kwargs``.

    Returns:
        The raw structure (bytes / nested lists of bytes) when *cls* is
        ``None``, otherwise whatever ``cls._d`` returns.

    Raises:
        SSZError: *obj* is malformed or has trailing bytes after the item.
    """
    decoded, endpos = decode_raw(obj)
    if endpos != len(obj):
        raise SSZError('%d trailing bytes after item' % (len(obj) - endpos))
    if cls is None:
        return decoded
    return cls._d(decoded, *args, **kwargs)


def _self_test():
    """Run the round-trip checks that shipped with the package."""
    assert decode(encode(b'cow')) == b'cow'
    assert decode(encode(123)) == int_to_big_endian(123)
    assert decode(encode([b'cow', b'horse', b'pig'])) == [b'cow', b'horse', b'pig']
    assert decode(encode([b'cow', b'horse', [b'o', b'q']])) == [b'cow', b'horse', [b'o', b'q']]
    assert decode(encode([[], [[]], [[], [[]]]])) == [[], [[]], [[], [[]]]]

    class Foo(Serializable):
        fields = [
            ('bar', big_endian_int),
            ('baz', binary)
        ]

        def __init__(self, bar=0, baz=b''):
            self.bar = bar
            self.baz = baz

    assert encode(Foo(3, b'cow')) == encode([3, b'cow'])
    foo2 = decode(encode(Foo(3, b'cow')), Foo)
    assert foo2.bar == 3 and foo2.baz == b'cow'

    class Foo2(Serializable):
        fields = [
            ('bat', Foo),
            ('bau', CountableList(big_endian_int)),
            ('bav', CountableList(Foo))
        ]

    x = Foo2()
    x.bat = Foo(3, b'cow')
    x.bau = [4, 5, 6, 7, 8]
    x.bav = [Foo(5, b'horse'), Foo(7, b'mongoose')]
    assert encode(x) == encode([[3, b'cow'], [4, 5, 6, 7, 8], [[5, b'horse'], [7, b'mongoose']]])


if __name__ == '__main__':
    _self_test()