introduce alternate spec parser with spec comments
This commit is contained in:
parent
16ac948d90
commit
de3ac15d9c
195
setup.py
195
setup.py
|
@ -1,4 +1,3 @@
|
||||||
from enum import Enum, auto
|
|
||||||
from setuptools import setup, find_packages, Command
|
from setuptools import setup, find_packages, Command
|
||||||
from setuptools.command.build_py import build_py
|
from setuptools.command.build_py import build_py
|
||||||
from distutils import dir_util
|
from distutils import dir_util
|
||||||
|
@ -6,11 +5,24 @@ from distutils.util import convert_path
|
||||||
import os
|
import os
|
||||||
import re
|
import re
|
||||||
import string
|
import string
|
||||||
from typing import Dict, NamedTuple, List, Sequence
|
from typing import Dict, NamedTuple, List, Sequence, Optional
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
import ast
|
||||||
|
|
||||||
|
|
||||||
FUNCTION_REGEX = r'^def [\w_]*'
|
# NOTE: have to programmatically include third-party dependencies in `setup.py`.
# `marko` is imported at module level below, i.e. while `setup.py` itself is
# being executed, so it is installed on demand here when it is missing.
MARKO_VERSION = "marko==1.0.2"
try:
    import marko
except ImportError:
    # `pip.main` is not a supported programmatic API and no longer exists in
    # pip >= 10. Invoke pip as a subprocess of the *current* interpreter so the
    # package lands in the environment that is running this script.
    import subprocess
    import sys

    # `check_call` raises `CalledProcessError` if the installation fails, which
    # surfaces the real cause instead of a later, unrelated `ImportError`.
    subprocess.check_call([sys.executable, "-m", "pip", "install", MARKO_VERSION])

from marko.block import Heading, FencedCode, LinkRefDef, BlankLine
from marko.inline import CodeSpan
from marko.ext.gfm import gfm
from marko.ext.gfm.elements import Table, Paragraph
|
||||||
|
|
||||||
|
|
||||||
# Definitions in context.py
|
# Definitions in context.py
|
||||||
PHASE0 = 'phase0'
|
PHASE0 = 'phase0'
|
||||||
|
@ -45,83 +57,118 @@ class SpecObject(NamedTuple):
|
||||||
dataclasses: Dict[str, str]
|
dataclasses: Dict[str, str]
|
||||||
|
|
||||||
|
|
||||||
class CodeBlockType(Enum):
|
def _get_name_from_heading(heading: Heading) -> Optional[str]:
|
||||||
SSZ = auto()
|
last_child = heading.children[-1]
|
||||||
DATACLASS = auto()
|
if isinstance(last_child, CodeSpan):
|
||||||
FUNCTION = auto()
|
return last_child.children
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _get_source_from_code_block(block: FencedCode) -> str:
    """
    Return the text held by the first child of ``block``, stripped of
    leading and trailing whitespace.
    """
    first_child = block.children[0]
    raw_source = first_child.children
    return raw_source.strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _get_function_name_from_source(source: str) -> str:
    """
    Return the name of the function defined by the first statement of ``source``.

    Raises ``SyntaxError`` (from ``ast.parse``) if ``source`` is not valid Python,
    and ``ValueError`` if ``source`` is empty or its first statement is not a
    function definition.
    """
    statements = ast.parse(source).body
    # A prefix check such as `source.startswith("def")` also accepts e.g.
    # `default_x = 1`; fail with a clear message instead of an opaque
    # `IndexError` / `AttributeError`.
    if not statements or not isinstance(statements[0], (ast.FunctionDef, ast.AsyncFunctionDef)):
        raise ValueError("expected python source beginning with a function definition")
    return statements[0].name
|
||||||
|
|
||||||
|
|
||||||
|
def _get_class_info_from_source(source: str) -> (str, Optional[str]):
    """
    Parse ``source`` and describe the class defined by its first statement.

    Returns a ``(class_name, parent_class)`` pair. ``parent_class`` is the
    identifier of the first base class when that base is a plain name
    (e.g. ``Container``); it is ``None`` when the class has no base at all or
    when the first base is any other expression (e.g. an attribute access).
    """
    class_def = ast.parse(source).body[0]
    parent_class = None
    # Guard against classes declared without any base: indexing `bases[0]`
    # unconditionally would raise `IndexError`.
    if class_def.bases:
        base = class_def.bases[0]
        if isinstance(base, ast.Name):
            parent_class = base.id
        # NOTE: otherwise the SSZ definition derives from an earlier phase...
        # e.g. `phase0.SignedBeaconBlock`
        # TODO: check for consistency with other phases
    return class_def.name, parent_class
|
||||||
|
|
||||||
|
|
||||||
|
def _is_constant_id(name: str) -> bool:
    """
    Return True when ``name`` looks like a constant identifier.

    The first character must be an ASCII uppercase letter or ``_``; every
    following character must be an ASCII uppercase letter, ``_`` or a digit.
    An empty ``name`` is not a constant identifier.
    """
    # Guard first: indexing `name[0]` on an empty string raises `IndexError`.
    if not name:
        return False
    leading_chars = string.ascii_uppercase + '_'
    if name[0] not in leading_chars:
        return False
    # Build the allowed set once rather than once per character.
    trailing_chars = leading_chars + string.digits
    return all(c in trailing_chars for c in name[1:])
|
||||||
|
|
||||||
|
|
||||||
|
ETH2_SPEC_COMMENT_PREFIX = "eth2spec:"


def _get_eth2_spec_comment(child: LinkRefDef) -> Optional[str]:
    """
    Extract a spec comment from the title of a link reference definition.

    The title (third item of ``child._parse_info``) must be wrapped in
    parentheses and, once unwrapped, start with ``ETH2_SPEC_COMMENT_PREFIX``.
    The text after the prefix is returned with surrounding whitespace
    stripped; in every other case ``None`` is returned.
    """
    _, _, title = child._parse_info
    # Indexing `title[0]` fails on an empty or missing title.
    # NOTE(review): presumably a definition without a title yields such a
    # value here -- confirm against marko.
    if not title:
        return None
    if not (title.startswith("(") and title.endswith(")")):
        return None
    # Drop the enclosing parentheses.
    title = title[1:-1]
    if not title.startswith(ETH2_SPEC_COMMENT_PREFIX):
        return None
    return title[len(ETH2_SPEC_COMMENT_PREFIX):].strip()
|
||||||
|
|
||||||
|
|
||||||
def get_spec(file_name: str) -> SpecObject:
|
def get_spec(file_name: str) -> SpecObject:
|
||||||
"""
|
|
||||||
Takes in the file name of a spec.md file, opens it and returns a parsed spec object.
|
|
||||||
|
|
||||||
Note: This function makes heavy use of the inherent ordering of dicts,
|
|
||||||
if this is not supported by your python version, it will not work.
|
|
||||||
"""
|
|
||||||
pulling_from = None # line number of start of latest object
|
|
||||||
current_name = None # most recent section title
|
|
||||||
functions: Dict[str, str] = {}
|
functions: Dict[str, str] = {}
|
||||||
constants: Dict[str, str] = {}
|
constants: Dict[str, str] = {}
|
||||||
ssz_dep_constants: Dict[str, str] = {}
|
ssz_dep_constants: Dict[str, str] = {}
|
||||||
ssz_objects: Dict[str, str] = {}
|
ssz_objects: Dict[str, str] = {}
|
||||||
dataclasses: Dict[str, str] = {}
|
dataclasses: Dict[str, str] = {}
|
||||||
function_matcher = re.compile(FUNCTION_REGEX)
|
|
||||||
block_type = CodeBlockType.FUNCTION
|
|
||||||
custom_types: Dict[str, str] = {}
|
custom_types: Dict[str, str] = {}
|
||||||
for linenum, line in enumerate(open(file_name).readlines()):
|
|
||||||
line = line.rstrip()
|
|
||||||
if pulling_from is None and len(line) > 0 and line[0] == '#' and line[-1] == '`':
|
|
||||||
current_name = line[line[:-1].rfind('`') + 1: -1]
|
|
||||||
if line[:9] == '```python':
|
|
||||||
assert pulling_from is None
|
|
||||||
pulling_from = linenum + 1
|
|
||||||
elif line[:3] == '```':
|
|
||||||
pulling_from = None
|
|
||||||
else:
|
|
||||||
# Handle function definitions & ssz_objects
|
|
||||||
if pulling_from is not None:
|
|
||||||
if len(line) > 18 and line[:6] == 'class ' and (line[-12:] == '(Container):' or '(phase' in line):
|
|
||||||
end = -12 if line[-12:] == '(Container):' else line.find('(')
|
|
||||||
name = line[6:end]
|
|
||||||
# Check consistency with markdown header
|
|
||||||
assert name == current_name
|
|
||||||
block_type = CodeBlockType.SSZ
|
|
||||||
elif line[:10] == '@dataclass':
|
|
||||||
block_type = CodeBlockType.DATACLASS
|
|
||||||
elif function_matcher.match(line) is not None:
|
|
||||||
current_name = function_matcher.match(line).group(0)
|
|
||||||
block_type = CodeBlockType.FUNCTION
|
|
||||||
|
|
||||||
if block_type == CodeBlockType.SSZ:
|
with open(file_name) as source_file:
|
||||||
ssz_objects[current_name] = ssz_objects.get(current_name, '') + line + '\n'
|
document = gfm.parse(source_file.read())
|
||||||
elif block_type == CodeBlockType.DATACLASS:
|
|
||||||
dataclasses[current_name] = dataclasses.get(current_name, '') + line + '\n'
|
|
||||||
elif block_type == CodeBlockType.FUNCTION:
|
|
||||||
functions[current_name] = functions.get(current_name, '') + line + '\n'
|
|
||||||
else:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Handle constant and custom types table entries
|
current_name = None
|
||||||
elif pulling_from is None and len(line) > 0 and line[0] == '|':
|
should_skip = False
|
||||||
row = line[1:].split('|')
|
for child in document.children:
|
||||||
if len(row) >= 2:
|
if isinstance(child, BlankLine):
|
||||||
for i in range(2):
|
continue
|
||||||
row[i] = row[i].strip().strip('`')
|
if should_skip:
|
||||||
if '`' in row[i]:
|
should_skip = False
|
||||||
row[i] = row[i][:row[i].find('`')]
|
continue
|
||||||
is_constant_def = True
|
if isinstance(child, Heading):
|
||||||
if row[0][0] not in string.ascii_uppercase + '_':
|
current_name = _get_name_from_heading(child)
|
||||||
is_constant_def = False
|
elif isinstance(child, FencedCode):
|
||||||
for c in row[0]:
|
if child.lang != "python":
|
||||||
if c not in string.ascii_uppercase + '_' + string.digits:
|
continue
|
||||||
is_constant_def = False
|
source = _get_source_from_code_block(child)
|
||||||
if is_constant_def:
|
if source.startswith("def"):
|
||||||
if row[1].startswith('get_generalized_index'):
|
current_name = _get_function_name_from_source(source)
|
||||||
ssz_dep_constants[row[0]] = row[1]
|
functions[current_name] = "\n".join(line.rstrip() for line in source.splitlines())
|
||||||
|
elif source.startswith("@dataclass"):
|
||||||
|
dataclasses[current_name] = "\n".join(line.rstrip() for line in source.splitlines())
|
||||||
|
elif source.startswith("class"):
|
||||||
|
class_name, parent_class = _get_class_info_from_source(source)
|
||||||
|
# check consistency with spec
|
||||||
|
assert class_name == current_name
|
||||||
|
if parent_class:
|
||||||
|
assert parent_class == "Container"
|
||||||
|
# NOTE: trim whitespace from spec
|
||||||
|
ssz_objects[current_name] = "\n".join(line.rstrip() for line in source.splitlines())
|
||||||
|
else:
|
||||||
|
raise Exception("unrecognized python code element")
|
||||||
|
elif isinstance(child, Table):
|
||||||
|
for row in child.children:
|
||||||
|
cells = row.children
|
||||||
|
if len(cells) >= 2:
|
||||||
|
name_cell = cells[0]
|
||||||
|
name = name_cell.children[0].children
|
||||||
|
value_cell = cells[1]
|
||||||
|
value = value_cell.children[0].children
|
||||||
|
if isinstance(value, list):
|
||||||
|
# marko parses `**X**` as a list containing a X
|
||||||
|
value = value[0].children
|
||||||
|
if _is_constant_id(name):
|
||||||
|
if value.startswith("get_generalized_index"):
|
||||||
|
ssz_dep_constants[name] = value
|
||||||
else:
|
else:
|
||||||
constants[row[0]] = row[1].replace('**TBD**', '2**32')
|
constants[name] = value.replace("TBD", "2**32")
|
||||||
elif row[1].startswith('uint') or row[1].startswith('Bytes') or row[1].startswith('ByteList'):
|
elif value.startswith("uint") or value.startswith("Bytes") or value.startswith("ByteList"):
|
||||||
custom_types[row[0]] = row[1]
|
custom_types[name] = value
|
||||||
|
elif isinstance(child, LinkRefDef):
|
||||||
|
comment = _get_eth2_spec_comment(child)
|
||||||
|
if comment:
|
||||||
|
if comment == "skip":
|
||||||
|
should_skip = True
|
||||||
|
|
||||||
return SpecObject(
|
return SpecObject(
|
||||||
functions=functions,
|
functions=functions,
|
||||||
custom_types=custom_types,
|
custom_types=custom_types,
|
||||||
|
@ -424,7 +471,7 @@ def produce_execution_payload(parent_hash: Hash32, timestamp: uint64) -> Executi
|
||||||
|
|
||||||
spec_builders = {
|
spec_builders = {
|
||||||
builder.fork: builder
|
builder.fork: builder
|
||||||
for builder in (Phase0SpecBuilder, AltairSpecBuilder, MergeSpecBuilder)
|
for builder in (AltairSpecBuilder, )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -452,12 +499,12 @@ def objects_to_spec(spec_object: SpecObject, builder: SpecBuilder, ordered_class
|
||||||
for k in list(spec_object.functions):
|
for k in list(spec_object.functions):
|
||||||
if "ceillog2" in k or "floorlog2" in k:
|
if "ceillog2" in k or "floorlog2" in k:
|
||||||
del spec_object.functions[k]
|
del spec_object.functions[k]
|
||||||
functions_spec = '\n\n'.join(spec_object.functions.values())
|
functions_spec = '\n\n\n'.join(spec_object.functions.values())
|
||||||
for k in list(spec_object.constants.keys()):
|
for k in list(spec_object.constants.keys()):
|
||||||
if k == "BLS12_381_Q":
|
if k == "BLS12_381_Q":
|
||||||
spec_object.constants[k] += " # noqa: E501"
|
spec_object.constants[k] += " # noqa: E501"
|
||||||
constants_spec = '\n'.join(map(lambda x: '%s = %s' % (x, spec_object.constants[x]), spec_object.constants))
|
constants_spec = '\n'.join(map(lambda x: '%s = %s' % (x, spec_object.constants[x]), spec_object.constants))
|
||||||
ordered_class_objects_spec = '\n\n'.join(ordered_class_objects.values())
|
ordered_class_objects_spec = '\n\n\n'.join(ordered_class_objects.values())
|
||||||
ssz_dep_constants = '\n'.join(map(lambda x: '%s = %s' % (x, builder.hardcoded_ssz_dep_constants()[x]), builder.hardcoded_ssz_dep_constants()))
|
ssz_dep_constants = '\n'.join(map(lambda x: '%s = %s' % (x, builder.hardcoded_ssz_dep_constants()[x]), builder.hardcoded_ssz_dep_constants()))
|
||||||
ssz_dep_constants_verification = '\n'.join(map(lambda x: 'assert %s == %s' % (x, spec_object.ssz_dep_constants[x]), builder.hardcoded_ssz_dep_constants()))
|
ssz_dep_constants_verification = '\n'.join(map(lambda x: 'assert %s == %s' % (x, spec_object.ssz_dep_constants[x]), builder.hardcoded_ssz_dep_constants()))
|
||||||
custom_type_dep_constants = '\n'.join(map(lambda x: '%s = %s' % (x, builder.hardcoded_custom_type_dep_constants()[x]), builder.hardcoded_custom_type_dep_constants()))
|
custom_type_dep_constants = '\n'.join(map(lambda x: '%s = %s' % (x, builder.hardcoded_custom_type_dep_constants()[x]), builder.hardcoded_custom_type_dep_constants()))
|
||||||
|
@ -474,8 +521,8 @@ def objects_to_spec(spec_object: SpecObject, builder: SpecBuilder, ordered_class
|
||||||
+ '\n\n' + constants_spec
|
+ '\n\n' + constants_spec
|
||||||
+ '\n\n' + CONFIG_LOADER
|
+ '\n\n' + CONFIG_LOADER
|
||||||
+ '\n\n' + ordered_class_objects_spec
|
+ '\n\n' + ordered_class_objects_spec
|
||||||
+ '\n\n' + functions_spec
|
+ '\n\n\n' + functions_spec
|
||||||
+ '\n' + builder.sundry_functions()
|
+ '\n\n' + builder.sundry_functions()
|
||||||
# Since some constants are hardcoded in setup.py, the following assertions verify that the hardcoded constants are
|
# Since some constants are hardcoded in setup.py, the following assertions verify that the hardcoded constants are
|
||||||
# as same as the spec definition.
|
# as same as the spec definition.
|
||||||
+ ('\n\n\n' + ssz_dep_constants_verification if ssz_dep_constants_verification != '' else '')
|
+ ('\n\n\n' + ssz_dep_constants_verification if ssz_dep_constants_verification != '' else '')
|
||||||
|
@ -619,6 +666,7 @@ class PySpecCommand(Command):
|
||||||
specs/altair/beacon-chain.md
|
specs/altair/beacon-chain.md
|
||||||
specs/altair/fork.md
|
specs/altair/fork.md
|
||||||
specs/altair/validator.md
|
specs/altair/validator.md
|
||||||
|
specs/altair/p2p-interface.md
|
||||||
specs/altair/sync-protocol.md
|
specs/altair/sync-protocol.md
|
||||||
"""
|
"""
|
||||||
elif self.spec_fork == MERGE:
|
elif self.spec_fork == MERGE:
|
||||||
|
@ -756,5 +804,6 @@ setup(
|
||||||
"remerkleable==0.1.19",
|
"remerkleable==0.1.19",
|
||||||
"ruamel.yaml==0.16.5",
|
"ruamel.yaml==0.16.5",
|
||||||
"lru-dict==1.1.6",
|
"lru-dict==1.1.6",
|
||||||
|
"marko==1.0.2",
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue