introduce alternate spec parser with spec comments

This commit is contained in:
Alex Stokes 2021-04-20 14:55:46 -07:00
parent 16ac948d90
commit de3ac15d9c
No known key found for this signature in database
GPG Key ID: 99B3D88FD6C55A69
1 changed files with 122 additions and 73 deletions

195
setup.py
View File

@ -1,4 +1,3 @@
from enum import Enum, auto
from setuptools import setup, find_packages, Command from setuptools import setup, find_packages, Command
from setuptools.command.build_py import build_py from setuptools.command.build_py import build_py
from distutils import dir_util from distutils import dir_util
@ -6,11 +5,24 @@ from distutils.util import convert_path
import os import os
import re import re
import string import string
from typing import Dict, NamedTuple, List, Sequence from typing import Dict, NamedTuple, List, Sequence, Optional
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
import ast
FUNCTION_REGEX = r'^def [\w_]*' # NOTE: have to programmatically include third-party dependencies in `setup.py`.
MARKO_VERSION = "marko==1.0.2"
try:
import marko
except ImportError:
import pip
pip.main(["install", MARKO_VERSION])
from marko.block import Heading, FencedCode, LinkRefDef, BlankLine
from marko.inline import CodeSpan
from marko.ext.gfm import gfm
from marko.ext.gfm.elements import Table, Paragraph
# Definitions in context.py # Definitions in context.py
PHASE0 = 'phase0' PHASE0 = 'phase0'
@ -45,83 +57,118 @@ class SpecObject(NamedTuple):
dataclasses: Dict[str, str] dataclasses: Dict[str, str]
class CodeBlockType(Enum): def _get_name_from_heading(heading: Heading) -> Optional[str]:
SSZ = auto() last_child = heading.children[-1]
DATACLASS = auto() if isinstance(last_child, CodeSpan):
FUNCTION = auto() return last_child.children
return None
def _get_source_from_code_block(block: FencedCode) -> str:
return block.children[0].children.strip()
def _get_function_name_from_source(source: str) -> str:
fn = ast.parse(source).body[0]
return fn.name
def _get_class_info_from_source(source: str) -> (str, Optional[str]):
class_def = ast.parse(source).body[0]
base = class_def.bases[0]
if isinstance(base, ast.Name):
parent_class = base.id
else:
# NOTE: SSZ definition derives from earlier phase...
# e.g. `phase0.SignedBeaconBlock`
# TODO: check for consistency with other phases
parent_class = None
return class_def.name, parent_class
def _is_constant_id(name: str) -> bool:
if name[0] not in string.ascii_uppercase + '_':
return False
return all(map(lambda c: c in string.ascii_uppercase + '_' + string.digits, name[1:]))
ETH2_SPEC_COMMENT_PREFIX = "eth2spec:"
def _get_eth2_spec_comment(child: LinkRefDef) -> Optional[str]:
_, _, title = child._parse_info
if not (title[0] == "(" and title[len(title)-1] == ")"):
return None
title = title[1:len(title)-1]
if not title.startswith(ETH2_SPEC_COMMENT_PREFIX):
return None
return title[len(ETH2_SPEC_COMMENT_PREFIX):].strip()
def get_spec(file_name: str) -> SpecObject: def get_spec(file_name: str) -> SpecObject:
"""
Takes in the file name of a spec.md file, opens it and returns a parsed spec object.
Note: This function makes heavy use of the inherent ordering of dicts,
if this is not supported by your python version, it will not work.
"""
pulling_from = None # line number of start of latest object
current_name = None # most recent section title
functions: Dict[str, str] = {} functions: Dict[str, str] = {}
constants: Dict[str, str] = {} constants: Dict[str, str] = {}
ssz_dep_constants: Dict[str, str] = {} ssz_dep_constants: Dict[str, str] = {}
ssz_objects: Dict[str, str] = {} ssz_objects: Dict[str, str] = {}
dataclasses: Dict[str, str] = {} dataclasses: Dict[str, str] = {}
function_matcher = re.compile(FUNCTION_REGEX)
block_type = CodeBlockType.FUNCTION
custom_types: Dict[str, str] = {} custom_types: Dict[str, str] = {}
for linenum, line in enumerate(open(file_name).readlines()):
line = line.rstrip()
if pulling_from is None and len(line) > 0 and line[0] == '#' and line[-1] == '`':
current_name = line[line[:-1].rfind('`') + 1: -1]
if line[:9] == '```python':
assert pulling_from is None
pulling_from = linenum + 1
elif line[:3] == '```':
pulling_from = None
else:
# Handle function definitions & ssz_objects
if pulling_from is not None:
if len(line) > 18 and line[:6] == 'class ' and (line[-12:] == '(Container):' or '(phase' in line):
end = -12 if line[-12:] == '(Container):' else line.find('(')
name = line[6:end]
# Check consistency with markdown header
assert name == current_name
block_type = CodeBlockType.SSZ
elif line[:10] == '@dataclass':
block_type = CodeBlockType.DATACLASS
elif function_matcher.match(line) is not None:
current_name = function_matcher.match(line).group(0)
block_type = CodeBlockType.FUNCTION
if block_type == CodeBlockType.SSZ: with open(file_name) as source_file:
ssz_objects[current_name] = ssz_objects.get(current_name, '') + line + '\n' document = gfm.parse(source_file.read())
elif block_type == CodeBlockType.DATACLASS:
dataclasses[current_name] = dataclasses.get(current_name, '') + line + '\n'
elif block_type == CodeBlockType.FUNCTION:
functions[current_name] = functions.get(current_name, '') + line + '\n'
else:
pass
# Handle constant and custom types table entries current_name = None
elif pulling_from is None and len(line) > 0 and line[0] == '|': should_skip = False
row = line[1:].split('|') for child in document.children:
if len(row) >= 2: if isinstance(child, BlankLine):
for i in range(2): continue
row[i] = row[i].strip().strip('`') if should_skip:
if '`' in row[i]: should_skip = False
row[i] = row[i][:row[i].find('`')] continue
is_constant_def = True if isinstance(child, Heading):
if row[0][0] not in string.ascii_uppercase + '_': current_name = _get_name_from_heading(child)
is_constant_def = False elif isinstance(child, FencedCode):
for c in row[0]: if child.lang != "python":
if c not in string.ascii_uppercase + '_' + string.digits: continue
is_constant_def = False source = _get_source_from_code_block(child)
if is_constant_def: if source.startswith("def"):
if row[1].startswith('get_generalized_index'): current_name = _get_function_name_from_source(source)
ssz_dep_constants[row[0]] = row[1] functions[current_name] = "\n".join(line.rstrip() for line in source.splitlines())
elif source.startswith("@dataclass"):
dataclasses[current_name] = "\n".join(line.rstrip() for line in source.splitlines())
elif source.startswith("class"):
class_name, parent_class = _get_class_info_from_source(source)
# check consistency with spec
assert class_name == current_name
if parent_class:
assert parent_class == "Container"
# NOTE: trim whitespace from spec
ssz_objects[current_name] = "\n".join(line.rstrip() for line in source.splitlines())
else:
raise Exception("unrecognized python code element")
elif isinstance(child, Table):
for row in child.children:
cells = row.children
if len(cells) >= 2:
name_cell = cells[0]
name = name_cell.children[0].children
value_cell = cells[1]
value = value_cell.children[0].children
if isinstance(value, list):
# marko parses `**X**` as a list containing a X
value = value[0].children
if _is_constant_id(name):
if value.startswith("get_generalized_index"):
ssz_dep_constants[name] = value
else: else:
constants[row[0]] = row[1].replace('**TBD**', '2**32') constants[name] = value.replace("TBD", "2**32")
elif row[1].startswith('uint') or row[1].startswith('Bytes') or row[1].startswith('ByteList'): elif value.startswith("uint") or value.startswith("Bytes") or value.startswith("ByteList"):
custom_types[row[0]] = row[1] custom_types[name] = value
elif isinstance(child, LinkRefDef):
comment = _get_eth2_spec_comment(child)
if comment:
if comment == "skip":
should_skip = True
return SpecObject( return SpecObject(
functions=functions, functions=functions,
custom_types=custom_types, custom_types=custom_types,
@ -424,7 +471,7 @@ def produce_execution_payload(parent_hash: Hash32, timestamp: uint64) -> Executi
spec_builders = { spec_builders = {
builder.fork: builder builder.fork: builder
for builder in (Phase0SpecBuilder, AltairSpecBuilder, MergeSpecBuilder) for builder in (AltairSpecBuilder, )
} }
@ -452,12 +499,12 @@ def objects_to_spec(spec_object: SpecObject, builder: SpecBuilder, ordered_class
for k in list(spec_object.functions): for k in list(spec_object.functions):
if "ceillog2" in k or "floorlog2" in k: if "ceillog2" in k or "floorlog2" in k:
del spec_object.functions[k] del spec_object.functions[k]
functions_spec = '\n\n'.join(spec_object.functions.values()) functions_spec = '\n\n\n'.join(spec_object.functions.values())
for k in list(spec_object.constants.keys()): for k in list(spec_object.constants.keys()):
if k == "BLS12_381_Q": if k == "BLS12_381_Q":
spec_object.constants[k] += " # noqa: E501" spec_object.constants[k] += " # noqa: E501"
constants_spec = '\n'.join(map(lambda x: '%s = %s' % (x, spec_object.constants[x]), spec_object.constants)) constants_spec = '\n'.join(map(lambda x: '%s = %s' % (x, spec_object.constants[x]), spec_object.constants))
ordered_class_objects_spec = '\n\n'.join(ordered_class_objects.values()) ordered_class_objects_spec = '\n\n\n'.join(ordered_class_objects.values())
ssz_dep_constants = '\n'.join(map(lambda x: '%s = %s' % (x, builder.hardcoded_ssz_dep_constants()[x]), builder.hardcoded_ssz_dep_constants())) ssz_dep_constants = '\n'.join(map(lambda x: '%s = %s' % (x, builder.hardcoded_ssz_dep_constants()[x]), builder.hardcoded_ssz_dep_constants()))
ssz_dep_constants_verification = '\n'.join(map(lambda x: 'assert %s == %s' % (x, spec_object.ssz_dep_constants[x]), builder.hardcoded_ssz_dep_constants())) ssz_dep_constants_verification = '\n'.join(map(lambda x: 'assert %s == %s' % (x, spec_object.ssz_dep_constants[x]), builder.hardcoded_ssz_dep_constants()))
custom_type_dep_constants = '\n'.join(map(lambda x: '%s = %s' % (x, builder.hardcoded_custom_type_dep_constants()[x]), builder.hardcoded_custom_type_dep_constants())) custom_type_dep_constants = '\n'.join(map(lambda x: '%s = %s' % (x, builder.hardcoded_custom_type_dep_constants()[x]), builder.hardcoded_custom_type_dep_constants()))
@ -474,8 +521,8 @@ def objects_to_spec(spec_object: SpecObject, builder: SpecBuilder, ordered_class
+ '\n\n' + constants_spec + '\n\n' + constants_spec
+ '\n\n' + CONFIG_LOADER + '\n\n' + CONFIG_LOADER
+ '\n\n' + ordered_class_objects_spec + '\n\n' + ordered_class_objects_spec
+ '\n\n' + functions_spec + '\n\n\n' + functions_spec
+ '\n' + builder.sundry_functions() + '\n\n' + builder.sundry_functions()
# Since some constants are hardcoded in setup.py, the following assertions verify that the hardcoded constants are # Since some constants are hardcoded in setup.py, the following assertions verify that the hardcoded constants are
# as same as the spec definition. # as same as the spec definition.
+ ('\n\n\n' + ssz_dep_constants_verification if ssz_dep_constants_verification != '' else '') + ('\n\n\n' + ssz_dep_constants_verification if ssz_dep_constants_verification != '' else '')
@ -619,6 +666,7 @@ class PySpecCommand(Command):
specs/altair/beacon-chain.md specs/altair/beacon-chain.md
specs/altair/fork.md specs/altair/fork.md
specs/altair/validator.md specs/altair/validator.md
specs/altair/p2p-interface.md
specs/altair/sync-protocol.md specs/altair/sync-protocol.md
""" """
elif self.spec_fork == MERGE: elif self.spec_fork == MERGE:
@ -756,5 +804,6 @@ setup(
"remerkleable==0.1.19", "remerkleable==0.1.19",
"ruamel.yaml==0.16.5", "ruamel.yaml==0.16.5",
"lru-dict==1.1.6", "lru-dict==1.1.6",
"marko==1.0.2",
] ]
) )