Refactor run_generator

This commit is contained in:
Hsiao-Wei Wang 2022-11-10 16:33:50 -05:00
parent b617c62e8d
commit 9f5bb03cb4
No known key found for this signature in database
GPG Key ID: AE3D6B174F971DE4

View File

@ -1,4 +1,7 @@
from eth_utils import encode_hex
from dataclasses import (
dataclass,
field,
)
import os
import time
import shutil
@ -15,6 +18,8 @@ from ruamel.yaml import (
from filelock import FileLock
from snappy import compress
from eth_utils import encode_hex
from eth2spec.test import context
from eth2spec.test.exceptions import SkippedTest
@ -28,6 +33,14 @@ context.is_pytest = False
TIME_THRESHOLD_TO_PRINT = 1.0 # seconds
@dataclass
class Diagnostics(object):
collected_test_count: int = 0
generated_test_count: int = 0
skipped_test_count: int = 0
test_identifiers: list = field(default_factory=list)
def validate_output_dir(path_str):
path = Path(path_str)
@ -40,6 +53,47 @@ def validate_output_dir(path_str):
return path
def get_test_case_dir(test_case, output_dir):
return (
Path(output_dir) / Path(test_case.preset_name) / Path(test_case.fork_name)
/ Path(test_case.runner_name) / Path(test_case.handler_name)
/ Path(test_case.suite_name) / Path(test_case.case_name)
)
def get_test_identifier(test_case):
return "::".join([
test_case.preset_name,
test_case.fork_name,
test_case.runner_name,
test_case.handler_name,
test_case.suite_name,
test_case.case_name
])
def get_incomplete_tag_file(case_dir):
return case_dir / "INCOMPLETE"
def should_skip_case_dir(case_dir, is_force, diagnostics_obj):
is_skip = False
incomplete_tag_file = get_incomplete_tag_file(case_dir)
if case_dir.exists():
if not is_force and not incomplete_tag_file.exists():
diagnostics_obj.skipped_test_count += 1
print(f'Skipping already existing test: {case_dir}')
is_skip = True
else:
print(f'Warning, output directory {case_dir} already exist,'
' old files will be deleted and it will generate test vector files with the latest version')
# Clear the existing case_dir folder
shutil.rmtree(case_dir)
return is_skip, diagnostics_obj
def run_generator(generator_name, test_providers: Iterable[TestProvider]):
"""
Implementation for a general test generator.
@ -129,10 +183,8 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]):
print(f"Filtering test-generator runs to only include presets: {', '.join(presets)}")
collect_only = args.collect_only
collected_test_count = 0
generated_test_count = 0
skipped_test_count = 0
test_identifiers = []
diagnostics_obj = Diagnostics()
provider_start = time.time()
for tprov in test_providers:
@ -145,26 +197,60 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]):
if len(presets) != 0 and test_case.preset_name not in presets:
continue
case_dir = (
Path(output_dir) / Path(test_case.preset_name) / Path(test_case.fork_name)
/ Path(test_case.runner_name) / Path(test_case.handler_name)
/ Path(test_case.suite_name) / Path(test_case.case_name)
)
collected_test_count += 1
case_dir = get_test_case_dir(test_case, output_dir)
print(f"Collected test at: {case_dir}")
diagnostics_obj.collected_test_count += 1
incomplete_tag_file = case_dir / "INCOMPLETE"
if case_dir.exists():
if not args.force and not incomplete_tag_file.exists():
skipped_test_count += 1
print(f'Skipping already existing test: {case_dir}')
is_skip, diagnostics_obj = should_skip_case_dir(case_dir, args.force, diagnostics_obj)
if is_skip:
continue
# generate test vector
is_skip, diagnostics_obj = generate_test_vector_and_diagnose(
test_case, case_dir, log_file, file_mode,
cfg_yaml, yaml, diagnostics_obj,
)
if is_skip:
continue
provider_end = time.time()
span = round(provider_end - provider_start, 2)
if collect_only:
print(f"Collected {diagnostics_obj.collected_test_count} tests in total")
else:
print(f'Warning, output directory {case_dir} already exist,'
f' old files will be deleted and it will generate test vector files with the latest version')
# Clear the existing case_dir folder
shutil.rmtree(case_dir)
summary_message = f"completed generation of {generator_name} with {diagnostics_obj.generated_test_count} tests"
summary_message += f" ({diagnostics_obj.skipped_test_count} skipped tests)"
if span > TIME_THRESHOLD_TO_PRINT:
summary_message += f" in {span} seconds"
print(summary_message)
diagnostics_output = {
"collected_test_count": diagnostics_obj.collected_test_count,
"generated_test_count": diagnostics_obj.generated_test_count,
"skipped_test_count": diagnostics_obj.skipped_test_count,
"test_identifiers": diagnostics_obj.test_identifiers,
"durations": [f"{span} seconds"],
}
diagnostics_path = Path(os.path.join(output_dir, "diagnostics_obj.json"))
diagnostics_lock = FileLock(os.path.join(output_dir, "diagnostics_obj.json.lock"))
with diagnostics_lock:
diagnostics_path.touch(exist_ok=True)
if os.path.getsize(diagnostics_path) == 0:
with open(diagnostics_path, "w+") as f:
json.dump(diagnostics_output, f)
else:
with open(diagnostics_path, "r+") as f:
existing_diagnostics = json.load(f)
for k, v in diagnostics_output.items():
existing_diagnostics[k] += v
with open(diagnostics_path, "w+") as f:
json.dump(existing_diagnostics, f)
print(f"wrote diagnostics_obj to {diagnostics_path}")
def generate_test_vector_and_diagnose(test_case, case_dir, log_file, file_mode, cfg_yaml, yaml, diagnostics_obj):
is_skip = False
print(f'Generating test: {case_dir}')
test_start = time.time()
@ -172,53 +258,26 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]):
written_part = False
# Add `INCOMPLETE` tag file to indicate that the test generation has not completed.
incomplete_tag_file = get_incomplete_tag_file(case_dir)
case_dir.mkdir(parents=True, exist_ok=True)
with incomplete_tag_file.open("w") as f:
f.write("\n")
try:
def output_part(out_kind: str, name: str, fn: Callable[[Path, ], None]):
# make sure the test case directory is created before any test part is written.
case_dir.mkdir(parents=True, exist_ok=True)
try:
fn(case_dir)
except IOError as e:
error_message = (
f'[Error] error when dumping test "{case_dir}", part "{name}", kind "{out_kind}": {e}'
)
# Write to error log file
with log_file.open("a+") as f:
f.write(error_message)
traceback.print_exc(file=f)
f.write('\n')
sys.exit(error_message)
meta = dict()
try:
for (name, out_kind, data) in test_case.case_fn():
written_part = True
if out_kind == "meta":
meta[name] = data
elif out_kind == "cfg":
output_part(out_kind, name, dump_yaml_fn(data, name, file_mode, cfg_yaml))
elif out_kind == "data":
output_part(out_kind, name, dump_yaml_fn(data, name, file_mode, yaml))
elif out_kind == "ssz":
output_part(out_kind, name, dump_ssz_fn(data, name, file_mode))
else:
assert False # Unknown kind
written_part, meta = execute_test(test_case, case_dir, meta, log_file, file_mode, cfg_yaml, yaml)
except SkippedTest as e:
print(e)
skipped_test_count += 1
diagnostics_obj.skipped_test_count += 1
shutil.rmtree(case_dir)
continue
is_skip = True
return is_skip, diagnostics_obj
# Once all meta data is collected (if any), write it to a meta data file.
if len(meta) != 0:
written_part = True
output_part("data", "meta", dump_yaml_fn(meta, "meta", file_mode, yaml))
output_part(case_dir, log_file, "data", "meta", dump_yaml_fn(meta, "meta", file_mode, yaml))
if not written_part:
print(f"test case {case_dir} did not produce any test case parts")
@ -235,16 +294,9 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]):
if not written_part:
shutil.rmtree(case_dir)
else:
generated_test_count += 1
test_identifier = "::".join([
test_case.preset_name,
test_case.fork_name,
test_case.runner_name,
test_case.handler_name,
test_case.suite_name,
test_case.case_name
])
test_identifiers.append(test_identifier)
diagnostics_obj.generated_test_count += 1
test_identifier = get_test_identifier(test_case)
diagnostics_obj.test_identifiers.append(test_identifier)
# Only remove `INCOMPLETE` tag file
os.remove(incomplete_tag_file)
test_end = time.time()
@ -252,39 +304,7 @@ def run_generator(generator_name, test_providers: Iterable[TestProvider]):
if span > TIME_THRESHOLD_TO_PRINT:
print(f' - generated in {span} seconds')
provider_end = time.time()
span = round(provider_end - provider_start, 2)
if collect_only:
print(f"Collected {collected_test_count} tests in total")
else:
summary_message = f"completed generation of {generator_name} with {generated_test_count} tests"
summary_message += f" ({skipped_test_count} skipped tests)"
if span > TIME_THRESHOLD_TO_PRINT:
summary_message += f" in {span} seconds"
print(summary_message)
diagnostics = {
"collected_test_count": collected_test_count,
"generated_test_count": generated_test_count,
"skipped_test_count": skipped_test_count,
"test_identifiers": test_identifiers,
"durations": [f"{span} seconds"],
}
diagnostics_path = Path(os.path.join(output_dir, "diagnostics.json"))
diagnostics_lock = FileLock(os.path.join(output_dir, "diagnostics.json.lock"))
with diagnostics_lock:
diagnostics_path.touch(exist_ok=True)
if os.path.getsize(diagnostics_path) == 0:
with open(diagnostics_path, "w+") as f:
json.dump(diagnostics, f)
else:
with open(diagnostics_path, "r+") as f:
existing_diagnostics = json.load(f)
for k, v in diagnostics.items():
existing_diagnostics[k] += v
with open(diagnostics_path, "w+") as f:
json.dump(existing_diagnostics, f)
print(f"wrote diagnostics to {diagnostics_path}")
return is_skip, diagnostics_obj
def dump_yaml_fn(data: Any, name: str, file_mode: str, yaml_encoder: YAML):
@ -295,6 +315,40 @@ def dump_yaml_fn(data: Any, name: str, file_mode: str, yaml_encoder: YAML):
return dump
def output_part(case_dir, log_file, out_kind: str, name: str, fn: Callable[[Path, ], None]):
# make sure the test case directory is created before any test part is written.
case_dir.mkdir(parents=True, exist_ok=True)
try:
fn(case_dir)
except IOError as e:
error_message = f'[Error] error when dumping test "{case_dir}", part "{name}", kind "{out_kind}": {e}'
# Write to error log file
with log_file.open("a+") as f:
f.write(error_message)
traceback.print_exc(file=f)
f.write('\n')
sys.exit(error_message)
def execute_test(test_case, case_dir, meta, log_file, file_mode, cfg_yaml, yaml):
result = test_case.case_fn()
for (name, out_kind, data) in result:
written_part = True
if out_kind == "meta":
meta[name] = data
elif out_kind == "cfg":
output_part(case_dir, log_file, out_kind, name, dump_yaml_fn(data, name, file_mode, cfg_yaml))
elif out_kind == "data":
output_part(case_dir, log_file, out_kind, name, dump_yaml_fn(data, name, file_mode, yaml))
elif out_kind == "ssz":
output_part(case_dir, log_file, out_kind, name, dump_ssz_fn(data, name, file_mode))
else:
raise ValueError("Unknown out_kind %s" % out_kind)
return written_part, meta
def dump_ssz_fn(data: AnyStr, name: str, file_mode: str):
def dump(case_path: Path):
out_path = case_path / Path(name + '.ssz_snappy')