mirror of
https://github.com/logos-storage/bittorrent-benchmarks.git
synced 2026-01-07 15:33:10 +00:00
feat: add ES slicing to CLI

parent a9b9fd8332
commit cbba235f2e
@@ -155,25 +155,26 @@ def _parse_config(
         sys.exit(-1)
 
 
-def _configure_source(args, dump=False):
-    # TODO we should probably have builders for sources as well, but for now
-    # we'll just keep it simple.
-    if args.source_file:
-        if not args.source_file.exists():
-            print(f"Log source file {args.source_file} does not exist.")
-            sys.exit(-1)
-        return VectorFlatFileSource(
-            app_name="codex-benchmarks", file=args.source_file.open(encoding="utf-8")
-        )
-    else:
-        import urllib3
-
-        urllib3.disable_warnings()
-
-        return LogstashSource(
-            Elasticsearch(args.es_url, verify_certs=False),
-            structured_only=not dump,
-        )
+def _configure_logstash_source(args, structured_only=True):
+    import urllib3
+
+    urllib3.disable_warnings()
+
+    return LogstashSource(
+        Elasticsearch(args.es_url, verify_certs=False),
+        chronological=args.chronological,
+        structured_only=structured_only,
+        slices=args.slices,
+    )
+
+
+def _configure_vector_source(args):
+    if not args.source_file.exists():
+        print(f"Log source file {args.source_file} does not exist.")
+        sys.exit(-1)
+    return VectorFlatFileSource(
+        app_name="codex-benchmarks", file=args.source_file.open(encoding="utf-8")
+    )
 
 
 def _init_logging():
@@ -186,10 +187,15 @@ def _init_logging():
 
 
 def main():
+    # TODO this is getting unwieldy, need pull this apart into submodules. For now we get away
+    # with title comments.
     parser = argparse.ArgumentParser()
 
     commands = parser.add_subparsers(required=True)
 
+    ###########################################################################
+    # Experiments                                                             #
+    ###########################################################################
     experiments = commands.add_parser(
         "experiments", help="List or run experiments in config file."
     )
@@ -226,6 +232,9 @@ def main():
 
     describe_cmd.set_defaults(func=cmd_describe_experiment)
 
+    ###########################################################################
+    # Logs                                                                    #
+    ###########################################################################
    logs_cmd = commands.add_parser("logs", help="Parse logs.")
    log_subcommands = logs_cmd.add_subparsers(required=True)
 
@@ -243,15 +252,6 @@ def main():
     log_source_cmd = log_subcommands.add_parser(
         "source", help="Parse logs from a log source."
     )
-
-    source_group = log_source_cmd.add_mutually_exclusive_group(required=True)
-    source_group.add_argument(
-        "--source-file", type=Path, help="Vector log file to parse from."
-    )
-    source_group.add_argument(
-        "--es-url", type=str, help="URL to a logstash Elasticsearch instance."
-    )
-
     log_source_cmd.add_argument(
         "group_id", type=str, help="ID of experiment group to parse."
     )
@@ -267,16 +267,48 @@ def main():
         type=Path,
         help="Splits logs for the entire group into the specified folder.",
     )
-    log_source_cmd.set_defaults(
-        func=lambda args: cmd_split_log_source(
-            _configure_source(args, dump=False), args.group_id, args.output_dir
-        )
-        if args.output_dir
-        else cmd_dump_single_experiment(
-            _configure_source(args, dump=True), args.group_id, args.experiment_id
-        )
-    )
+    single_or_split.set_defaults(
+        func=lambda args: cmd_dump_single_experiment(
+            args.source(args, False), args.group_id, args.experiment_id
+        )
+        if args.experiment_id
+        else cmd_split_log_source(
+            args.source(args, True), args.group_id, args.output_dir
+        )
+    )
 
+    source_type = log_source_cmd.add_subparsers(required=True)
+    es_source = source_type.add_parser("logstash", help="Logstash source.")
+    es_source.add_argument(
+        "es_url", type=str, help="URL to a logstash Elasticsearch instance."
+    )
+    es_source.add_argument(
+        "--chronological", action="store_true", help="Sort logs chronologically."
+    )
+    es_source.add_argument(
+        "--slices",
+        type=int,
+        help="Number of scroll slices to use when reading the log.",
+        default=2,
+    )
+
+    es_source.set_defaults(
+        source=lambda args, structured_only: _configure_logstash_source(
+            args, structured_only=structured_only
+        )
+    )
+
+    vector_source = source_type.add_parser("vector", help="Vector flat file source.")
+    vector_source.add_argument(
+        "source_file", type=Path, help="Vector log file to parse from."
+    )
+
+    vector_source.set_defaults(source=lambda args, _: _configure_vector_source(args))
+
+    ###########################################################################
+    # Agents                                                                  #
+    ###########################################################################
     agent_cmd = commands.add_parser("agent", help="Starts a local agent.")
     agent_cmd.add_argument(
         "config", type=Path, help="Path to the agent configuration file."
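Note: the wiring above can be hard to read in diff form, so here is a minimal, self-contained sketch of the pattern it introduces: each source-type subparser registers a source factory through set_defaults, and the logs handler resolves it lazily from the parsed namespace. Every name in this sketch is illustrative only and is not taken from the repository.

import argparse

def build_demo_source(args, structured_only):
    # Stand-in for a builder like _configure_logstash_source / _configure_vector_source.
    return f"source(url={args.url!r}, structured_only={structured_only})"

parser = argparse.ArgumentParser()
sources = parser.add_subparsers(required=True)

demo = sources.add_parser("demo", help="Hypothetical source type.")
demo.add_argument("url", type=str)
# The subparser only records *how* to build the source; nothing is
# instantiated until the command handler actually calls the factory.
demo.set_defaults(source=lambda args, structured_only: build_demo_source(args, structured_only))

args = parser.parse_args(["demo", "http://localhost:9200"])
print(args.source(args, True))  # -> source(url='http://localhost:9200', structured_only=True)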
@@ -34,6 +34,7 @@ def pflatmap(
         finally:
             q.put(_End())
 
+    # TODO handle SIGTERM properly
     executor = ThreadPoolExecutor(max_workers=workers)
     try:
         task_futures = [executor.submit(_consume, task) for task in tasks]
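For context, pflatmap appears to fan each input iterator out to a worker thread and merge the results through a shared queue terminated by an _End sentinel. Below is a minimal sketch of that queue-and-sentinel pattern under those assumptions; parallel_flatmap, _END, and the demo inputs are illustrative names, not the repository's implementation.

from concurrent.futures import ThreadPoolExecutor
from queue import Queue

_END = object()  # completion sentinel, analogous to _End in the hunk

def parallel_flatmap(iterables, workers=2):
    # Fan each iterable out to a worker thread; merge results through one queue.
    q: Queue = Queue()

    def consume(it):
        try:
            for item in it:
                q.put(item)
        finally:
            q.put(_END)  # always signal completion, even if the iterator raised

    with ThreadPoolExecutor(max_workers=workers) as executor:
        for it in iterables:
            executor.submit(consume, it)
        remaining = len(iterables)
        while remaining:
            item = q.get()
            if item is _END:
                remaining -= 1
            else:
                yield item

if __name__ == "__main__":
    print(sorted(parallel_flatmap([range(3), range(10, 13)])))  # [0, 1, 2, 10, 11, 12]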
@@ -102,6 +102,7 @@ class LogstashSource(LogSource):
         ]
 
         if self.slices > 1:
+            logger.info(f"Querying ES with {self.slices} scroll slices.")
             yield from pflatmap(
                 [
                     self._run_scroll(sliced_query, actual_indexes)
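The slices setting maps onto Elasticsearch's sliced scroll: the same query is opened as N independent scroll contexts (slice ids 0..N-1) that together cover the index and can be drained in parallel, e.g. via pflatmap as above. A rough sketch of reading one slice follows; the index pattern, query, page size, and the slice= keyword form (which assumes the 8.x elasticsearch-py client; older clients take it in the request body) are assumptions, not code from the repository.

from elasticsearch import Elasticsearch

def scroll_one_slice(client: Elasticsearch, index: str, query: dict,
                     slice_id: int, max_slices: int):
    # Each slice gets its own scroll context; together the slices cover the result set.
    response = client.search(
        index=index,
        query=query,
        scroll="2m",
        size=1000,
        slice={"id": slice_id, "max": max_slices},  # sliced scroll (assumed 8.x kwarg style)
    )
    scroll_id = response["_scroll_id"]
    try:
        while hits := response["hits"]["hits"]:
            yield from hits
            response = client.scroll(scroll_id=scroll_id, scroll="2m")
    finally:
        client.clear_scroll(scroll_id=scroll_id)  # always release the scroll context

# Hypothetical usage: read two slices of a logstash-* index, one generator per slice.
# client = Elasticsearch("https://localhost:9200", verify_certs=False)
# slices = [scroll_one_slice(client, "logstash-*", {"match_all": {}}, i, 2) for i in range(2)]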
@@ -153,8 +154,11 @@ class LogstashSource(LogSource):
 
                 # Get next batch of results
                 scroll_response = self.client.scroll(scroll_id=scroll_id, scroll="2m")
+        except Exception as e:
+            logger.exception(f"Error while scrolling: {e}")
         finally:
             # Clean up scroll context
+            logger.info("Worker done, clearing scroll context.")
             self.client.clear_scroll(scroll_id=scroll_id)
 
     def __str__(self):