From aef2dbc870d3acf819e87afd9368234d9edfd029 Mon Sep 17 00:00:00 2001
From: gmega
Date: Thu, 19 Oct 2023 20:21:06 -0300
Subject: [PATCH] fix bug, add constant column option to csv conversion

---
 adhoc/identify_uploads.py                      |  4 ++--
 logtools/cli/to_csv.py                         | 15 ++++++++++++---
 logtools/cli/utils.py                          | 14 ++++++++++++++
 logtools/log/sources/log_parsers.py            | 11 +++++++----
 .../log/sources/tests/test_filtered_source.py  |  1 -
 logtools/log/sources/tests/test_log_parsers.py |  5 +++++
 6 files changed, 40 insertions(+), 10 deletions(-)
 create mode 100644 logtools/cli/utils.py

diff --git a/adhoc/identify_uploads.py b/adhoc/identify_uploads.py
index e7be8fc..ebb5b62 100644
--- a/adhoc/identify_uploads.py
+++ b/adhoc/identify_uploads.py
@@ -10,8 +10,8 @@
 for line in sys.stdin:
     if uploading:
         line = line.strip()
-        line = line.rsplit(' ', maxsplit=1)
-        line = ' '.join([line[0], f'upload={upload_no}', line[1]])
+        parts = line.rsplit(' ', maxsplit=1)
+        line = ' '.join([parts[0], f'upload={upload_no}', parts[1]])
         print(line)
 
     if 'Uploaded file' in line:
diff --git a/logtools/cli/to_csv.py b/logtools/cli/to_csv.py
index e2e2a6c..342614d 100644
--- a/logtools/cli/to_csv.py
+++ b/logtools/cli/to_csv.py
@@ -1,16 +1,22 @@
-"""Parses a log stream, possibly pre-filtered and/or merged, into a CSV file. Allows chronicles topics to be
+"""Parses a log stream, possibly pre-filtered and/or merged, into a CSV file. Allows Chronicles topics to be
 extracted into their own columns."""
 import sys
 from argparse import ArgumentParser
 from csv import DictWriter
 
+from logtools.cli.utils import kv_pair
 from logtools.log.sources.stream_log_source import StreamLogSource
 
 
 def to_csv(args):
     fields = args.extract_fields
-    writer = DictWriter(sys.stdout,
-                        fieldnames=['timestamp', 'line_number', 'level', 'fields', 'count', 'message'] + fields)
+    constant_columns = dict(args.constant_column) if args.constant_column else {}
+    writer = DictWriter(
+        sys.stdout,
+        fieldnames=['timestamp', 'line_number',
+                    'level', 'fields', 'count', 'message'] + fields + list(constant_columns.keys())
+    )
+
     writer.writeheader()
     for line in StreamLogSource(sys.stdin):
         line_fields = {field: line.fields.get(field, 'NA') for field in fields}
@@ -22,6 +28,7 @@ def to_csv(args):
             'count': line.count,
             'message': line.message,
             **line_fields,
+            **constant_columns,
         })
 
 
@@ -29,6 +36,8 @@ def main():
     argparse = ArgumentParser()
     argparse.add_argument('--extract-fields', nargs='+', default=[],
                           help='Extract chronicles topics into CSV columns')
+    argparse.add_argument('--constant-column', metavar='KEY=VALUE', nargs='+', type=kv_pair,
+                          help='Adds a column with key KEY and constant value VALUE to the CSV')
 
     to_csv(argparse.parse_args())
 
diff --git a/logtools/cli/utils.py b/logtools/cli/utils.py
new file mode 100644
index 0000000..b1355d3
--- /dev/null
+++ b/logtools/cli/utils.py
@@ -0,0 +1,14 @@
+import argparse
+from typing import Tuple
+
+
+def kv_pair(raw: str) -> Tuple[str, str]:
+    """
+    Parse a string of the form 'key=value' and return a tuple (key, value).
+    """
+    if '=' not in raw:
+        msg = f'{raw} is not a valid key=value pair'
+        raise argparse.ArgumentTypeError(msg)
+
+    key, value = raw.split("=", 1)
+    return key, value
diff --git a/logtools/log/sources/log_parsers.py b/logtools/log/sources/log_parsers.py
index 418e48f..7b65636 100644
--- a/logtools/log/sources/log_parsers.py
+++ b/logtools/log/sources/log_parsers.py
@@ -30,8 +30,11 @@ TOPICS = re.compile(r'((\w+=("[\w\s]+"|\S+) )+)?\w+=("[\w\s]+"|\S+)$')
 
 def parse_raw(line: str, parse_datetime: bool = True) -> Optional[TrackedLogLine[LineNumberLocation]]:
     parsed = LOG_LINE.search(line)
+    if not parsed:
+        return None
+
     topics = TOPICS.search(parsed['message'])
-    if not parsed or not topics:
+    if not topics:
         return None
 
     return TrackedLogLine(
@@ -60,7 +63,7 @@ def raw_parser(stream: TextIO, parse_datetime=True) -> LogSource:
 def csv_parser(stream: TextIO, parse_datetime=True) -> LogSource:
     for line_number, line in enumerate(DictReader(stream), start=1):
         try:
-            line = TrackedLogLine(
+            parsed_line: TrackedLogLine = TrackedLogLine(
                 raw=line['message'],  # FIXME this is NOT the raw line...
                 timestamp=line['timestamp'],
                 message=line['message'],
@@ -70,7 +73,7 @@
             )
 
             if parse_datetime:
-                line.timestamp = tsparser.parse(cast(str, line.timestamp))
-            yield line
+                parsed_line.timestamp = tsparser.parse(cast(str, parsed_line.timestamp))
+            yield parsed_line
         except ValueError:
             print(f'Skip unparseable line: {line}', file=sys.stderr)
diff --git a/logtools/log/sources/tests/test_filtered_source.py b/logtools/log/sources/tests/test_filtered_source.py
index be6d4c4..4b51e85 100644
--- a/logtools/log/sources/tests/test_filtered_source.py
+++ b/logtools/log/sources/tests/test_filtered_source.py
@@ -1,6 +1,5 @@
 from dateutil import parser
 
-from logtools.log.log_line import LogLine
 from logtools.log.sources.filtered_source import FilteredSource, timestamp_range
 from logtools.log.sources.log_parsers import parse_raw
 from logtools.log.sources.tests.string_log_source import StringLogSource
diff --git a/logtools/log/sources/tests/test_log_parsers.py b/logtools/log/sources/tests/test_log_parsers.py
index 2ecd61c..792f7bb 100644
--- a/logtools/log/sources/tests/test_log_parsers.py
+++ b/logtools/log/sources/tests/test_log_parsers.py
@@ -17,3 +17,8 @@
     assert line.message == 'Sending want list to peer'
     assert line.topics == 'topics="codex blockexcnetwork" tid=1 peer=16U*7mogoM type=WantBlock items=1'
     assert line.count == 870781
+
+
+def test_raw_parser_should_return_none_if_line_is_not_parseable():
+    line = parse_raw('This is not a log line', parse_datetime=True)
+    assert line is None
\ No newline at end of file