fix bug, add constant column option to csv conversion

gmega 2023-10-19 20:21:06 -03:00
parent 0f8c058ff4
commit aef2dbc870
6 changed files with 40 additions and 10 deletions


@@ -10,8 +10,8 @@ for line in sys.stdin:
     if uploading:
         line = line.strip()
-        line = line.rsplit(' ', maxsplit=1)
-        line = ' '.join([line[0], f'upload={upload_no}', line[1]])
+        parts = line.rsplit(' ', maxsplit=1)
+        line = ' '.join([parts[0], f'upload={upload_no}', parts[1]])
     print(line)
     if 'Uploaded file' in line:


@@ -1,16 +1,22 @@
-"""Parses a log stream, possibly pre-filtered and/or merged, into a CSV file. Allows chronicles topics to be
+"""Parses a log stream, possibly pre-filtered and/or merged, into a CSV file. Allows Chronicles topics to be
 extracted into their own columns."""
 import sys
 from argparse import ArgumentParser
 from csv import DictWriter
 
+from logtools.cli.utils import kv_pair
 from logtools.log.sources.stream_log_source import StreamLogSource
 
 
 def to_csv(args):
     fields = args.extract_fields
-    writer = DictWriter(sys.stdout,
-                        fieldnames=['timestamp', 'line_number', 'level', 'fields', 'count', 'message'] + fields)
+    constant_columns = dict(args.constant_column) if args.constant_column else {}
+    writer = DictWriter(
+        sys.stdout,
+        fieldnames=['timestamp', 'line_number',
+                    'level', 'fields', 'count', 'message'] + fields + list(constant_columns.keys())
+    )
     writer.writeheader()
     for line in StreamLogSource(sys.stdin):
         line_fields = {field: line.fields.get(field, 'NA') for field in fields}
@@ -22,6 +28,7 @@ def to_csv(args):
             'count': line.count,
             'message': line.message,
             **line_fields,
+            **constant_columns,
         })
@@ -29,6 +36,8 @@ def main():
     argparse = ArgumentParser()
     argparse.add_argument('--extract-fields', nargs='+', default=[],
                           help='Extract chronicles topics into CSV columns')
+    argparse.add_argument('--constant-column', metavar='KEY=VALUE', nargs='+', type=kv_pair,
+                          help='Adds a column with key KEY and constant value VALUE to the CSV')
 
     to_csv(argparse.parse_args())
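To make the new option concrete, here is a minimal sketch of how the parsed --constant-column values become extra CSV columns. Only the flag definition mirrors the diff above; the column names 'run' and 'node' are made-up examples, and the sketch is not part of the commit.

    from argparse import ArgumentParser

    from logtools.cli.utils import kv_pair  # helper introduced by this commit

    parser = ArgumentParser()
    parser.add_argument('--constant-column', metavar='KEY=VALUE', nargs='+', type=kv_pair,
                        help='Adds a column with key KEY and constant value VALUE to the CSV')

    # nargs='+' with type=kv_pair yields a list of (key, value) tuples...
    args = parser.parse_args(['--constant-column', 'run=3', 'node=bootstrap-1'])
    assert args.constant_column == [('run', '3'), ('node', 'bootstrap-1')]

    # ...which to_csv collapses into a dict, appends to the CSV header,
    # and splices into every row it writes
    constant_columns = dict(args.constant_column) if args.constant_column else {}
    assert constant_columns == {'run': '3', 'node': 'bootstrap-1'}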

logtools/cli/utils.py (new file, +14 lines)

@@ -0,0 +1,14 @@
+import argparse
+from typing import Tuple
+
+
+def kv_pair(raw: str) -> Tuple[str, str]:
+    """
+    Parse a string of the form 'key=value' and return a tuple (key, value).
+    """
+    if '=' not in raw:
+        msg = f'{raw} is not a valid key=value pair'
+        raise argparse.ArgumentTypeError(msg)
+
+    key, value = raw.split("=", 1)
+    return key, value
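A quick standalone check of the helper's behavior (illustrative only, not a test shipped with this commit): it splits on the first '=' only, and signals malformed input through argparse's own error type so the CLI reports a normal usage error.

    import argparse

    from logtools.cli.utils import kv_pair

    assert kv_pair('region=us-east-1') == ('region', 'us-east-1')
    # only the first '=' splits, so values may themselves contain '='
    assert kv_pair('retry=a=b') == ('retry', 'a=b')

    try:
        kv_pair('no-separator')
    except argparse.ArgumentTypeError as err:
        print(err)  # no-separator is not a valid key=value pair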


@@ -30,8 +30,11 @@ TOPICS = re.compile(r'((\w+=("[\w\s]+"|\S+) )+)?\w+=("[\w\s]+"|\S+)$')
 def parse_raw(line: str, parse_datetime: bool = True) -> Optional[TrackedLogLine[LineNumberLocation]]:
     parsed = LOG_LINE.search(line)
+    if not parsed:
+        return None
+
     topics = TOPICS.search(parsed['message'])
-    if not parsed or not topics:
+    if not topics:
         return None
 
     return TrackedLogLine(
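This hunk is likely the bug fix the commit title refers to: LOG_LINE.search returns None for lines it cannot parse, and the old code indexed parsed['message'] before the None check, so unparseable input crashed instead of yielding None. A self-contained illustration follows; it uses a drastically simplified stand-in regex, since the real LOG_LINE pattern is not shown in this hunk.

    import re
    from typing import Optional

    # assumption: simplified stand-in for the real LOG_LINE regex
    LOG_LINE = re.compile(r'^(?P<level>TRC|DBG|INF|WRN|ERR) (?P<message>.*)$')

    def message_of(line: str) -> Optional[str]:
        parsed = LOG_LINE.search(line)
        if not parsed:
            # without this guard, parsed['message'] below raises
            # TypeError: 'NoneType' object is not subscriptable
            return None
        return parsed['message']

    assert message_of('This is not a log line') is None
    assert message_of('INF Sending want list to peer') == 'Sending want list to peer'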
@@ -60,7 +63,7 @@ def raw_parser(stream: TextIO, parse_datetime=True) -> LogSource:
 def csv_parser(stream: TextIO, parse_datetime=True) -> LogSource:
     for line_number, line in enumerate(DictReader(stream), start=1):
         try:
-            line = TrackedLogLine(
+            parsed_line: TrackedLogLine = TrackedLogLine(
                 raw=line['message'],  # FIXME this is NOT the raw line...
                 timestamp=line['timestamp'],
                 message=line['message'],
@@ -70,7 +73,7 @@ def csv_parser(stream: TextIO, parse_datetime=True) -> LogSource:
             )
             if parse_datetime:
-                line.timestamp = tsparser.parse(cast(str, line.timestamp))
-            yield line
+                parsed_line.timestamp = tsparser.parse(cast(str, parsed_line.timestamp))
+            yield parsed_line
         except ValueError:
             print(f'Skip unparseable line: {line}', file=sys.stderr)


@@ -1,6 +1,5 @@
 from dateutil import parser
 
-from logtools.log.log_line import LogLine
 from logtools.log.sources.filtered_source import FilteredSource, timestamp_range
 from logtools.log.sources.log_parsers import parse_raw
 from logtools.log.sources.tests.string_log_source import StringLogSource


@@ -17,3 +17,8 @@ def test_raw_parser_should_parse_logline_from_string():
     assert line.message == 'Sending want list to peer'
     assert line.topics == 'topics="codex blockexcnetwork" tid=1 peer=16U*7mogoM type=WantBlock items=1'
     assert line.count == 870781
+
+
+def test_raw_parser_should_return_none_if_line_is_not_parseable():
+    line = parse_raw('This is not a log line', parse_datetime=True)
+    assert line is None