fix bug, add constant column option to csv conversion

This commit is contained in:
gmega 2023-10-19 20:21:06 -03:00
parent 0f8c058ff4
commit aef2dbc870
No known key found for this signature in database
GPG Key ID: FFD8DAF00660270F
6 changed files with 40 additions and 10 deletions

View File

@ -10,8 +10,8 @@ for line in sys.stdin:
if uploading:
line = line.strip()
line = line.rsplit(' ', maxsplit=1)
line = ' '.join([line[0], f'upload={upload_no}', line[1]])
parts = line.rsplit(' ', maxsplit=1)
line = ' '.join([parts[0], f'upload={upload_no}', parts[1]])
print(line)
if 'Uploaded file' in line:

View File

@ -1,16 +1,22 @@
"""Parses a log stream, possibly pre-filtered and/or merged, into a CSV file. Allows chronicles topics to be
"""Parses a log stream, possibly pre-filtered and/or merged, into a CSV file. Allows Chronicles topics to be
extracted into their own columns."""
import sys
from argparse import ArgumentParser
from csv import DictWriter
from logtools.cli.utils import kv_pair
from logtools.log.sources.stream_log_source import StreamLogSource
def to_csv(args):
fields = args.extract_fields
writer = DictWriter(sys.stdout,
fieldnames=['timestamp', 'line_number', 'level', 'fields', 'count', 'message'] + fields)
constant_columns = dict(args.constant_column) if args.constant_column else {}
writer = DictWriter(
sys.stdout,
fieldnames=['timestamp', 'line_number',
'level', 'fields', 'count', 'message'] + fields + list(constant_columns.keys())
)
writer.writeheader()
for line in StreamLogSource(sys.stdin):
line_fields = {field: line.fields.get(field, 'NA') for field in fields}
@ -22,6 +28,7 @@ def to_csv(args):
'count': line.count,
'message': line.message,
**line_fields,
**constant_columns,
})
@ -29,6 +36,8 @@ def main():
argparse = ArgumentParser()
argparse.add_argument('--extract-fields', nargs='+', default=[],
help='Extract chronicles topics into CSV columns')
argparse.add_argument('--constant-column', metavar='KEY=VALUE', nargs='+', type=kv_pair,
help='Adds a column with key KEY and constant value VALUE to the CSV')
to_csv(argparse.parse_args())

14
logtools/cli/utils.py Normal file
View File

@ -0,0 +1,14 @@
import argparse
from typing import Tuple
def kv_pair(raw: str) -> Tuple[str, str]:
    """
    Parse a string of the form 'key=value' and return a tuple (key, value).
    """
    # partition splits on the first '='; an empty separator means none was found.
    key, sep, value = raw.partition("=")
    if not sep:
        msg = f'{raw} is not a valid key=value pair'
        raise argparse.ArgumentTypeError(msg)
    return key, value

View File

@ -30,8 +30,11 @@ TOPICS = re.compile(r'((\w+=("[\w\s]+"|\S+) )+)?\w+=("[\w\s]+"|\S+)$')
def parse_raw(line: str, parse_datetime: bool = True) -> Optional[TrackedLogLine[LineNumberLocation]]:
parsed = LOG_LINE.search(line)
if not parsed:
return None
topics = TOPICS.search(parsed['message'])
if not parsed or not topics:
if not topics:
return None
return TrackedLogLine(
@ -60,7 +63,7 @@ def raw_parser(stream: TextIO, parse_datetime=True) -> LogSource:
def csv_parser(stream: TextIO, parse_datetime=True) -> LogSource:
for line_number, line in enumerate(DictReader(stream), start=1):
try:
line = TrackedLogLine(
parsed_line: TrackedLogLine = TrackedLogLine(
raw=line['message'], # FIXME this is NOT the raw line...
timestamp=line['timestamp'],
message=line['message'],
@ -70,7 +73,7 @@ def csv_parser(stream: TextIO, parse_datetime=True) -> LogSource:
)
if parse_datetime:
line.timestamp = tsparser.parse(cast(str, line.timestamp))
yield line
parsed_line.timestamp = tsparser.parse(cast(str, parsed_line.timestamp))
yield parsed_line
except ValueError:
print(f'Skip unparseable line: {line}', file=sys.stderr)

View File

@ -1,6 +1,5 @@
from dateutil import parser
from logtools.log.log_line import LogLine
from logtools.log.sources.filtered_source import FilteredSource, timestamp_range
from logtools.log.sources.log_parsers import parse_raw
from logtools.log.sources.tests.string_log_source import StringLogSource

View File

@ -17,3 +17,8 @@ def test_raw_parser_should_parse_logline_from_string():
assert line.message == 'Sending want list to peer'
assert line.topics == 'topics="codex blockexcnetwork" tid=1 peer=16U*7mogoM type=WantBlock items=1'
assert line.count == 870781
def test_raw_parser_should_return_none_if_line_is_not_parseable():
    # Input that doesn't match the expected log-line format should yield None.
    result = parse_raw('This is not a log line', parse_datetime=True)
    assert result is None