fix bug, add constant column option to csv conversion

parent 0f8c058ff4
commit aef2dbc870
@@ -10,8 +10,8 @@ for line in sys.stdin:
     if uploading:
         line = line.strip()
-        line = line.rsplit(' ', maxsplit=1)
-        line = ' '.join([line[0], f'upload={upload_no}', line[1]])
+        parts = line.rsplit(' ', maxsplit=1)
+        line = ' '.join([parts[0], f'upload={upload_no}', parts[1]])
         print(line)

     if 'Uploaded file' in line:
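
The rename from `line` to `parts` stops the loop variable from being temporarily rebound to a list mid-iteration. A minimal sketch of the pattern (the input string and upload number are illustrative, not from the repo):

    line = 'node-1 Uploaded file 00:42'
    parts = line.rsplit(' ', maxsplit=1)           # ['node-1 Uploaded file', '00:42']
    line = ' '.join([parts[0], 'upload=3', parts[1]])
    print(line)                                    # node-1 Uploaded file upload=3 00:42
    # keeping `line` a string means later checks like "'Uploaded file' in line" stay valid
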
@@ -1,16 +1,22 @@
-"""Parses a log stream, possibly pre-filtered and/or merged, into a CSV file. Allows chronicles topics to be
+"""Parses a log stream, possibly pre-filtered and/or merged, into a CSV file. Allows Chronicles topics to be
 extracted into their own columns."""
 import sys
 from argparse import ArgumentParser
 from csv import DictWriter

+from logtools.cli.utils import kv_pair
 from logtools.log.sources.stream_log_source import StreamLogSource


 def to_csv(args):
     fields = args.extract_fields
-    writer = DictWriter(sys.stdout,
-                        fieldnames=['timestamp', 'line_number', 'level', 'fields', 'count', 'message'] + fields)
+    constant_columns = dict(args.constant_column) if args.constant_column else {}
+    writer = DictWriter(
+        sys.stdout,
+        fieldnames=['timestamp', 'line_number',
+                    'level', 'fields', 'count', 'message'] + fields + list(constant_columns.keys())
+    )

     writer.writeheader()
     for line in StreamLogSource(sys.stdin):
         line_fields = {field: line.fields.get(field, 'NA') for field in fields}
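
For context on the new constant_columns wiring: DictWriter only needs the extra keys listed in fieldnames and present in every row dict. A standalone sketch, with made-up column names standing in for what --constant-column would produce:

    import sys
    from csv import DictWriter

    constant_columns = {'run': '7', 'node': 'alpha'}   # stand-in for parsed --constant-column pairs
    writer = DictWriter(sys.stdout,
                        fieldnames=['timestamp', 'message'] + list(constant_columns.keys()))
    writer.writeheader()
    # merging the constants into each row keeps every CSV line self-describing
    writer.writerow({'timestamp': '2024-01-01 00:00:00', 'message': 'hello', **constant_columns})
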
@@ -22,6 +28,7 @@ def to_csv(args):
             'count': line.count,
             'message': line.message,
             **line_fields,
+            **constant_columns,
         })

@@ -29,6 +36,8 @@ def main():
     argparse = ArgumentParser()
     argparse.add_argument('--extract-fields', nargs='+', default=[],
                           help='Extract chronicles topics into CSV columns')
+    argparse.add_argument('--constant-column', metavar='KEY=VALUE', nargs='+', type=kv_pair,
+                          help='Adds a column with key KEY and constant value VALUE to the CSV')

     to_csv(argparse.parse_args())
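
One subtlety the `if args.constant_column else {}` guard in to_csv covers: with `nargs='+'` and no `default`, argparse leaves the attribute as `None` when the flag is omitted entirely. A minimal sketch (flag values are illustrative):

    from argparse import ArgumentParser

    parser = ArgumentParser()
    parser.add_argument('--constant-column', nargs='+')
    print(parser.parse_args([]).constant_column)   # None when the flag is omitted
    print(parser.parse_args(['--constant-column', 'a=1', 'b=2']).constant_column)
    # ['a=1', 'b=2']; in the real parser each token also passes through type=kv_pair
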
@@ -0,0 +1,14 @@
+import argparse
+from typing import Tuple
+
+
+def kv_pair(raw: str) -> Tuple[str, str]:
+    """
+    Parse a string of the form 'key=value' and return a tuple (key, value).
+    """
+    if '=' not in raw:
+        msg = f'{raw} is not a valid key=value pair'
+        raise argparse.ArgumentTypeError(msg)
+
+    key, value = raw.split("=", 1)
+    return key, value
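
Raising argparse.ArgumentTypeError rather than a bare ValueError lets argparse report bad input as a normal usage error instead of a traceback. Roughly what a caller sees, assuming the new module lands at logtools/cli/utils.py as the import above implies:

    from argparse import ArgumentParser
    from logtools.cli.utils import kv_pair

    parser = ArgumentParser(prog='to_csv')
    parser.add_argument('--constant-column', nargs='+', type=kv_pair)

    print(parser.parse_args(['--constant-column', 'node=alpha']).constant_column)
    # [('node', 'alpha')]
    parser.parse_args(['--constant-column', 'oops'])
    # exits with: to_csv: error: argument --constant-column: oops is not a valid key=value pair
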
@@ -30,8 +30,11 @@ TOPICS = re.compile(r'((\w+=("[\w\s]+"|\S+) )+)?\w+=("[\w\s]+"|\S+)$')

 def parse_raw(line: str, parse_datetime: bool = True) -> Optional[TrackedLogLine[LineNumberLocation]]:
     parsed = LOG_LINE.search(line)
+    if not parsed:
+        return None
+
     topics = TOPICS.search(parsed['message'])
-    if not parsed or not topics:
+    if not topics:
         return None

     return TrackedLogLine(
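
This is the bug named in the commit message: the old code indexed parsed['message'] before checking whether LOG_LINE.search had matched at all, so an unparseable line raised TypeError instead of returning None. The new early return restores the documented contract, which the test added below exercises. A reduced sketch of the failure mode (the regex here is illustrative, far simpler than the real LOG_LINE):

    import re

    LOG_LINE = re.compile(r'(?P<level>[A-Z]{3}) (?P<message>.*)')

    parsed = LOG_LINE.search('This is not a log line')   # no match, so None
    # old ordering: parsed['message'] would raise TypeError ('NoneType' is not subscriptable)
    if not parsed:                                       # new ordering: bail out first
        print('returns None for unparseable input')
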
@@ -60,7 +63,7 @@ def raw_parser(stream: TextIO, parse_datetime=True) -> LogSource:
 def csv_parser(stream: TextIO, parse_datetime=True) -> LogSource:
     for line_number, line in enumerate(DictReader(stream), start=1):
         try:
-            line = TrackedLogLine(
+            parsed_line: TrackedLogLine = TrackedLogLine(
                 raw=line['message'],  # FIXME this is NOT the raw line...
                 timestamp=line['timestamp'],
                 message=line['message'],
@@ -70,7 +73,7 @@ def csv_parser(stream: TextIO, parse_datetime=True) -> LogSource:
             )

             if parse_datetime:
-                line.timestamp = tsparser.parse(cast(str, line.timestamp))
-            yield line
+                parsed_line.timestamp = tsparser.parse(cast(str, parsed_line.timestamp))
+            yield parsed_line
         except ValueError:
             print(f'Skip unparseable line: {line}', file=sys.stderr)
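
The parsed_line rename also fixes the error path: previously `line` was rebound from the CSV row to the constructed object, so when tsparser.parse raised, the 'Skip unparseable line' message printed a half-built TrackedLogLine instead of the offending row. A reduced sketch of the shadowing (plain dicts stand in for the real types):

    rows = [{'timestamp': 'not-a-date', 'message': 'hi'}]
    for line in rows:
        try:
            parsed_line = {'ts': line['timestamp']}   # stand-in for TrackedLogLine(...)
            raise ValueError('bad timestamp')         # stand-in for tsparser.parse failing
        except ValueError:
            # with the rename, `line` still names the raw row we want to report
            print(f'Skip unparseable line: {line}')
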
@@ -1,6 +1,5 @@
 from dateutil import parser

-from logtools.log.log_line import LogLine
 from logtools.log.sources.filtered_source import FilteredSource, timestamp_range
 from logtools.log.sources.log_parsers import parse_raw
 from logtools.log.sources.tests.string_log_source import StringLogSource
@@ -17,3 +17,8 @@ def test_raw_parser_should_parse_logline_from_string():
     assert line.message == 'Sending want list to peer'
     assert line.topics == 'topics="codex blockexcnetwork" tid=1 peer=16U*7mogoM type=WantBlock items=1'
     assert line.count == 870781
+
+
+def test_raw_parser_should_return_none_if_line_is_not_parseable():
+    line = parse_raw('This is not a log line', parse_datetime=True)
+    assert line is None