Reorder regex alternatives so escaped quotes match first

This commit is contained in:
gmega 2024-02-05 19:12:16 -03:00
parent aa1bd2e161
commit 4e88571216
No known key found for this signature in database
GPG Key ID: FFD8DAF00660270F
3 changed files with 21 additions and 3 deletions

View File

@ -14,9 +14,9 @@ _LOG_LINE = re.compile(
r'count=(?P<count>\d+)$' r'count=(?P<count>\d+)$'
) )
_TOPICS = re.compile(r'((\w+=("[^"]+"|\S+) )+)?\w+=("([^"\\]|\\")+"|\S+)$') _TOPICS = re.compile(r'((\w+=("[^"]+"|\S+) )+)?\w+=("(\\"|[^"])+"|\S+)$')
_TOPICS_KV = re.compile(r'(?P<key>\w+)=(?P<value>"(?:[^"\\]|\\")+"|\S+)') _TOPICS_KV = re.compile(r'(?P<key>\w+)=(?P<value>"(?:\\"|[^"])+"|\S+)')
class LogLevel(Enum): class LogLevel(Enum):

View File

@ -86,3 +86,21 @@ def test_should_parse_topics_with_escaped_quotes_in_values():
'topics': '"codex pendingblocks"', 'topics': '"codex pendingblocks"',
'address': '"cid: \\"zDx*QP4zx9\\""', 'address': '"cid: \\"zDx*QP4zx9\\""',
} }
def test_should_parse_topics_with_escaped_quotes_and_backlashes_in_value():
    """Check that a quoted topic value with escaped double quotes is kept whole.

    The ``config`` value mixes backslash-escaped double quotes with plain
    single quotes; it must come back verbatim in ``fields`` while the trailing
    ``count`` is still parsed separately.
    """
    raw_line = (
        'TRC 2024-02-02 20:37:18.316+00:00 Starting codex node topics="codex node" '
        'config="some \\"quoted\\" string with \'more\' escape chars" count=7'
    )
    expected_fields = {
        'topics': '"codex node"',
        'config': '"some \\"quoted\\" string with \'more\' escape chars"',
    }

    # Only the first parsed line is needed, so pull a single item from the source.
    parsed_lines = iter(ChroniclesRawSource(StringLogSource(lines=raw_line)))
    parsed = next(parsed_lines)

    assert parsed.message == "Starting codex node"
    assert parsed.count == 7
    assert parsed.fields == expected_fields

View File

@ -1,6 +1,6 @@
[tool.poetry] [tool.poetry]
name = "logtools" name = "logtools"
version = "1.1.2" version = "1.1.3"
description = "" description = ""
authors = ["gmega <giuliano@status.im>"] authors = ["gmega <giuliano@status.im>"]
readme = "README.md" readme = "README.md"