From 950484a674899699de1f1313b902c12f1fd17490 Mon Sep 17 00:00:00 2001
From: Jazz Turner-Baggs <473256+jazzz@users.noreply.github.com>
Date: Tue, 2 Jun 2026 21:08:53 -0700
Subject: [PATCH] Add logging helper scripts

normalize_logs.py replaces leading timestamps with offsets from the first
timestamp, align_logs.py translates the identifiers of one log to match
another, and process_logs.sh runs both over a pair of log files in place.
---
Note: this patch was edited by hand, so the blob `index` lines are omitted.

 align_logs.py     | 111 +++++++++++++++++++++++++++++++++++++++++++++++++
 normalize_logs.py |  62 +++++++++++++++++++++++++++
 process_logs.sh   |  27 ++++++++++++
 3 files changed, 200 insertions(+)
 create mode 100644 align_logs.py
 create mode 100755 normalize_logs.py
 create mode 100755 process_logs.sh

diff --git a/align_logs.py b/align_logs.py
new file mode 100644
--- /dev/null
+++ b/align_logs.py
@@ -0,0 +1,111 @@
+#!/usr/bin/env python3
+"""
+Usage: python3 align_logs.py <reference_file> <target_file>
+
+Translates identifiers in target_file to match reference_file,
+based on order of first appearance. Output is the translated target_file.
+Mapping is printed to stderr.
+"""
+import re
+import sys
+
+# (field name, pattern) pairs; group 1 of each pattern captures the identifier.
+PATTERNS = [
+    ('app', re.compile(r'(?<=app=")([^"]+)(?=")')),
+    ('proposal_id', re.compile(r'(?<=proposal_id=)(\d+)')),
+    ('conversation', re.compile(r'(?<=conversation=")([^"]+)(?=")')),
+    ('convo', re.compile(r'(?<=convo=")([^"]+)(?=")')),
+]
+
+# Fields whose values are bare numbers. A number only identifies something
+# inside its own field, so it is never replaced elsewhere in a line (where it
+# would corrupt timestamps, counters and other unrelated numbers).
+FIELD_SCOPED = {'proposal_id'}
+
+
+def extract_ordered(lines, pattern):
+    """Return the distinct group-1 matches of pattern in first-seen order."""
+    seen = {}
+    for line in lines:
+        for m in pattern.finditer(line):
+            seen.setdefault(m.group(1), None)
+    return list(seen)
+
+
+def build_mapping(ref_lines, tgt_lines):
+    """Pair target and reference identifiers by order of first appearance.
+
+    Returns {field name: {target value: reference value}} holding only the
+    pairs that differ; each of them is also reported on stderr.
+    """
+    mapping = {}
+    for name, pat in PATTERNS:
+        ref_vals = extract_ordered(ref_lines, pat)
+        tgt_vals = extract_ordered(tgt_lines, pat)
+        # zip() stops at the shorter list, so identifiers with no counterpart
+        # in the other file are left untranslated.
+        for tgt, ref in zip(tgt_vals, ref_vals):
+            if tgt != ref:
+                mapping.setdefault(name, {})[tgt] = ref
+                print(f" [{name}] {tgt} -> {ref}", file=sys.stderr)
+    return mapping
+
+
+def apply_mapping(lines, mapping):
+    """Return lines with every mapped identifier translated.
+
+    Values of FIELD_SCOPED fields are replaced only inside their own field;
+    identifiers of all other fields are replaced wherever they occur in a
+    line. Everything happens in a single regex pass, so a replacement is
+    never itself replaced again (which keeps swaps such as a->b, b->a correct).
+    """
+    if not mapping:
+        return lines
+
+    scoped = {}  # regex group name -> {target value: reference value}
+    flat = {}    # identifiers that are replaced anywhere in a line
+    alternatives = []
+    for idx, (name, pat) in enumerate(PATTERNS):
+        table = mapping.get(name, {})
+        if name in FIELD_SCOPED:
+            # Always claim the whole field value, even when it is unmapped,
+            # so that no other identifier can be replaced inside it.
+            group = f"scoped{idx}"
+            scoped[group] = table
+            alternatives.append(f"(?P<{group}>{pat.pattern})")
+        else:
+            # Later fields win when two fields map the same value.
+            flat.update(table)
+    # Longest first, so a key is never shadowed by a shorter key that is its
+    # prefix.
+    alternatives.extend(
+        re.escape(k) for k in sorted(flat, key=len, reverse=True)
+    )
+    combined = re.compile('|'.join(alternatives))
+
+    def translate(m):
+        for group, table in scoped.items():
+            value = m.group(group)
+            if value is not None:
+                return table.get(value, value)
+        return flat[m.group(0)]
+
+    return [combined.sub(translate, line) for line in lines]
+
+
+def main():
+    if len(sys.argv) != 3:
+        print(f"Usage: {sys.argv[0]} <reference_file> <target_file>",
+              file=sys.stderr)
+        sys.exit(1)
+
+    with open(sys.argv[1]) as f:
+        ref_lines = f.readlines()
+    with open(sys.argv[2]) as f:
+        tgt_lines = f.readlines()
+
+    mapping = build_mapping(ref_lines, tgt_lines)
+    sys.stdout.writelines(apply_mapping(tgt_lines, mapping))
+
+
+if __name__ == "__main__":
+    main()
diff --git a/normalize_logs.py b/normalize_logs.py
new file mode 100755
--- /dev/null
+++ b/normalize_logs.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python3
+"""
+Usage: python3 normalize_logs.py < input_file > output_file
+
+Replaces the timestamp at the start of each line with the offset from the
+first timestamp in the input, rounded to the nearest 5ms. Lines that do not
+start with a timestamp are passed through unchanged.
+"""
+import re
+import sys
+from datetime import datetime
+
+TIMESTAMP_RE = re.compile(r'^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d+Z)')
+# Matches fractional digits beyond the six that strptime's %f accepts.
+EXCESS_FRACTION_RE = re.compile(r'(\.\d{6})\d+Z$')
+
+
+def parse_ts(s):
+    """Parse a timestamp matched by TIMESTAMP_RE into a naive datetime.
+
+    Digits finer than a microsecond (e.g. nanosecond timestamps) are dropped,
+    because strptime raises ValueError on more than six fractional digits.
+    """
+    s = EXCESS_FRACTION_RE.sub(r'\1Z', s)
+    return datetime.strptime(s, "%Y-%m-%dT%H:%M:%S.%fZ")
+
+
+def fmt_offset(ms):
+    """Format a millisecond offset as a signed, fixed-width field.
+
+    Magnitudes below one second are printed in ms, larger ones in seconds.
+    The sign is emitted separately so a line that is older than the first
+    timestamp gets a leading "-" instead of "+" followed by a negative number.
+    """
+    sign = "-" if ms < 0 else "+"
+    magnitude = abs(ms)
+    if magnitude < 1000:
+        return f"{sign}{magnitude:7.0f}ms"
+    return f"{sign}{magnitude / 1000:7.3f}s "
+
+
+def main():
+    base = None
+    for line in sys.stdin:
+        m = TIMESTAMP_RE.match(line)
+        if not m:
+            print(line, end="")
+            continue
+
+        ts = parse_ts(m.group(1))
+        if base is None:
+            base = ts
+
+        delta_us = (ts - base).total_seconds() * 1_000_000
+        rounded_ms = round(delta_us / 5000) * 5  # nearest multiple of 5ms
+
+        rest = line[m.end():]
+        print(f"{fmt_offset(rounded_ms)} {rest}", end="")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/process_logs.sh b/process_logs.sh
new file mode 100755
--- /dev/null
+++ b/process_logs.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Usage: process_logs.sh <file1> <file2>
+#
+# Normalizes the timestamps of both log files, translates the identifiers of
+# <file2> to match <file1>, and overwrites both files with the result.
+set -e
+
+if [ "$#" -ne 2 ]; then
+    echo "Usage: $0 <file1> <file2>" >&2
+    exit 1
+fi
+
+FILE1="$1"
+FILE2="$2"
+SCRIPTS="$(dirname "$0")"
+
+NORM1=$(python3 "$SCRIPTS/normalize_logs.py" < "$FILE1")
+NORM2=$(python3 "$SCRIPTS/normalize_logs.py" < "$FILE2")
+
+# The mapping report that align_logs.py prints to stderr is discarded.
+ALIGNED2=$(python3 "$SCRIPTS/align_logs.py" <(printf '%s\n' "$NORM1") <(printf '%s\n' "$NORM2") 2>/dev/null)
+
+# The inputs are overwritten only here, after every step above has succeeded
+# (set -e aborts on the first failure). printf is used instead of echo so that
+# log content that looks like an echo option (e.g. "-n") is written verbatim.
+printf '%s\n' "$NORM1" > "$FILE1"
+printf '%s\n' "$ALIGNED2" > "$FILE2"