From 71a481c27fa27dd72a90dc7ab4e7875ed6a651c5 Mon Sep 17 00:00:00 2001 From: Roman Date: Fri, 27 Jun 2025 09:52:03 +0800 Subject: [PATCH] fix: skip DEBUG and TRACE lines without matching keywords --- src/tfidf/tfidf.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/tfidf/tfidf.py b/src/tfidf/tfidf.py index 829c8db..71b4ac2 100644 --- a/src/tfidf/tfidf.py +++ b/src/tfidf/tfidf.py @@ -56,7 +56,7 @@ class LogTfidf: df.columns = ["d1", "d2"] # Simplified column naming for clarity df = df.sort_values(by="d2", ascending=False) pattern = "|".join(keywords) - df = df[~((df["d1"].str.contains("INFO")) & (~df["d1"].str.contains(pattern)))] + df = df[~((df["d1"].str.contains("INFO|DEBUG|TRACE")) & (~df["d1"].str.contains(pattern)))] # Normalize and deduplicate df["d1_normalized"] = df["d1"].apply(normalize_log_message)