From 62a2a3c3f766bb4e467fa3522457e9a1f2cb01d2 Mon Sep 17 00:00:00 2001
From: Roman
Date: Fri, 27 Jun 2025 16:31:30 +0800
Subject: [PATCH] fix: disable parsed log output to file

---
NOTE(review): this patch had been flattened onto a single line; the line
breaks below were rebuilt from the hunk headers and the diffstat. The exact
indentation and the position of blank context lines are assumed (standard
4-space Python layout) -- confirm against the target tree, or apply with
`git apply --ignore-whitespace`, before relying on it.

NOTE(review): behaviour visible in this diff -- write_output() prints to
stdout unconditionally (the print_to_stdout switch is removed), and it no
longer emits the extra blank line that print(line) produced for each row.

 src/node/nomos_node.py |  2 +-
 src/tfidf/tfidf.py     | 20 ++++++++++++--------
 2 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/src/node/nomos_node.py b/src/node/nomos_node.py
index 663f09b..e2646a9 100644
--- a/src/node/nomos_node.py
+++ b/src/node/nomos_node.py
@@ -152,7 +152,7 @@ class NomosNode:
 
         logger.debug(f"Parsing log for node {self.name()}")
         log_tfidf = LogTfidf()
-        log_tfidf.parse_log(self._log_path, f"{self._log_path}.parsed", keywords, True)
+        log_tfidf.parse_log(self._log_path, keywords, None)
 
     def extract_config(self, target_file):
         # Copy the config file from first node
diff --git a/src/tfidf/tfidf.py b/src/tfidf/tfidf.py
index 71b4ac2..cf41bbe 100644
--- a/src/tfidf/tfidf.py
+++ b/src/tfidf/tfidf.py
@@ -15,6 +15,16 @@ def normalize_log_message(text):
     return " ".join(text.split())
 
 
+def write_output(df, output_file=None):
+    lines = df["d1"].astype(str) + "\n"
+
+    if output_file:
+        with open(output_file, "w") as out_file:
+            out_file.writelines(lines)
+
+    print("".join(lines), end="")
+
+
 class LogTfidf:
     def __init__(self):
         self.stemmer = PorterStemmer()
@@ -33,7 +43,7 @@ class LogTfidf:
         tokens = word_tokenize(text.lower())
         return self.get_stemmed_tokens(tokens)
 
-    def parse_log(self, input_file, output_file, keywords, print_to_stdout=True):
+    def parse_log(self, input_file, keywords, output_file=None):
         vectorizer = ext.CountVectorizer(tokenizer=self.get_tokens, stop_words=self.stop_words, token_pattern=None)
         with open(input_file, "r") as file:
             lines = [line.rstrip() for line in file]
@@ -63,10 +73,4 @@ class LogTfidf:
         df = df.drop_duplicates(subset="d1_normalized", keep="first")
         df = df.drop(columns="d1_normalized")
 
-        with open(output_file, "w") as out_file:
-            for index, row in df.iterrows():
-                line = "{0}\n"
-                line = line.format(row["d1"])
-                out_file.write(line)
-                if print_to_stdout:
-                    print(line)
+        write_output(df, output_file)