2023-08-14 23:36:22 +00:00
|
|
|
import pandas as pd
|
|
|
|
import json
|
|
|
|
import argparse
|
2023-08-15 10:34:11 +00:00
|
|
|
import os
|
2023-08-14 23:36:22 +00:00
|
|
|
|
|
|
|
|
|
|
|
def data_to_csv(data_path, output_path):
    """Flatten a file of JSON lines and append the rows to a CSV file.

    Every line of ``data_path`` has trailing commas/newlines stripped, is
    parsed as JSON, flattened with ``pandas.json_normalize`` and appended to
    ``output_path``. A ``step`` column holds the zero-based index of the
    input line the row came from. Lines that are not valid JSON are reported
    on stdout and skipped; they still consume a step number, so ``step``
    always matches the position in the input file.

    Args:
        data_path: Path of the input file, one JSON document per line
            (a trailing comma after each document is tolerated).
        output_path: Path of the CSV file to append to. It is created if it
            does not exist.
    """
    # Rows are appended (mode='a'), so the output may already contain a header
    # from an earlier run. Only emit the header when the file is missing or
    # empty; otherwise a second header row would land in the middle of the data.
    header_written = (
        os.path.exists(output_path) and os.path.getsize(output_path) > 0
    )

    # JSON text is UTF-8 by specification; do not depend on the locale default.
    with open(data_path, 'r', encoding='utf-8') as f:
        for step, line in enumerate(f):
            try:
                data = json.loads(line.rstrip(",\n"))
            except json.JSONDecodeError:
                print(f"Failed to parse line: {line}")
                continue

            normalized = pd.json_normalize(data)
            normalized['step'] = step
            normalized.to_csv(
                output_path, mode='a', header=not header_written, index=False
            )
            # From the first successful write onwards the header is in place.
            header_written = True
|
|
|
|
|
|
|
|
def all_data_to_csv(all_data_path):
    """Convert every ``.json`` file in a directory to a sibling ``.csv`` file.

    For each ``<name>.json`` directly inside ``all_data_path`` this calls
    ``data_to_csv`` with ``<name>.json`` as input and ``<name>.csv`` (in the
    same directory) as output. Sub-directories are not searched.

    Args:
        all_data_path: Directory containing the JSON-lines files.
    """
    # os.listdir returns entries in arbitrary order; sort so runs are repeatable.
    for filename in sorted(os.listdir(all_data_path)):
        config_name, extension = os.path.splitext(filename)
        # Only JSON inputs are converted. Without this filter a leftover
        # "<name>.csv" would cause "<name>.json" to be processed a second
        # time, and any unrelated file would point at a non-existent
        # "<stem>.json" and raise FileNotFoundError.
        if extension != ".json":
            continue
        data_to_csv(
            os.path.join(all_data_path, filename),
            os.path.join(all_data_path, f"{config_name}.csv"),
        )
|
2023-08-14 23:36:22 +00:00
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point. The single positional argument is handed to
    # all_data_to_csv, which lists it as a directory, so the help text
    # describes a directory rather than a single file.
    parser = argparse.ArgumentParser(
        description=(
            "Normalize the JSON lines of every .json file in a directory "
            "with pandas and append them to matching CSV files."
        )
    )
    parser.add_argument(
        "data_path",
        type=str,
        help="Path to the directory containing the JSON-lines (.json) files.",
    )

    args = parser.parse_args()
    all_data_to_csv(args.data_path)
|