Update data_to_csv to convert provided dir to csv

This commit is contained in:
Gusto 2023-08-15 13:34:11 +03:00
parent c03bc400eb
commit 7b188012fa
1 changed file with 10 additions and 2 deletions

View File

@@ -1,28 +1,36 @@
import pandas as pd
import json
import argparse
import os
def data_to_csv(data_path, output_path):
    """Append the JSON records found in ``data_path`` to a CSV file.

    Each line of the input is expected to hold one JSON value, optionally
    followed by a trailing comma. Every line that parses is flattened with
    ``pandas.json_normalize``, tagged with a ``step`` column holding the
    zero-based index of the line it came from, and appended to
    ``output_path``. Lines that are not valid JSON are reported on stdout and
    skipped; they still consume a step number.

    Args:
        data_path: Path of the text file to read, one JSON value per line.
        output_path: Path of the CSV file to append to. It is created if it
            does not exist yet (even when no input line parses).
    """
    # When appending to a file that already holds rows, a second header line
    # in the middle of the data would corrupt the CSV.
    header_written = (
        os.path.exists(output_path) and os.path.getsize(output_path) > 0
    )
    with open(data_path, "r", encoding="utf-8") as source:
        # Open the output once instead of once per input line. newline=""
        # leaves line-ending handling to the CSV writer.
        with open(output_path, "a", encoding="utf-8", newline="") as sink:
            for step, line in enumerate(source):
                # Drop surrounding whitespace first so that a trailing comma
                # followed by spaces is still removed.
                clean_line = line.strip().rstrip(",")
                try:
                    data = json.loads(clean_line)
                except json.JSONDecodeError:
                    print(f"Failed to parse line: {line}")
                    continue
                normalized = pd.json_normalize(data)
                normalized["step"] = step
                normalized.to_csv(
                    sink, header=not header_written, index=False
                )
                header_written = True
def all_data_to_csv(all_data_path):
    """Convert every ``.json`` file directly inside ``all_data_path`` to CSV.

    For each ``<name>.json`` file, ``data_to_csv`` is called to append its
    rows to ``<name>.csv`` in the same directory. Entries with any other
    extension (including CSV files left by an earlier run) and
    sub-directories are ignored, so they neither crash the conversion nor
    cause a JSON file to be processed twice.

    Args:
        all_data_path: Directory containing the JSON-lines files.
    """
    # Sorted so the processing order does not depend on the file system.
    for filename in sorted(os.listdir(all_data_path)):
        config_name, extension = os.path.splitext(filename)
        source = os.path.join(all_data_path, filename)
        if extension.lower() != ".json" or not os.path.isfile(source):
            continue
        target = os.path.join(all_data_path, f"{config_name}.csv")
        data_to_csv(source, target)
if __name__ == "__main__":
    # Command-line entry point: convert one JSON-lines file, or every .json
    # file of a directory, to CSV.
    parser = argparse.ArgumentParser(description="Normalize JSON lines in a file to a Pandas DataFrame and append to CSV.")
    parser.add_argument(
        "data_path",
        type=str,
        help="Path to the file containing JSON lines, or to a directory of .json files.",
    )
    # Optional so that a directory can be converted with a single argument;
    # the two-argument form for a single file keeps working.
    parser.add_argument(
        "output_path",
        type=str,
        nargs="?",
        default=None,
        help="Path to the output CSV file (single-file mode only; defaults to data_path with a .csv extension).",
    )
    args = parser.parse_args()
    # A path cannot be both a file and a directory, so pick the matching
    # converter instead of calling both.
    if os.path.isdir(args.data_path):
        all_data_to_csv(args.data_path)
    else:
        output_path = args.output_path
        if output_path is None:
            output_path = os.path.splitext(args.data_path)[0] + ".csv"
        data_to_csv(args.data_path, output_path)