Spaces:
Running
Running
| import pandas as pd | |
| import json | |
| import argparse | |
| from pathlib import Path | |
| from typing import List | |
| ##TODO: later add logs | |
def create_json(csv_file_loc:str, output_folder:str, output_file_name:str,
                base_data_folder:str, data_attribute:str="data_files") -> List:
    """
    Creates a json file containing the causal query and its associated metadata from
    the csv file

    Each row of the csv becomes one record (dict). For every record, the value in
    the `data_attribute` column is joined onto `base_data_folder`; if the resulting
    path exists on disk, the record's value is replaced with that full path,
    otherwise the value is left as-is and a warning is printed. The json file is
    written even when some referenced data files are missing.

    Args:
        csv_file_loc: path to the csv file
        output_folder: path to the folder where the json file is saved
            (created, including parents, if it does not exist)
        output_file_name: name of the output json file; ".json" is appended
            when the name does not already end with it
        base_data_folder: path to the folder where the data is saved
        data_attribute: name of the column in the csv file containing the data file name

    Returns:
        The list of records (one dict per csv row) that was written to the json file.

    Raises:
        FileNotFoundError: if `csv_file_loc` does not exist.
    """
    try:
        df = pd.read_csv(csv_file_loc)
    except FileNotFoundError:
        print(f"File not found:{csv_file_loc}. Make sure the file path is correct.")
        # Without the dataframe there is nothing to convert; propagate the real
        # error instead of failing later on an unbound `df`.
        raise

    json_df = df.to_dict(orient="records")

    print("Checking if referenced csv files are available")
    all_exists = True
    base_path = Path(base_data_folder)
    for data in json_df:
        full_path = base_path / data[data_attribute]
        if full_path.exists():
            # Store the resolved location so consumers of the json do not need
            # to know the base data folder.
            data[data_attribute] = str(full_path)
        else:
            print(f"File not found: {full_path}. Re-check the name of the data file.")
            all_exists = False

    if all_exists:
        print("All data files are available. Good to go.")
    else:
        print("Some data files are missing or incorrectly name")

    # Check the actual extension: a substring test would accept names such as
    # "result.json.bak" as already being json files.
    if not output_file_name.endswith(".json"):
        output_file_name = output_file_name + ".json"

    output_path = Path(output_folder)
    output_path.mkdir(parents=True, exist_ok=True)
    output_file_path = output_path / output_file_name

    # The context manager closes the file; no explicit close() is needed.
    with open(output_file_path, "w", encoding="utf-8") as f:
        json.dump(json_df, f, indent=4)
    print(f"Json file created at {output_file_path}")

    return json_df