"""Merge several on-disk dataset shards and upload the result to the Hub."""

import os

from datasets import concatenate_datasets, load_from_disk


def load_and_merge_datasets(directories):
    """Load one dataset per directory and concatenate them into one.

    Args:
        directories: Iterable of paths, each passed to ``load_from_disk``.

    Returns:
        The result of ``concatenate_datasets`` applied to the loaded
        datasets, in the same order as ``directories``.
    """
    shards = [load_from_disk(path) for path in directories]
    return concatenate_datasets(shards)


# Shard directories to merge, listed in the order they are concatenated.
data_directories = [
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part0",
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part1",
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part2",
    "/bask/projects/p/phwq4930-gbm/Zeyu/WSI_Dataset/WVLMdata_part3",
]

# NOTE(review): the statements below are top-level, so the load, merge and
# upload run whenever this module is executed or imported.
merged_dataset = load_and_merge_datasets(data_directories)
merged_dataset.push_to_hub("CNX-PathLLM/TCGA-WSI-Text")