Spaces:
Build error
Build error
| """ | |
| Module which updates the stored issues to reflect changes in issue state since the last update, e.g. new comments. | |
| The module can be run from the command line using the following arguments: | |
| --input_filename: The name of the input file containing the issues | |
| --output_filename: The name of the output file to save the updated issues | |
| --github_api_version: The version of the GitHub API to use | |
| --owner: The owner of the repo | |
| --repo: The name of the repo | |
| --token: The GitHub token to use | |
| --n_pages: The number of pages to fetch (the default, -1, fetches every page). Useful for testing | |
| """ | |
| import argparse | |
| import json | |
| import logging | |
| import os | |
| import numpy as np | |
| import requests | |
| from .defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION, ISSUE_JSON_FILE | |
# Default issues file name used by the command-line entry point below.
JSON_FILE = "issues.json"

# Configure logging at import time so the INFO-level progress messages are emitted,
# then create this module's own logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
def _fetch_issues_since(url, headers, since, n_pages, per_page=100):
    """Page through a GitHub issues endpoint and return every issue it yields.

    Args:
        url: The issues endpoint to query.
        headers: HTTP headers to send with every request.
        since: Only issues updated at or after this timestamp are requested.
            ``None`` omits the filter.
        n_pages: Maximum number of pages to fetch; any value <= 0 means no limit.
        per_page: Page size requested from the API.

    Returns:
        A list of the issue dicts from all fetched pages, in response order.

    Raises:
        ValueError: If any request returns a status code other than 200.
    """
    page = 1
    query_params = {
        "state": "all",
        "sort": "created",
        "direction": "asc",
        # The page size must be sent explicitly: the "short page" check below
        # compares against it, and without it the server picks its own default.
        "per_page": per_page,
        "page": page,
    }
    if since is not None:
        query_params["since"] = since

    page_limit = (n_pages + page) if n_pages > 0 else float("inf")
    fetched = []
    while page < page_limit:
        response = requests.get(url, headers=headers, params=query_params)
        if response.status_code != 200:
            raise ValueError(
                f"Request failed with status code {response.status_code} and message {response.text}"
            )
        json_response = response.json()
        logger.info(f"Page: {page}, number of issues: {len(json_response)}")
        fetched.extend(json_response)
        # A short (or empty) page means we've reached the end of the issues
        if len(json_response) < per_page:
            break
        page += 1
        query_params["page"] = page
    return fetched


def update_issues(
    input_filename=ISSUE_JSON_FILE,
    output_filename=ISSUE_JSON_FILE,
    github_api_version=GITHUB_API_VERSION,
    owner=OWNER,
    repo=REPO,
    token=TOKEN,
    n_pages=-1,
):
    """
    Refresh a JSON-lines file of GitHub issues with the issues changed since the last update.

    The most recent ``updated_at`` value in ``issues_dict.json`` is used as the ``since``
    filter for the GitHub issues endpoint. Lines of ``input_filename`` whose issue number
    was returned are replaced with the fresh copy, issues not yet in the file are appended,
    and the result replaces ``output_filename``. The fetched issues, keyed by number, are
    also written to ``updated_issues.json``.

    Args:
        input_filename: JSON-lines file (one issue per line) to read the existing issues from.
        output_filename: File to overwrite with the merged issues. It must already exist.
        github_api_version: Value sent in the ``X-GitHub-Api-Version`` header.
        owner: Owner of the repo.
        repo: Name of the repo.
        token: Value sent in the ``Authorization`` header.
        n_pages: Maximum number of pages to fetch; any value <= 0 fetches every page.

    Returns:
        ``output_filename``.

    Raises:
        ValueError: If ``output_filename`` does not exist or a request returns a
            non-200 status code.
    """
    with open("issues_dict.json", "r") as f:
        issues = json.load(f)

    # Get most recent updated at information. The timestamps are compared as plain
    # values (presumably ISO 8601 strings, which order chronologically - confirm).
    # An empty record means there is nothing to filter on, so everything is fetched.
    most_recent = max((issue["updated_at"] for issue in issues.values()), default=None)

    # The output file is only ever updated in place, never created from scratch
    if not os.path.exists(output_filename):
        raise ValueError(f"File {output_filename} does not exist")

    # Define the URL and headers
    url = f"https://api.github.com/repos/{owner}/{repo}/issues"
    headers = {
        "Accept": "application/vnd.github+json",
        # NOTE(review): the token is sent verbatim - presumably it already carries its
        # scheme prefix (e.g. "Bearer ..."); confirm against the defaults module.
        "Authorization": f"{token}",
        "X-GitHub-Api-Version": f"{github_api_version}",
        "User-Agent": "amyeroberts",
    }

    new_lines = _fetch_issues_since(url, headers, most_recent, n_pages)
    # Keyed by issue number; if an issue shows up on more than one page the last copy wins
    issue_lines_map = {issue["number"]: issue for issue in new_lines}

    # Keep the temporary file next to the output file so a directory component in
    # output_filename is respected.
    tmp_filename = os.path.join(
        os.path.dirname(output_filename), "tmp_" + os.path.basename(output_filename)
    )

    updated_numbers = set()
    # "w" rather than "a": a temp file left behind by a failed run must not leak into the output
    with open(input_filename, "r") as f, open(tmp_filename, "w") as g:
        # Update any issues that already exist
        for line in f:
            if not line.strip():
                continue
            number = json.loads(line)["number"]
            if number in issue_lines_map:
                g.write(json.dumps(issue_lines_map[number]))
                g.write("\n")
                updated_numbers.add(number)
            else:
                # Guard against a final line without a trailing newline, which would
                # otherwise be glued to the first appended issue.
                g.write(line if line.endswith("\n") else line + "\n")

        # Append any new issues
        for number, issue in issue_lines_map.items():
            if number not in updated_numbers:
                g.write(json.dumps(issue))
                g.write("\n")

    # Overwrite the old file with the new file (os.replace also overwrites on Windows)
    os.replace(tmp_filename, output_filename)

    # Save a record of the updated issues for the embedding update
    with open("updated_issues.json", "w") as f:
        json.dump(issue_lines_map, f, indent=4, sort_keys=True)

    return output_filename
if __name__ == "__main__":
    # Command-line entry point: every flag maps one-to-one onto a keyword
    # argument of update_issues.
    cli_options = (
        ("--input_filename", str, JSON_FILE),
        ("--output_filename", str, JSON_FILE),
        ("--github_api_version", str, GITHUB_API_VERSION),
        ("--owner", str, OWNER),
        ("--repo", str, REPO),
        ("--token", str, TOKEN),
        ("--n_pages", int, -1),
    )

    parser = argparse.ArgumentParser()
    for flag, value_type, default_value in cli_options:
        parser.add_argument(flag, type=value_type, default=default_value)

    parsed = parser.parse_args()
    update_issues(**vars(parsed))