Spaces:
Build error
Build error
Amy Roberts
committed on
Commit
·
7d5704e
1
Parent(s):
12ae336
Move to utils
Browse files- app.py +9 -10
- retrieval.py +0 -80
- utils/__init__.py +0 -0
- build_embeddings.py → utils/build_embeddings.py +0 -0
- build_issue_dict.py → utils/build_issue_dict.py +0 -0
- defaults.py → utils/defaults.py +0 -0
- fetch.py → utils/fetch.py +1 -1
- find_similar_issues.py → utils/find_similar_issues.py +0 -0
- update_embeddings.py → utils/update_embeddings.py +0 -0
- update_stored_issues.py → utils/update_stored_issues.py +1 -1
app.py
CHANGED
|
@@ -1,17 +1,16 @@
|
|
| 1 |
import datetime
|
| 2 |
-
import gradio as gr
|
| 3 |
import os
|
| 4 |
-
from find_similar_issues import get_similar_issues
|
| 5 |
-
import requests
|
| 6 |
-
|
| 7 |
-
from defaults import OWNER, REPO
|
| 8 |
-
|
| 9 |
-
import build_issue_dict
|
| 10 |
-
import build_embeddings
|
| 11 |
import shutil
|
| 12 |
-
from fetch import get_issues
|
| 13 |
-
from update_stored_issues import update_issues
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
|
| 16 |
|
| 17 |
def get_query_issue_information(issue_no, token):
|
|
|
|
| 1 |
import datetime
|
|
|
|
| 2 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
import shutil
|
|
|
|
|
|
|
| 4 |
|
| 5 |
+
import gradio as gr
|
| 6 |
+
import requests
|
| 7 |
+
|
| 8 |
+
from utils import build_issue_dict
|
| 9 |
+
from utils import build_embeddings
|
| 10 |
+
from utils.defaults import OWNER, REPO
|
| 11 |
+
from utils.fetch import get_issues
|
| 12 |
+
from utils.find_similar_issues import get_similar_issues
|
| 13 |
+
from utils.update_stored_issues import update_issues
|
| 14 |
|
| 15 |
|
| 16 |
def get_query_issue_information(issue_no, token):
|
retrieval.py
DELETED
|
@@ -1,80 +0,0 @@
|
|
| 1 |
-
"""
|
| 2 |
-
Module which contains functionality to retrieve the most similar issues for a given query
|
| 3 |
-
"""
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
import argparse
|
| 7 |
-
import json
|
| 8 |
-
|
| 9 |
-
import numpy as np
|
| 10 |
-
from sentence_transformers import SentenceTransformer
|
| 11 |
-
|
| 12 |
-
def cosine_similarity(a, b):
|
| 13 |
-
if a.ndim == 1:
|
| 14 |
-
a = a.reshape(1, -1)
|
| 15 |
-
|
| 16 |
-
if b.ndim == 1:
|
| 17 |
-
b = b.reshape(1, -1)
|
| 18 |
-
|
| 19 |
-
return np.dot(a, b.T) / (np.linalg.norm(a, axis=1) * np.linalg.norm(b, axis=1))
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
def retrieve_issue_rankings(
|
| 23 |
-
query: str,
|
| 24 |
-
model_id: str,
|
| 25 |
-
input_embedding_filename: str,
|
| 26 |
-
):
|
| 27 |
-
"""
|
| 28 |
-
Given a query returns the list of issues sorted by similarity to the query
|
| 29 |
-
according to their embedding index
|
| 30 |
-
"""
|
| 31 |
-
model = SentenceTransformer(model_id)
|
| 32 |
-
|
| 33 |
-
embeddings = np.load(input_embedding_filename)
|
| 34 |
-
|
| 35 |
-
query_embedding = model.encode(query)
|
| 36 |
-
|
| 37 |
-
# Calculate the cosine similarity between the query and all the issues
|
| 38 |
-
cosine_similarities = cosine_similarity(query_embedding, embeddings)
|
| 39 |
-
|
| 40 |
-
# Get the index of the most similar issue
|
| 41 |
-
most_similar_indices = np.argsort(cosine_similarities)
|
| 42 |
-
most_similar_indices = most_similar_indices[0][::-1]
|
| 43 |
-
return most_similar_indices
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
def print_issue(issues, issue_id):
|
| 47 |
-
# Get the issue id of the most similar issue
|
| 48 |
-
issue_info = issues[issue_id]
|
| 49 |
-
|
| 50 |
-
print(f"#{issue_id}", issue_info["title"])
|
| 51 |
-
print(issue_info["body"])
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
if __name__ == "__main__":
|
| 55 |
-
parser = argparse.ArgumentParser()
|
| 56 |
-
parser.add_argument("query", type=str)
|
| 57 |
-
parser.add_argument("--model_id", type=str, default="all-mpnet-base-v2")
|
| 58 |
-
parser.add_argument("--input_embedding_filename", type=str, default="issue_embeddings.npy")
|
| 59 |
-
parser.add_argument("--input_index_filename", type=str, default="embedding_index_to_issue.json")
|
| 60 |
-
|
| 61 |
-
args = parser.parse_args()
|
| 62 |
-
|
| 63 |
-
issue_rankings = retrieve_issue_rankings(
|
| 64 |
-
query=args.query,
|
| 65 |
-
model_id=args.model_id,
|
| 66 |
-
input_embedding_filename=args.input_embedding_filename,
|
| 67 |
-
)
|
| 68 |
-
|
| 69 |
-
with open("issues_dict.json", "r") as f:
|
| 70 |
-
issues = json.load(f)
|
| 71 |
-
|
| 72 |
-
with open(args.input_index_filename, "r") as f:
|
| 73 |
-
embedding_index_to_issue = json.load(f)
|
| 74 |
-
|
| 75 |
-
issue_ids = [embedding_index_to_issue[str(i)] for i in issue_rankings]
|
| 76 |
-
|
| 77 |
-
for issue_id in issue_ids[:3]:
|
| 78 |
-
print(issue_id)
|
| 79 |
-
print_issue(issues, issue_id)
|
| 80 |
-
print("\n\n\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
utils/__init__.py
ADDED
|
File without changes
|
build_embeddings.py → utils/build_embeddings.py
RENAMED
|
File without changes
|
build_issue_dict.py → utils/build_issue_dict.py
RENAMED
|
File without changes
|
defaults.py → utils/defaults.py
RENAMED
|
File without changes
|
fetch.py → utils/fetch.py
RENAMED
|
@@ -22,7 +22,7 @@ import os
|
|
| 22 |
import requests
|
| 23 |
import numpy as np
|
| 24 |
|
| 25 |
-
from defaults import OWNER, REPO, GITHUB_API_VERSION, TOKEN, ISSUE_JSON_FILE
|
| 26 |
|
| 27 |
logging.basicConfig(level=logging.INFO)
|
| 28 |
logger = logging.getLogger(__name__)
|
|
|
|
| 22 |
import requests
|
| 23 |
import numpy as np
|
| 24 |
|
| 25 |
+
from .defaults import OWNER, REPO, GITHUB_API_VERSION, TOKEN, ISSUE_JSON_FILE
|
| 26 |
|
| 27 |
logging.basicConfig(level=logging.INFO)
|
| 28 |
logger = logging.getLogger(__name__)
|
find_similar_issues.py → utils/find_similar_issues.py
RENAMED
|
File without changes
|
update_embeddings.py → utils/update_embeddings.py
RENAMED
|
File without changes
|
update_stored_issues.py → utils/update_stored_issues.py
RENAMED
|
@@ -18,7 +18,7 @@ import os
|
|
| 18 |
import numpy as np
|
| 19 |
import requests
|
| 20 |
|
| 21 |
-
from defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION, ISSUE_JSON_FILE
|
| 22 |
|
| 23 |
logging.basicConfig(level=logging.INFO)
|
| 24 |
logger = logging.getLogger(__name__)
|
|
|
|
| 18 |
import numpy as np
|
| 19 |
import requests
|
| 20 |
|
| 21 |
+
from .defaults import TOKEN, OWNER, REPO, GITHUB_API_VERSION, ISSUE_JSON_FILE
|
| 22 |
|
| 23 |
logging.basicConfig(level=logging.INFO)
|
| 24 |
logger = logging.getLogger(__name__)
|