Skip to content

Commit 05072fe

Browse files
committed
Skip new repositories that were not successfully mined
1 parent 914488f commit 05072fe

1 file changed

Lines changed: 39 additions & 34 deletions

File tree

osdash/preprocess/wrangle_data.py

Lines changed: 39 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -31,40 +31,45 @@ def get_metrics(mined_data: list) -> dict:
3131
#
3232

3333
for repo in mined_data:
34-
# Process `Repository`
35-
repositories_list.append(repo["Repository"])
36-
# Process `Branches`
37-
for branch in repo["Branches"]:
38-
# Start with columns for repository info
39-
branch_row: dict = {
40-
"repo_name": repo["Repository"]["name"],
41-
# Include `repo_url` so that there's a more unique identifier:
42-
"repo_url": repo["Repository"]["repo_url"],
43-
"branch": branch
44-
}
45-
branches_list.append(branch_row)
46-
# Process `Commits`
47-
for commit in repo["Commits"]:
48-
# Start with columns for repository info
49-
commit_row: dict = {
50-
"repo_name": repo["Repository"]["name"],
51-
# Include `repo_url` so that there's a more unique identifier:
52-
"repo_url": repo["Repository"]["repo_url"]
53-
}
54-
# Then add other columns from the commit
55-
commit_row.update(commit)
56-
commits_list.append(commit_row)
57-
# Process `Tickets`
58-
for ticket in repo["Tickets"]:
59-
# Start with columns for repository info
60-
ticket_row: dict = {
61-
"repo_name": repo["Repository"]["name"],
62-
# Include `repo_url` so that there's a more unique identifier:
63-
"repo_url": repo["Repository"]["repo_url"]
64-
}
65-
# Then add other columns from the ticket
66-
ticket_row.update(ticket)
67-
tickets_list.append(ticket_row)
34+
# First, check if last mined timestamp is empty. If so, then `osmine`
35+
# did not complete successfully for this repository, so skip for now.
36+
if repo["Repository"]["last_mined"] == "":
37+
pass
38+
else:
39+
# Process `Repository`
40+
repositories_list.append(repo["Repository"])
41+
# Process `Branches`
42+
for branch in repo["Branches"]:
43+
# Start with columns for repository info
44+
branch_row: dict = {
45+
"repo_name": repo["Repository"]["name"],
46+
# Include `repo_url` so that there's a more unique identifier:
47+
"repo_url": repo["Repository"]["repo_url"],
48+
"branch": branch
49+
}
50+
branches_list.append(branch_row)
51+
# Process `Commits`
52+
for commit in repo["Commits"]:
53+
# Start with columns for repository info
54+
commit_row: dict = {
55+
"repo_name": repo["Repository"]["name"],
56+
# Include `repo_url` so that there's a more unique identifier:
57+
"repo_url": repo["Repository"]["repo_url"]
58+
}
59+
# Then add other columns from the commit
60+
commit_row.update(commit)
61+
commits_list.append(commit_row)
62+
# Process `Tickets`
63+
for ticket in repo["Tickets"]:
64+
# Start with columns for repository info
65+
ticket_row: dict = {
66+
"repo_name": repo["Repository"]["name"],
67+
# Include `repo_url` so that there's a more unique identifier:
68+
"repo_url": repo["Repository"]["repo_url"]
69+
}
70+
# Then add other columns from the ticket
71+
ticket_row.update(ticket)
72+
tickets_list.append(ticket_row)
6873

6974
#
7075
# Convert lists to Pandas dataframes and process

0 commit comments

Comments
 (0)