Skip to content

Commit 6be10cf

Browse files
Added analysis.py to the main flow of the program
1 parent ac7a629 commit 6be10cf

4 files changed

Lines changed: 41 additions & 37 deletions

File tree

src/analysis/analysis.py

Lines changed: 35 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
1-
from src.infrastructure.file_utils import read_csv
1+
import os
22
import matplotlib.pyplot as plt
33
import numpy as np
4+
from src.infrastructure import file_utils, repository_utils
45

5-
def create_size_impact_plot():
6+
def _save_plot(plot: plt, name: str):
    """Write the current pyplot figure to the charts directory as a JPEG.

    Args:
        plot: The ``matplotlib.pyplot`` module (or an object exposing the
            same ``savefig``/``close`` functions) holding the figure to save.
        name: File name without extension; the chart is written to
            ``<file_utils.CHARTS_PATH>/<name>.jpg``.
    """
    file_path = os.path.join(file_utils.CHARTS_PATH, f"{name}.jpg")
    plot.savefig(file_path)

    # pyplot keeps drawing on the same "current figure" until it is closed.
    # Without this, every chart created after the first one would be rendered
    # on top of the previous charts and saved with their leftovers.
    plot.close()
9+
10+
def _create_size_impact_plot():
611
# 3. How does the size of a commit impact the results?
712

813
# Read data from the repo_data csv
9-
repo_data = read_csv("repo_data")
14+
repo_data = file_utils.read_csv("repo_data")
1015

1116
# Initialize arrays to store plot points
1217
x = []
@@ -19,6 +24,7 @@ def create_size_impact_plot():
1924

2025
# Get the total number of tests for the repo
2126
total_test_count = int(repo['Test Before']) + int(repo['Test During']) + int(repo['Test After'])
27+
total_test_count = 1 if total_test_count == 0 else total_test_count
2228

2329
# Append the percentage of TDD for the repo to Y
2430
y.append((int(repo['Test Before'])/total_test_count)*100)
@@ -35,18 +41,16 @@ def create_size_impact_plot():
3541
plt.plot(x, a * x + b, color="red", alpha=0.5)
3642

3743
# Set title and axes labels
38-
plt.xlabel("Commit Count")
44+
plt.xlabel("Repo Size (No. of files)")
3945
plt.ylabel("Percentage of TDD")
40-
plt.title("Scatter plot showing how the size of commits impacts results")
41-
42-
# Show the plot
43-
plt.show()
46+
plt.title("Repo size and TDD percentage")
47+
_save_plot(plt, "Size Impact")
4448

45-
def create_box_plot():
49+
def _create_box_plot():
4650
# 2. How often is a test class (file) created (a) before, (b) after, or (c) in the same commit as a tested class (file)?
4751

4852
# Read data from the repo_data csv
49-
repo_data = read_csv("repo_data")
53+
repo_data = file_utils.read_csv("repo_data")
5054

5155
# Initialize arrays to store percentages for each repo
5256
before = []
@@ -57,58 +61,55 @@ def create_box_plot():
5761
for repo in repo_data:
5862
# Get the total number of tests for the repo
5963
total_test_count = int(repo['Test Before']) + int(repo['Test During']) + int(repo['Test After'])
64+
total_test_count = 1 if total_test_count == 0 else total_test_count
6065

6166
# Append the percentage data to each array
6267
before.append((int(repo['Test Before']) / total_test_count) * 100)
6368
during.append((int(repo['Test During']) / total_test_count) * 100)
6469
after.append((int(repo['Test After']) / total_test_count) * 100)
6570

6671
# Plot the box plots
67-
boxplt = plt.boxplot([before, during, after], patch_artist=True, tick_labels=["Before", "During", "After"], flierprops= dict(markerfacecolor='coral'))
72+
boxplt = plt.boxplot([before, after, during], patch_artist=True, tick_labels=["Before", "After", "During"], flierprops= dict(markerfacecolor='coral'))
6873

6974
colors = ['palegreen', 'lightblue', 'lightskyblue']
7075
for patch, color in zip(boxplt['boxes'], colors):
7176
patch.set_facecolor(color)
7277

7378
# Set title and axes labels
7479
plt.ylabel("Percentage")
75-
plt.title("Boxplot showing how often a test is created\nbefore, during and after implementation")
76-
77-
# Show the plot
78-
plt.show()
80+
plt.title("How often a test is created before, after and during implementation")
81+
_save_plot(plt, "TDD Usage Statistics")
7982

8083

81-
def create_avg_commit_size_plot():
84+
def _create_avg_commit_size_plot():
    """Save a bar chart of the mean commit size per test-timing category.

    Reads the ``repo_data`` CSV, averages the ``Avg Before Commit Size``,
    ``Avg After Commit Size`` and ``Avg During Commit Size`` columns over all
    rows, and saves the resulting bar chart as "Average Commit Size".
    """
    # Read data from the repo_data csv
    repo_data = file_utils.read_csv("repo_data")

    # Collect the per-repo values in a single pass over the data
    before_sizes = []
    after_sizes = []
    during_sizes = []
    for repo in repo_data:
        before_sizes.append(float(repo['Avg Before Commit Size']))
        after_sizes.append(float(repo['Avg After Commit Size']))
        during_sizes.append(float(repo['Avg During Commit Size']))

    def _mean(values):
        # A true arithmetic mean. A running ``(avg + value) / 2`` update is
        # not a mean: it halves the weight of every earlier repo on each
        # step, so the result is dominated by the last rows of the CSV.
        # Empty input yields 0, matching the chart produced for no data.
        return sum(values) / len(values) if values else 0

    before_avg = _mean(before_sizes)
    after_avg = _mean(after_sizes)
    during_avg = _mean(during_sizes)

    # Plot the bar chart
    colors = ['palegreen', 'lightblue', 'lightskyblue']
    plt.bar(["Before", "After", "During"], [before_avg, after_avg, during_avg], align='center', color=colors)

    # Set title and axes labels
    plt.xlabel("Commit relation between tests and implementation")
    plt.ylabel("Average Commit Size (No. of files)")
    plt.title("Average commit size when tests are created \nbefore, after and during implementation")
    _save_plot(plt, "Average Commit Size")
107108

108109

109-
def create_pie_plot():
110+
def _create_pie_plot():
110111
# Read data from the author_data csv
111-
author_data = read_csv("author_data")
112+
author_data = file_utils.read_csv("author_data")
112113

113114
# Initialize Counters
114115
#10 25 50 70 90 100
@@ -117,7 +118,7 @@ def create_pie_plot():
117118

118119
for author in author_data:
119120
# Calculate the percentage of TDD of the author
120-
TDD_percent = (float(author['Test Before']) / (float(author['Test Before']) + float(author['Test During']) + float(author['Test After']))) * 100
121+
TDD_percent = (float(author['Test Before']) / max(1, float(author['Test Before']) + float(author['Test During']) + float(author['Test After']))) * 100
121122

122123
# Update the counters array based on this result
123124
if TDD_percent < 10:
@@ -134,7 +135,7 @@ def create_pie_plot():
134135
counters[5] += 1
135136

136137
# Convert the counters into percentages using a lambda function and map
137-
percentages = list(map(lambda x: x/len(author_data)*100, counters))
138+
percentages = list(map(lambda x: x/max(1, len(author_data))*100, counters))
138139

139140
labels = ['Non TDD', 'Rarely TDD', 'Occasionally TDD', 'Somewhat TDD', 'Mostly TDD', 'Consistently TDD']
140141
for i in range(len(labels)):
@@ -148,13 +149,11 @@ def create_pie_plot():
148149
plt.legend(patches, labels, loc="upper left")
149150
plt.axis('equal')
150151
plt.title("Pie chart showing the percentage of authors using levels of TDD")
151-
152-
# Show the plot
153-
plt.show()
154-
155-
create_size_impact_plot()
156-
create_box_plot()
157-
create_avg_commit_size_plot()
158-
create_pie_plot()
152+
_save_plot(plt, "TDD Categories")
159153

160154

155+
def create_plots():
    """Build every analysis chart, one after another.

    Runs the size-impact scatter plot, the before/after/during box plot, the
    average-commit-size bar chart and the TDD-category pie chart, in that
    order.
    """
    chart_builders = (
        _create_size_impact_plot,
        _create_box_plot,
        _create_avg_commit_size_plot,
        _create_pie_plot,
    )
    for build_chart in chart_builders:
        build_chart()

src/infrastructure/configuration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111

1212
def setup_directories():
1313
file_utils.create_directory(file_utils.RESULTS_PATH, delete_existing=True)
14+
file_utils.create_directory(file_utils.CHARTS_PATH)
1415
file_utils.create_directory(file_utils.LOGS_PATH)
1516
file_utils.create_directory(file_utils.COMMITS_PATH)
1617

src/infrastructure/file_utils.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
ROOT_PATH = os.path.abspath(os.path.join(os.path.dirname(__file__), "../.."))
77
RESOURCES_PATH = os.path.join(ROOT_PATH, "resources", "repositories")
88
RESULTS_PATH = os.path.join(ROOT_PATH, "results")
9+
CHARTS_PATH = os.path.join(RESULTS_PATH, "charts")
910
LOGS_PATH = os.path.join(ROOT_PATH, "logs")
1011
COMMITS_PATH = os.path.join(ROOT_PATH, "commits")
1112

src/presentation/analysis_manager.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from src.mining import commit_retrieval as retrieval
1010
from src.mining.csv_export import update_author_count, update_author_data, update_repo_data, anonymyse_authors
1111
from src.models.Repository import Repository
12+
from src.analysis import analysis
1213

1314
class AnalysisManager():
1415
def __init__(self, date_of_experiment: datetime):
@@ -114,10 +115,12 @@ async def process_and_update(repo):
114115
async def perform_analysis_on_repo(self, repo: Repository, file_handler: LanguageFileHandler, force_mine: bool):
115116
await self._process_repositories([repo], file_handler, batch_size=1, force_mine=force_mine)
116117
anonymyse_authors()
118+
analysis.create_plots()
117119

118120
async def perform_analysis(self, file_handlers: list, batch_size: int, force_mine: bool):
119121
for file_handler in file_handlers:
120122
repositories = repository_utils.read_repositories(file_handler.name.lower())
121123
await self._process_repositories(repositories, file_handler, batch_size, force_mine)
122124

123-
anonymyse_authors()
125+
anonymyse_authors()
126+
analysis.create_plots()

0 commit comments

Comments
 (0)