Skip to content

Commit 16e67a4

Browse files
Added Analysis plots and updated the reqs file
1 parent d7d9e83 commit 16e67a4

2 files changed

Lines changed: 97 additions & 1 deletion

File tree

requirements.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,4 +3,5 @@ pytest
33
pytest-asyncio
44
matplotlib~=3.9.4
55
tqdm
6-
requests
6+
requests
7+
numpy

src/analysis/analysis.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
from distutils.dep_util import newer_pairwise
2+
from idlelib.colorizer import color_config
3+
4+
from PIL.ImageColor import colormap
5+
6+
from src.infrastructure.file_utils import read_csv
7+
import matplotlib.pyplot as plt
8+
import numpy as np
9+
10+
def create_size_impact_plot():
    """Plot each repo's TDD percentage against its commit count.

    Addresses analysis question 3: "How does the size of a commit impact
    the results?"  Rows are loaded with ``read_csv("repo_data")``; every row
    must provide ``Commit Count``, ``Test Before``, ``Test During`` and
    ``Test After`` values that ``int()`` can parse.

    Repos with no recorded tests are left out of the plot because their TDD
    percentage is undefined (0 / 0).  The line of best fit is drawn only
    when at least two distinct commit counts are available, since a
    degree-1 fit is not determined otherwise.

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Read data from the repo_data csv
    repo_data = read_csv("repo_data")

    # Plot points: commit count (x) against percentage of tests written
    # before the implementation (y).
    commit_counts = []
    tdd_percentages = []

    for repo in repo_data:
        tests_before = int(repo['Test Before'])
        total_test_count = tests_before + int(repo['Test During']) + int(repo['Test After'])

        # A repo without any tests has no meaningful percentage; skip it
        # before touching either list so x and y stay the same length.
        if total_test_count == 0:
            continue

        commit_counts.append(int(repo['Commit Count']))
        tdd_percentages.append(tests_before / total_test_count * 100)

    # Convert to numpy arrays so the fitted line can be evaluated element-wise
    x = np.array(commit_counts)
    y = np.array(tdd_percentages)

    # Plot the scatter points, coloured by their TDD percentage
    plt.scatter(x, y, c=y, cmap='winter')

    # Calculate and plot the line of best fit.  Two end points are enough
    # to draw a straight line, whatever order the repos were read in.
    if np.unique(x).size >= 2:
        slope, intercept = np.polyfit(x, y, 1)
        fit_x = np.array([x.min(), x.max()])
        plt.plot(fit_x, slope * fit_x + intercept, color="red", alpha=0.5)

    # Set title and axes labels
    plt.xlabel("Commit Count")
    plt.ylabel("Percentage of TDD")
    plt.title("Scatter plot showing how the size of commits impacts results")

    # Show the plot
    plt.show()
49+
50+
def create_box_plot():
    """Show box plots of when tests are created relative to the tested code.

    Addresses analysis question 2: "How often is a test class (file) created
    (a) before, (b) after, or (c) in the same commit as a tested class
    (file)?"  Rows are loaded with ``read_csv("repo_data")``; every row must
    provide ``Test Before``, ``Test During`` and ``Test After`` values that
    ``int()`` can parse.

    For each repo the three counts are converted to percentages of that
    repo's total test count.  Repos with no recorded tests are skipped
    because those percentages are undefined (0 / 0).

    The figure is displayed with ``plt.show()``; nothing is returned.
    """
    # Read data from the repo_data csv
    repo_data = read_csv("repo_data")

    # One list of per-repo percentages for each timing category, kept in
    # the same order as the tick labels used below.
    columns = ('Test Before', 'Test During', 'Test After')
    percentages = {column: [] for column in columns}

    for repo in repo_data:
        counts = [int(repo[column]) for column in columns]
        total_test_count = sum(counts)

        # Avoid dividing by zero for repos that contain no tests at all.
        if total_test_count == 0:
            continue

        for column, count in zip(columns, counts):
            percentages[column].append(count / total_test_count * 100)

    # Plot the box plots
    boxplt = plt.boxplot(
        [percentages[column] for column in columns],
        patch_artist=True,
        tick_labels=["Before", "During", "After"],
        flierprops=dict(markerfacecolor='coral'),
    )

    # Give each box its own fill colour (requires patch_artist=True above)
    colors = ['lightskyblue', 'paleturquoise', 'palegreen']
    for patch, color in zip(boxplt['boxes'], colors):
        patch.set_facecolor(color)

    # Set title and axes labels
    plt.ylabel("Percentage")
    plt.title("Boxplot showing how often a test is created\nbefore, during and after implementation")

    # Show the plot
    plt.show()
84+
85+
86+
# Render both analysis figures, scatter plot first and box plot second.
# NOTE(review): this runs whenever the module is imported, not only when it
# is executed as a script; consider an `if __name__ == "__main__":` guard.
for _render_plot in (create_size_impact_plot, create_box_plot):
    _render_plot()
88+
89+
'''
90+
91+
8. How does TDD adoption vary between projects?
92+
93+
'''
94+
95+

0 commit comments

Comments
 (0)