Skip to content

Commit 4e185c0

Browse files
IMPLEMENTED - bar chart for TDD percentage per language - ignoring during AND simple tweaks elsewhere
1 parent 8caf8f3 commit 4e185c0

1 file changed

Lines changed: 49 additions & 12 deletions

File tree

src/analysis/analysis.py

Lines changed: 49 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,7 @@ def _create_size_impact_plot():
4444

4545
# Set title and axes labels
4646
plt.xlabel("Repo Size (No. of files)")
47-
plt.xlabel("Repo Size (No. of files)")
48-
plt.ylabel("Percentage of TDD")
47+
plt.ylabel("TDD Percentage (%)")
4948
plt.title("Repo size and TDD percentage")
5049

5150
# Save the plot
@@ -90,7 +89,7 @@ def _create_box_plot():
9089
_save_plot(plt, "TDD Usage Statistics")
9190

9291

93-
def _create_avg_commit_size_plot():
92+
def _create_avg_commit_size_bar_plot():
9493
# Read data from the repo_data csv
9594
repo_data = file_utils.read_csv("repo_data")
9695

@@ -119,7 +118,7 @@ def _create_avg_commit_size_plot():
119118

120119
# Place values at the top of each bar
121120
for index, value in enumerate([before_avg, after_avg, during_avg]):
122-
plt.text(index, value+0.5, round(value, 1), ha='center')
121+
plt.text(index, value+0.25, round(value, 1), ha='center')
123122

124123
# Set title and axes labels
125124
plt.ylabel("Average Commit Size (No. of files)")
@@ -137,7 +136,6 @@ def _create_pie_plot_tdd_author_categories():
137136
#10 25 50 70 90 100
138137
counters = [0,0,0,0,0,0]
139138

140-
141139
for author in author_data:
142140
# Calculate the percentage of TDD of the author
143141
# we don't count test_during as we want TDD percentage, not before, during and after percentage
@@ -202,7 +200,6 @@ def _create_pie_plot_overall_tdd_raw():
202200
# Convert the data into percentages using a lambda function and map
203201
percentages = list(map(lambda x: x / max(1, total) * 100, data))
204202

205-
206203
# Update labels to include percentage values for each slice
207204
labels = ['TDD', 'Not TDD', 'Unclear']
208205
for i in range(len(labels)):
@@ -277,7 +274,6 @@ def _create_pie_plot_tdd_repo_categories():
277274
#10 25 50 70 90 100
278275
counters = [0,0,0,0,0,0]
279276

280-
281277
for repo in repo_data:
282278
# Calculate the percentage of TDD of the repo
283279
# we don't count test_during as we want TDD percentage, not before, during and after percentage
@@ -324,24 +320,65 @@ def _create_pie_plot_tdd_repo_categories():
324320
# Save the plot
325321
_save_plot(plt, "TDD Repo Categories")
326322

323+
324+
def _create_language_tdd_bar_plot():
    """Plot the average TDD percentage per programming language as a bar chart.

    Reads the ``repo_data`` CSV and, for every repo, computes its TDD
    percentage as ``Test Before / (Test Before + Test After) * 100`` (only
    these two columns are read, so "during" commits are ignored). The
    per-repo percentages are averaged per language and the chart is saved
    under the name "Language TDD Percentage".
    """
    # Read data from the repo_data csv
    repo_data = file_utils.read_csv("repo_data")

    # Languages shown on the chart, in bar order
    labels = ["Java", "C++", "C#", "Kotlin", "Python"]

    # Running totals per language: sum of repo TDD percentages and repo count
    percentage_total = dict.fromkeys(labels, 0)
    repo_count = dict.fromkeys(labels, 0)

    # Iterate through each repo and update the totals for its language
    for repo in repo_data:
        language = repo['Language']
        if language not in repo_count:
            # Not one of the charted languages: skip instead of raising KeyError
            continue
        repo_count[language] += 1

        # Convert once; the CSV values are parsed as integers
        test_before = int(repo['Test Before'])
        test_after = int(repo['Test After'])
        classified = test_before + test_after

        # A repo with no before/after commits still counts towards the
        # language's repo count and contributes 0% to its total.
        if classified:
            percentage_total[language] += (test_before / classified) * 100

    # Average per language; a language without any repos is plotted as 0
    # rather than crashing with ZeroDivisionError.
    percentage_avg = [
        percentage_total[language] / repo_count[language] if repo_count[language] else 0.0
        for language in labels
    ]

    # Clear any existing plot
    plt.clf()

    # Plot the bar chart
    colors = ['#66c2a5', '#fc8d62', '#8da0cb', '#e78ac3', '#a6d854']
    plt.bar(labels, percentage_avg, align='center', color=colors)

    # Place values at the top of each bar (slightly above the bar edge)
    for index, value in enumerate(percentage_avg):
        plt.text(index, value + 0.25, round(value, 1), ha='center')

    # Set title and axes labels
    plt.ylabel("TDD Percentage (%)")
    plt.xlabel("Language")
    plt.title("TDD percentage observed between programming languages")

    # Save the plot
    _save_plot(plt, "Language TDD Percentage")
364+
327365
def create_plots():
    """Run each plot-creation function of this module, one after another."""
    # Order matches the original sequence of calls
    plot_builders = (
        _create_box_plot,
        _create_size_impact_plot,
        _create_avg_commit_size_bar_plot,
        _create_pie_plot_tdd_author_categories,
        _create_pie_plot_overall_tdd_raw,
        _create_pie_overall_tdd_percentage,
        _create_pie_plot_tdd_repo_categories,
        _create_language_tdd_bar_plot,
    )
    for build_plot in plot_builders:
        build_plot()
335374

336-
# REMEMBER TO REMOVE THESE
375+
# REMEMBER TO REMOVE THIS
# Guarded so that importing this module no longer regenerates every plot;
# running the file directly as a script still does.
if __name__ == "__main__":
    create_plots()
338377

339378
'''
340379
todo -
341380
write the adjustments/estimates code in python
342381
343382
todo -
344-
345-
bar chart for tdd percentace per langange - ignoring during
346-
383+
the two categories plots - maybe generalise some of it into a separate function
347384
'''

0 commit comments

Comments
 (0)