Skip to content

Commit b414785

Browse files
Reordered functions and neatened up naming
1 parent 6230b09 commit b414785

1 file changed

Lines changed: 86 additions & 88 deletions

File tree

src/analysis/analysis.py

Lines changed: 86 additions & 88 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ def _get_category_index(tdd_percentage):
2323
elif tdd_percentage <= 100:
2424
return 5
2525

26-
def _create_size_impact_plot():
26+
def _create_size_impact_scatter():
2727
# Read data from the repo_data csv
2828
repo_data = file_utils.read_csv("repo_data")
2929

@@ -64,9 +64,9 @@ def _create_size_impact_plot():
6464
plt.title("Repo size and TDD percentage")
6565

6666
# Save the plot
67-
_save_plot(plt, "Size Impact")
67+
_save_plot(plt, "1 - Size Impact")
6868

69-
def _create_box_plot():
69+
def _create_tdd_usage_box_plot():
7070
# Read data from the repo_data csv
7171
repo_data = file_utils.read_csv("repo_data")
7272

@@ -102,10 +102,10 @@ def _create_box_plot():
102102
plt.title("Percentage of tests created before, after and during implementation")
103103

104104
# Save the plot
105-
_save_plot(plt, "TDD Usage Statistics")
105+
_save_plot(plt, "2 - TDD Usage Statistics")
106106

107107

108-
def _create_avg_commit_size_bar_plot():
108+
def _create_avg_commit_size_bar_graph():
109109
# Read data from the repo_data csv
110110
repo_data = file_utils.read_csv("repo_data")
111111

@@ -141,54 +141,52 @@ def _create_avg_commit_size_bar_plot():
141141
plt.title("Average commit size when tests are created \nbefore, after and during implementation")
142142

143143
# Save the plot
144-
_save_plot(plt, "Average Commit Size")
144+
_save_plot(plt, "3 - Average Commit Size")
145145

146146

147-
def _create_pie_plot_tdd_author_categories():
148-
# Read data from the author_data csv
149-
author_data = file_utils.read_csv("author_data")
150-
151-
# Initialize Counters
152-
#10 25 50 70 90 100
153-
counters = [0,0,0,0,0,0]
154-
155-
for author in author_data:
156-
# Calculate the percentage of TDD of the author
157-
# we don't count test_during as we want TDD percentage, not before, during and after percentage
158-
tdd_percentage = (float(author['Test Before']) / max(1, float(author['Test Before']) + float(author['Test After']))) * 100
147+
def _create_tdd_languages_bar_graph():
148+
# Read data from the repo_data csv
149+
repo_data = file_utils.read_csv("repo_data")
159150

160-
# Update the counters array based on this result
161-
counters[_get_category_index(tdd_percentage)] += 1
151+
# Initialize per-language running totals and repo counts
152+
labels = ["Java", "C++", "C#", "Kotlin", "Python"]
153+
percentage_total = {"Java":0, "C++":0, "C#":0, "Kotlin":0, "Python":0}
154+
repo_count = {"Java":0, "C++":0, "C#":0, "Kotlin":0, "Python":0}
162155

163-
# Convert the counters into percentages using a lambda function and map
164-
percentages = list(map(lambda x: x/max(1, len(author_data))*100, counters))
156+
# Iterate through each repo and update the percentage total and repo count for the respective language
157+
for repo in repo_data:
158+
language = repo['Language']
159+
repo_count[language] += 1
160+
try:
161+
percentage_total[language] += (int(repo['Test Before']) / (int(repo['Test Before']) + int(repo['Test After']))) * 100
162+
except ZeroDivisionError:
163+
pass
165164

166-
# Update labels to include percentage values for each slice
167-
labels = ['Non TDD', 'Rarely TDD', 'Occasionally TDD', 'Somewhat TDD', 'Mostly TDD', 'Consistently TDD']
168-
for i in range(len(labels)):
169-
labels[i] = labels[i] + ' - ' + str(round(percentages[i], 1)) + '%'
165+
percentage_avg = []
166+
for language in labels:
167+
percentage_avg.append((percentage_total[language] / repo_count[language]))
170168

171169
# Clear any existing plot
172170
plt.clf()
173171

174-
# Plot the pie
175-
colors = ['#225ea8', '#1d91c0', '#41b6c4', '#7fcdbb', '#c7e9b4', '#71cb71']
176-
patches, texts = plt.pie(percentages, colors=colors)
172+
# Plot the bar chart
173+
colors = ['#66c2a5','#fc8d62','#8da0cb','#e78ac3','#a6d854']
174+
plt.bar(labels, percentage_avg, align='center', color=colors)
177175

178-
# Plot the legend
179-
plt.legend(patches, labels, loc="upper left")
176+
# Place values at the top of each bar
177+
for index, value in enumerate(percentage_avg):
178+
plt.text(index, value+0.25, round(value, 1), ha='center')
180179

181-
# Set the title and specify axis setting
182-
plt.axis('equal')
183-
plt.title("Pie chart showing levels of TDD usage by authors")
184-
plt.rcParams["figure.figsize"] = [7.5, 4.25]
185-
plt.rcParams["figure.autolayout"] = True
180+
# Set title and axes labels
181+
plt.ylabel("TDD Percentage (%)")
182+
plt.xlabel("Language")
183+
plt.title("TDD percentage observed between programming languages")
186184

187185
# Save the plot
188-
_save_plot(plt, "TDD Author Categories")
186+
_save_plot(plt, "4 - Language TDD Percentage")
189187

190188

191-
def _create_pie_plot_overall_tdd_raw():
189+
def _create_raw_tdd_percentage_pie():
192190
# Read data from the repo_data csv
193191
repo_data = file_utils.read_csv("repo_data")
194192

@@ -227,10 +225,10 @@ def _create_pie_plot_overall_tdd_raw():
227225
plt.rcParams["figure.autolayout"] = True
228226

229227
# Save the plot
230-
_save_plot(plt, "Overall TDD Usage Raw")
228+
_save_plot(plt, "5 - Raw TDD Percentage")
231229

232230

233-
def _create_pie_overall_tdd_percentage():
231+
def _create_overall_tdd_percentage_pie():
234232
# Read data from the repo_data csv
235233
repo_data = file_utils.read_csv("repo_data")
236234

@@ -269,26 +267,27 @@ def _create_pie_overall_tdd_percentage():
269267
plt.rcParams["figure.autolayout"] = True
270268

271269
# Save the plot
272-
_save_plot(plt, "Overall TDD Percentage")
270+
_save_plot(plt, "6 - Overall TDD Percentage")
271+
273272

274-
def _create_pie_plot_tdd_repo_categories():
273+
def _create_tdd_author_categories_pie():
275274
# Read data from the author_data csv
276-
repo_data = file_utils.read_csv("repo_data")
275+
author_data = file_utils.read_csv("author_data")
277276

278277
# Initialize Counters
279278
#10 25 50 70 90 100
280279
counters = [0,0,0,0,0,0]
281280

282-
for repo in repo_data:
281+
for author in author_data:
283282
# Calculate the percentage of TDD of the author
284283
# we don't count test_during as we want TDD percentage, not before, during and after percentage
285-
tdd_percentage = (float(repo['Test Before']) / max(1, float(repo['Test Before']) + float(repo['Test After']))) * 100
284+
tdd_percentage = (float(author['Test Before']) / max(1, float(author['Test Before']) + float(author['Test After']))) * 100
286285

287286
# Update the counters array based on this result
288287
counters[_get_category_index(tdd_percentage)] += 1
289288

290289
# Convert the counters into percentages using a lambda function and map
291-
percentages = list(map(lambda x: x/max(1, len(repo_data))*100, counters))
290+
percentages = list(map(lambda x: x/max(1, len(author_data))*100, counters))
292291

293292
# Update labels to include percentage values for each slice
294293
labels = ['Non TDD', 'Rarely TDD', 'Occasionally TDD', 'Somewhat TDD', 'Mostly TDD', 'Consistently TDD']
@@ -307,72 +306,71 @@ def _create_pie_plot_tdd_repo_categories():
307306

308307
# Set the title and specify axis setting
309308
plt.axis('equal')
310-
plt.title("Pie chart showing levels of TDD usage seen in repositories")
309+
plt.title("Pie chart showing levels of TDD usage by authors")
311310
plt.rcParams["figure.figsize"] = [7.5, 4.25]
312311
plt.rcParams["figure.autolayout"] = True
313312

314313
# Save the plot
315-
_save_plot(plt, "TDD Repo Categories")
314+
_save_plot(plt, "7 - TDD Author Categories")
316315

317316

318-
def _create_language_tdd_bar_plot():
319-
# Read data from the repo_data csv
317+
def _create_tdd_repo_categories_pie():
318+
# Read data from the repo_data csv
320319
repo_data = file_utils.read_csv("repo_data")
321320

322-
# Initialize variables to store total for each repo
323-
labels = ["Java", "C++", "C#", "Kotlin", "Python"]
324-
percentage_total = {"Java":0, "C++":0, "C#":0, "Kotlin":0, "Python":0}
325-
repo_count = {"Java":0, "C++":0, "C#":0, "Kotlin":0, "Python":0}
321+
# Initialize Counters
322+
#10 25 50 70 90 100
323+
counters = [0,0,0,0,0,0]
326324

327-
# Iterate through each repo and update the percentage total and repo count for the respective language
328325
for repo in repo_data:
329-
language = repo['Language']
330-
repo_count[language] += 1
331-
try:
332-
percentage_total[language] += (int(repo['Test Before']) / (int(repo['Test Before']) + int(repo['Test After']))) * 100
333-
except ZeroDivisionError:
334-
pass
326+
# Calculate the percentage of TDD of the repo
327+
# we don't count test_during as we want TDD percentage, not before, during and after percentage
328+
tdd_percentage = (float(repo['Test Before']) / max(1, float(repo['Test Before']) + float(repo['Test After']))) * 100
335329

336-
percentage_avg = []
337-
for language in labels:
338-
percentage_avg.append((percentage_total[language] / repo_count[language]))
330+
# Update the counters array based on this result
331+
counters[_get_category_index(tdd_percentage)] += 1
332+
333+
# Convert the counters into percentages using a lambda function and map
334+
percentages = list(map(lambda x: x/max(1, len(repo_data))*100, counters))
335+
336+
# Update labels to include percentage values for each slice
337+
labels = ['Non TDD', 'Rarely TDD', 'Occasionally TDD', 'Somewhat TDD', 'Mostly TDD', 'Consistently TDD']
338+
for i in range(len(labels)):
339+
labels[i] = labels[i] + ' - ' + str(round(percentages[i], 1)) + '%'
339340

340341
# Clear any existing plot
341342
plt.clf()
342343

343-
# Plot the bar chart
344-
colors = ['#66c2a5','#fc8d62','#8da0cb','#e78ac3','#a6d854']
345-
plt.bar(labels, percentage_avg, align='center', color=colors)
344+
# Plot the pie
345+
colors = ['#225ea8', '#1d91c0', '#41b6c4', '#7fcdbb', '#c7e9b4', '#71cb71']
346+
patches, texts = plt.pie(percentages, colors=colors)
346347

347-
# Place values at the top of each bar
348-
for index, value in enumerate(percentage_avg):
349-
plt.text(index, value+0.25, round(value, 1), ha='center')
348+
# Plot the legend
349+
plt.legend(patches, labels, loc="upper left")
350350

351-
# Set title and axes labels
352-
plt.ylabel("TDD Percentage (%)")
353-
plt.xlabel("Language")
354-
plt.title("TDD percentage observed between programming languages")
351+
# Set the title and specify axis setting
352+
plt.axis('equal')
353+
plt.title("Pie chart showing levels of TDD usage seen in repositories")
354+
plt.rcParams["figure.figsize"] = [7.5, 4.25]
355+
plt.rcParams["figure.autolayout"] = True
355356

356357
# Save the plot
357-
_save_plot(plt, "Language TDD Percentage")
358+
_save_plot(plt, "8 - TDD Repo Categories")
359+
358360

359361
def create_plots():
360-
_create_box_plot()
361-
_create_size_impact_plot()
362-
_create_avg_commit_size_bar_plot()
363-
_create_pie_plot_tdd_author_categories()
364-
_create_pie_plot_overall_tdd_raw()
365-
_create_pie_overall_tdd_percentage()
366-
_create_pie_plot_tdd_repo_categories()
367-
_create_language_tdd_bar_plot()
368-
369-
# REMEMBER TO REMOVE THIS
362+
_create_size_impact_scatter()
363+
_create_tdd_usage_box_plot()
364+
_create_avg_commit_size_bar_graph()
365+
_create_tdd_languages_bar_graph()
366+
_create_raw_tdd_percentage_pie()
367+
_create_overall_tdd_percentage_pie()
368+
_create_tdd_author_categories_pie()
369+
_create_tdd_repo_categories_pie()
370+
370371
create_plots()
371372

372373
'''
373374
todo -
374375
write the adjustments/estimates code in python
375-
376-
todo -
377-
the two categories plots - maybe generalise some of it into a separate function
378376
'''

0 commit comments

Comments
 (0)