Reordered functions and neatened up naming

NishilAmin1213 · NishilAmin1213 · commit b4147851c592 · 2025-01-05T15:30:20.000Z
diff --git a/src/analysis/analysis.py b/src/analysis/analysis.py
@@ -23,7 +23,7 @@ def _get_category_index(tdd_percentage):
     elif tdd_percentage <= 100:
         return 5
 
-def _create_size_impact_plot():
+def _create_size_impact_scatter():
     # Read data from the repo_data csv
     repo_data = file_utils.read_csv("repo_data")
 
@@ -64,9 +64,9 @@ def _create_size_impact_plot():
     plt.title("Repo size and TDD percentage")
 
     # Save the plot
-    _save_plot(plt, "Size Impact")
+    _save_plot(plt, "1 - Size Impact")
 
-def _create_box_plot():
+def _create_tdd_usage_box_plot():
     # Read data from the repo_data csv
     repo_data = file_utils.read_csv("repo_data")
 
@@ -102,10 +102,10 @@ def _create_box_plot():
     plt.title("Percentage of tests created before, after and during implementation")
 
     # Save the plot
-    _save_plot(plt, "TDD Usage Statistics")
+    _save_plot(plt, "2 - TDD Usage Statistics")
 
 
-def _create_avg_commit_size_bar_plot():
+def _create_avg_commit_size_bar_graph():
     # Read data from the repo_data csv
     repo_data = file_utils.read_csv("repo_data")
 
@@ -141,54 +141,52 @@ def _create_avg_commit_size_bar_plot():
     plt.title("Average commit size when tests are created \nbefore, after and during implementation")
 
     # Save the plot
-    _save_plot(plt, "Average Commit Size")
+    _save_plot(plt, "3 - Average Commit Size")
 
 
-def _create_pie_plot_tdd_author_categories():
-    # Read data from the author_data csv
-    author_data = file_utils.read_csv("author_data")
-
-    # Initialize Counters
-    #10 25 50 70 90 100
-    counters = [0,0,0,0,0,0]
-
-    for author in author_data:
-        # Calculate the percentage of TDD of the author
-        # we don't count test_during as we want TDD percentage, not before, during and after percentage
-        tdd_percentage = (float(author['Test Before']) / max(1, float(author['Test Before']) + float(author['Test After']))) * 100
+def _create_tdd_languages_bar_graph():
+    # Read data from the repo_data csv
+    repo_data = file_utils.read_csv("repo_data")
 
-        # Update the counters array based on this result
-        counters[_get_category_index(tdd_percentage)] += 1
+    # Initialize variables to store the running totals for each language
+    labels = ["Java", "C++", "C#", "Kotlin", "Python"]
+    percentage_total = {"Java":0, "C++":0, "C#":0, "Kotlin":0, "Python":0}
+    repo_count = {"Java":0, "C++":0, "C#":0, "Kotlin":0, "Python":0}
 
-    # Convert the counters into percentages using a lambda function and map
-    percentages = list(map(lambda x: x/max(1, len(author_data))*100, counters))
+    # Iterate through each repo and update the percentage total and repo count for the respective language
+    for repo in repo_data:
+        language = repo['Language']
+        repo_count[language] += 1
+        try:
+            percentage_total[language] += (int(repo['Test Before']) / (int(repo['Test Before']) + int(repo['Test After']))) * 100
+        except ZeroDivisionError:
+            pass
 
-    # Update labels to include percentage values for each slice
-    labels = ['Non TDD', 'Rarely TDD', 'Occasionally TDD', 'Somewhat TDD', 'Mostly TDD', 'Consistently TDD']
-    for i in range(len(labels)):
-        labels[i] = labels[i] + ' - ' + str(round(percentages[i], 1)) + '%'
+    percentage_avg = []
+    for language in labels:
+        percentage_avg.append((percentage_total[language] / repo_count[language]))
 
     # Clear any existing plot
     plt.clf()
 
-    # Plot the pie
-    colors = ['#225ea8', '#1d91c0', '#41b6c4', '#7fcdbb', '#c7e9b4', '#71cb71']
-    patches, texts = plt.pie(percentages, colors=colors)
+    # Plot the bar chart
+    colors = ['#66c2a5','#fc8d62','#8da0cb','#e78ac3','#a6d854']
+    plt.bar(labels, percentage_avg, align='center', color=colors)
 
-    # Plot the legend
-    plt.legend(patches, labels, loc="upper left")
+    # Place values at the top of each bar
+    for index, value in enumerate(percentage_avg):
+        plt.text(index, value+0.25, round(value, 1), ha='center')
 
-    # Set the title and specify axis setting
-    plt.axis('equal')
-    plt.title("Pie chart showing levels of TDD usage by authors")
-    plt.rcParams["figure.figsize"] = [7.5, 4.25]
-    plt.rcParams["figure.autolayout"] = True
+    # Set title and axes labels
+    plt.ylabel("TDD Percentage (%)")
+    plt.xlabel("Language")
+    plt.title("TDD percentage observed between programming languages")
 
     # Save the plot
-    _save_plot(plt, "TDD Author Categories")
+    _save_plot(plt, "4 - Language TDD Percentage")
 
 
-def _create_pie_plot_overall_tdd_raw():
+def _create_raw_tdd_percentage_pie():
     # Read data from the author_data csv
     repo_data = file_utils.read_csv("repo_data")
 
@@ -227,10 +225,10 @@ def _create_pie_plot_overall_tdd_raw():
     plt.rcParams["figure.autolayout"] = True
 
     # Save the plot
-    _save_plot(plt, "Overall TDD Usage Raw")
+    _save_plot(plt, "5 - Raw TDD Percentage")
 
 
-def _create_pie_overall_tdd_percentage():
+def _create_overall_tdd_percentage_pie():
     # Read data from the author_data csv
     repo_data = file_utils.read_csv("repo_data")
 
@@ -269,26 +267,27 @@ def _create_pie_overall_tdd_percentage():
     plt.rcParams["figure.autolayout"] = True
 
     # Save the plot
-    _save_plot(plt, "Overall TDD Percentage")
+    _save_plot(plt, "6 - Overall TDD Percentage")
+
 
-def _create_pie_plot_tdd_repo_categories():
+def _create_tdd_author_categories_pie():
     # Read data from the author_data csv
-    repo_data = file_utils.read_csv("repo_data")
+    author_data = file_utils.read_csv("author_data")
 
     # Initialize Counters
     #10 25 50 70 90 100
     counters = [0,0,0,0,0,0]
 
-    for repo in repo_data:
+    for author in author_data:
         # Calculate the percentage of TDD of the author
         # we don't count test_during as we want TDD percentage, not before, during and after percentage
-        tdd_percentage = (float(repo['Test Before']) / max(1, float(repo['Test Before']) + float(repo['Test After']))) * 100
+        tdd_percentage = (float(author['Test Before']) / max(1, float(author['Test Before']) + float(author['Test After']))) * 100
 
         # Update the counters array based on this result
         counters[_get_category_index(tdd_percentage)] += 1
 
     # Convert the counters into percentages using a lambda function and map
-    percentages = list(map(lambda x: x/max(1, len(repo_data))*100, counters))
+    percentages = list(map(lambda x: x/max(1, len(author_data))*100, counters))
 
     # Update labels to include percentage values for each slice
     labels = ['Non TDD', 'Rarely TDD', 'Occasionally TDD', 'Somewhat TDD', 'Mostly TDD', 'Consistently TDD']
@@ -307,72 +306,71 @@ def _create_pie_plot_tdd_repo_categories():
 
     # Set the title and specify axis setting
     plt.axis('equal')
-    plt.title("Pie chart showing levels of TDD usage seen in repositories")
+    plt.title("Pie chart showing levels of TDD usage by authors")
     plt.rcParams["figure.figsize"] = [7.5, 4.25]
     plt.rcParams["figure.autolayout"] = True
 
     # Save the plot
-    _save_plot(plt, "TDD Repo Categories")
+    _save_plot(plt, "7 - TDD Author Categories")
 
 
-def _create_language_tdd_bar_plot():
-    # Read data from the repo_data csv
+def _create_tdd_repo_categories_pie():
+    # Read data from the repo_data csv
     repo_data = file_utils.read_csv("repo_data")
 
-    # Initialize variables to store total for each repo
-    labels = ["Java", "C++", "C#", "Kotlin", "Python"]
-    percentage_total = {"Java":0, "C++":0, "C#":0, "Kotlin":0, "Python":0}
-    repo_count = {"Java":0, "C++":0, "C#":0, "Kotlin":0, "Python":0}
+    # Initialize Counters
+    #10 25 50 70 90 100
+    counters = [0,0,0,0,0,0]
 
-    # Iterate through each repo and update the percentage total and repo count for the respective language
     for repo in repo_data:
-        language = repo['Language']
-        repo_count[language] += 1
-        try:
-            percentage_total[language] += (int(repo['Test Before']) / (int(repo['Test Before']) + int(repo['Test After']))) * 100
-        except ZeroDivisionError:
-            pass
+        # Calculate the TDD percentage of the repo
+        # we don't count test_during as we want TDD percentage, not before, during and after percentage
+        tdd_percentage = (float(repo['Test Before']) / max(1, float(repo['Test Before']) + float(repo['Test After']))) * 100
 
-    percentage_avg = []
-    for language in labels:
-        percentage_avg.append((percentage_total[language] / repo_count[language]))
+        # Update the counters array based on this result
+        counters[_get_category_index(tdd_percentage)] += 1
+
+    # Convert the counters into percentages using a lambda function and map
+    percentages = list(map(lambda x: x/max(1, len(repo_data))*100, counters))
+
+    # Update labels to include percentage values for each slice
+    labels = ['Non TDD', 'Rarely TDD', 'Occasionally TDD', 'Somewhat TDD', 'Mostly TDD', 'Consistently TDD']
+    for i in range(len(labels)):
+        labels[i] = labels[i] + ' - ' + str(round(percentages[i], 1)) + '%'
 
     # Clear any existing plot
     plt.clf()
 
-    # Plot the bar chart
-    colors = ['#66c2a5','#fc8d62','#8da0cb','#e78ac3','#a6d854']
-    plt.bar(labels, percentage_avg, align='center', color=colors)
+    # Plot the pie
+    colors = ['#225ea8', '#1d91c0', '#41b6c4', '#7fcdbb', '#c7e9b4', '#71cb71']
+    patches, texts = plt.pie(percentages, colors=colors)
 
-    # Place values at the top of each bar
-    for index, value in enumerate(percentage_avg):
-        plt.text(index, value+0.25, round(value, 1), ha='center')
+    # Plot the legend
+    plt.legend(patches, labels, loc="upper left")
 
-    # Set title and axes labels
-    plt.ylabel("TDD Percentage (%)")
-    plt.xlabel("Language")
-    plt.title("TDD percentage observed between programming languages")
+    # Set the title and specify axis setting
+    plt.axis('equal')
+    plt.title("Pie chart showing levels of TDD usage seen in repositories")
+    plt.rcParams["figure.figsize"] = [7.5, 4.25]
+    plt.rcParams["figure.autolayout"] = True
 
     # Save the plot
-    _save_plot(plt, "Language TDD Percentage")
+    _save_plot(plt, "8 - TDD Repo Categories")
+
 
 def create_plots():
-    _create_box_plot()
-    _create_size_impact_plot()
-    _create_avg_commit_size_bar_plot()
-    _create_pie_plot_tdd_author_categories()
-    _create_pie_plot_overall_tdd_raw()
-    _create_pie_overall_tdd_percentage()
-    _create_pie_plot_tdd_repo_categories()
-    _create_language_tdd_bar_plot()
-
-# REMEMBER TO REMOVE THIS
+    _create_size_impact_scatter()
+    _create_tdd_usage_box_plot()
+    _create_avg_commit_size_bar_graph()
+    _create_tdd_languages_bar_graph()
+    _create_raw_tdd_percentage_pie()
+    _create_overall_tdd_percentage_pie()
+    _create_tdd_author_categories_pie()
+    _create_tdd_repo_categories_pie()
+
 create_plots()
 
 '''
 todo - 
 write the adjustments/estimates code in python
-
-todo - 
-the two categories plots - maybe generalise some of it into a separate function
 '''