IMPLEMENTED - bar chart for TDD percentage per language (ignoring "test during" commits) AND simple tweaks elsewhere

NishilAmin1213 · NishilAmin1213 · commit 4e185c0cb2e2 · 2025-01-05T15:10:59.000Z
diff --git a/src/analysis/analysis.py b/src/analysis/analysis.py
@@ -44,8 +44,7 @@ def _create_size_impact_plot():
 
     # Set title and axes labels
     plt.xlabel("Repo Size (No. of files)")
-    plt.xlabel("Repo Size (No. of files)")
-    plt.ylabel("Percentage of TDD")
+    plt.ylabel("TDD Percentage (%)")
     plt.title("Repo size and TDD percentage")
 
     # Save the plot
@@ -90,7 +89,7 @@ def _create_box_plot():
     _save_plot(plt, "TDD Usage Statistics")
 
 
-def _create_avg_commit_size_plot():
+def _create_avg_commit_size_bar_plot():
     # Read data from the repo_data csv
     repo_data = file_utils.read_csv("repo_data")
 
@@ -119,7 +118,7 @@ def _create_avg_commit_size_plot():
 
     # Place values at the top of each bar
     for index, value in enumerate([before_avg, after_avg, during_avg]):
-        plt.text(index, value+0.5, round(value, 1), ha='center')
+        plt.text(index, value+0.25, round(value, 1), ha='center')
 
     # Set title and axes labels
     plt.ylabel("Average Commit Size (No. of files)")
@@ -137,7 +136,6 @@ def _create_pie_plot_tdd_author_categories():
     #10 25 50 70 90 100
     counters = [0,0,0,0,0,0]
 
-
     for author in author_data:
         # Calculate the percentage of TDD of the author
         # we don't count test_during as we want TDD percentage, not before, during and after percentage
@@ -202,7 +200,6 @@ def _create_pie_plot_overall_tdd_raw():
     # Convert the data into percentages using a lambda function and map
     percentages = list(map(lambda x: x / max(1, total) * 100, data))
 
-
     # Update labels to include percentage values for each slice
     labels = ['TDD', 'Not TDD', 'Unclear']
     for i in range(len(labels)):
@@ -277,7 +274,6 @@ def _create_pie_plot_tdd_repo_categories():
     #10 25 50 70 90 100
     counters = [0,0,0,0,0,0]
 
-
     for repo in repo_data:
         # Calculate the percentage of TDD of the author
         # we don't count test_during as we want TDD percentage, not before, during and after percentage
@@ -324,24 +320,65 @@ def _create_pie_plot_tdd_repo_categories():
     # Save the plot
     _save_plot(plt, "TDD Repo Categories")
 
+
+def _create_language_tdd_bar_plot():
+    # Read data from the repo_data csv
+    repo_data = file_utils.read_csv("repo_data")
+
+    # Initialize per-language accumulators: summed TDD percentage and number of repos
+    labels = ["Java", "C++", "C#", "Kotlin", "Python"]
+    percentage_total = {"Java":0, "C++":0, "C#":0, "Kotlin":0, "Python":0}
+    repo_count = {"Java":0, "C++":0, "C#":0, "Kotlin":0, "Python":0}
+
+    # Iterate through each repo and update the percentage total and repo count for the respective language
+    for repo in repo_data:
+        language = repo['Language']
+        repo_count[language] += 1
+        try:
+            percentage_total[language] += (int(repo['Test Before']) / (int(repo['Test Before']) + int(repo['Test After']))) * 100
+        except ZeroDivisionError:
+            pass
+
+    percentage_avg = []
+    for language in labels:
+        percentage_avg.append((percentage_total[language] / repo_count[language]))
+
+    # Clear any existing plot
+    plt.clf()
+
+    # Plot the bar chart
+    colors = ['#66c2a5','#fc8d62','#8da0cb','#e78ac3','#a6d854']
+    plt.bar(labels, percentage_avg, align='center', color=colors)
+
+    # Place values at the top of each bar
+    for index, value in enumerate(percentage_avg):
+        plt.text(index, value+0.25, round(value, 1), ha='center')
+
+    # Set title and axes labels
+    plt.ylabel("TDD Percentage (%)")
+    plt.xlabel("Language")
+    plt.title("TDD percentage observed between programming languages")
+
+    # Save the plot
+    _save_plot(plt, "Language TDD Percentage")
+
 def create_plots():
     _create_box_plot()
     _create_size_impact_plot()
-    _create_avg_commit_size_plot()
+    _create_avg_commit_size_bar_plot()
     _create_pie_plot_tdd_author_categories()
     _create_pie_plot_overall_tdd_raw()
     _create_pie_overall_tdd_percentage()
     _create_pie_plot_tdd_repo_categories()
+    _create_language_tdd_bar_plot()
 
-# REMEMBER TO REMOVE THESE
+# REMEMBER TO REMOVE THIS
 create_plots()
 
 '''
 todo - 
 write the adjustments/estimates code in python
 
 todo - 
-
-bar chart for tdd percentace per langange - ignoring during
-
+the two categories plots - maybe generalise some of it into a separate function
 '''