Skip to content

Commit 0028ff2

Browse files
authored
Merge pull request #41 from OPPIDA/feat/sarif-parser
2 parents 789b0b8 + 5e4dc4d commit 0028ff2

42 files changed

Lines changed: 11506 additions & 4015 deletions

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.pre-commit-config.yaml

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,4 @@ repos:
2727
hooks:
2828
- id: conventional-pre-commit
2929
stages: [commit-msg]
30-
args: []
31-
32-
- repo: https://github.com/google/osv-scanner
33-
rev: v2.3.1
34-
hooks:
35-
- id: osv-scanner
30+
args: []

README.md

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,10 +23,11 @@ A framework for code security that provides abstractions for static analysis too
2323
**CodeSecTools** is a collection of scripts and wrappers that abstract external resources (such as SAST tools, datasets, and codebases), providing standardized interfaces to help them interact easily.
2424

2525
<div align="center">
26-
<img src="docs/assets/overview.svg" alt="CodeSecTools Overview" style="width: 75%; height: auto;" />
26+
<img src="docs/assets/workflow.svg" alt="Workflow" style="width: 85%; height: auto;" />
27+
<img src="docs/assets/workflow_example.svg" alt="Workflow example" style="width: 85%; height: auto;" />
2728
</div>
2829

29-
For step-by-step instructions on installation, configuration, and basic usage, please refer to the [quick start guide](https://oppida.github.io/CodeSecTools/home/quick_start_guide.html).
30+
For step-by-step instructions on installation, configuration, and basic usage, please refer to the [**quick start guide**](https://oppida.github.io/CodeSecTools/home/quick_start_guide.html).
3031

3132
For more details on the design and integration of SAST tools and datasets in CodeSecTools, please refer to the [documentation](https://oppida.github.io/CodeSecTools).
3233

@@ -47,9 +48,9 @@ For more details on the design and integration of SAST tools and datasets in Cod
4748

4849
|SAST Tool|Languages|Maintained|Included in Docker|Continuous Testing|Last Test Date|
4950
|:---:|:---:|:---:|:---:|:---:|:---:|
50-
|Coverity|Java|⚠️<br>(Deprioritized)||❌<br>(Proprietary)|October 2025|
51+
|Coverity|C/C++, Java|||❌<br>(Proprietary)|February 2026|
5152
|Semgrep Community Edition|C/C++, Java||||[Latest PR](https://github.com/OPPIDA/CodeSecTools/actions/workflows/ci.yaml)|
52-
|Snyk Code|C/C++, Java|||❌<br>(Rate limited)|November 2025|
53+
|Snyk Code|C/C++, Java|||❌<br>(Rate limited)|February 2026|
5354
|Bearer|Java||||[Latest PR](https://github.com/OPPIDA/CodeSecTools/actions/workflows/ci.yaml)|
5455
|SpotBugs|Java||||[Latest PR](https://github.com/OPPIDA/CodeSecTools/actions/workflows/ci.yaml)|
5556
|Cppcheck|C/C++||||[Latest PR](https://github.com/OPPIDA/CodeSecTools/actions/workflows/ci.yaml)|
@@ -97,10 +98,10 @@ Mount necessary directories if you want to include:
9798
- a target (`-v ./myproject:/home/codesectools/myproject`)
9899
- existing CodeSecTools data (`-v $HOME/.codesectools:/home/codesectools/.codesectools`)
99100
100-
A better way is to use the CLI:
101+
A simpler way is to use the CLI:
101102
102103
```bash
103-
$ cstools -d docker --help
104+
$ cstools docker --help
104105
105106
Usage: cstools docker [OPTIONS]
106107

codesectools/datasets/core/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -641,7 +641,7 @@ def validate(self, analysis_results: list[AnalysisResult]) -> GitRepoDatasetData
641641
"fp_cwes": fp_cwes,
642642
"fn_cwes": fn_cwes,
643643
"time": analysis_result.time,
644-
"loc": analysis_result.loc,
644+
"lines_of_codes": analysis_result.lines_of_codes,
645645
}
646646
validated_repos.append(result)
647647

codesectools/sasts/all/cli.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,7 +177,7 @@ def list_() -> None:
177177
"Dataset",
178178
", ".join(
179179
f"[b]{sast.name}[/b]"
180-
for sast in all_sast.sasts
180+
for sast in all_sast.any_sasts
181181
if dataset_full_name in sast.list_results(dataset=True)
182182
),
183183
)
@@ -187,7 +187,7 @@ def list_() -> None:
187187
"Project",
188188
", ".join(
189189
f"[b]{sast.name}[/b]"
190-
for sast in all_sast.sasts
190+
for sast in all_sast.any_sasts
191191
if project in sast.list_results(project=True)
192192
),
193193
)

codesectools/sasts/all/graphics.py

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ class Graphics(CoreGraphics):
1515
project_name (str): The name of the project being visualized.
1616
all_sast (AllSAST): The instance managing all SAST tools.
1717
output_dir (Path): The directory containing the aggregated results.
18-
color_mapping (dict): A dictionary mapping SAST tool names to colors.
18+
sast_color (dict): A dictionary mapping SAST tool names to colors.
1919
sast_names (list[str]): A list of names of the SAST tools involved in the analysis.
2020
plot_functions (list): A list of methods responsible for generating plots.
2121
@@ -26,12 +26,12 @@ def __init__(self, project_name: str) -> None:
2626
self.project_name = project_name
2727
self.all_sast = AllSAST()
2828
self.output_dir = self.all_sast.output_dir / project_name
29-
self.color_mapping = {}
29+
self.sast_color = {}
3030
cmap = plt.get_cmap("Set2")
3131
self.sast_names = []
32-
for i, sast in enumerate(self.all_sast.sasts):
32+
for i, sast in enumerate(self.all_sast.partial_sasts):
3333
if self.project_name in sast.list_results(project=True):
34-
self.color_mapping[sast.name] = cmap(i)
34+
self.sast_color[sast.name] = cmap(i)
3535
self.sast_names.append(sast.name)
3636
self.plot_functions = []
3737

@@ -49,11 +49,11 @@ def __init__(self, project_name: str) -> None:
4949
)
5050

5151
def plot_overview(self) -> Figure:
52-
"""Generate an overview plot with stats by files, SAST tools, and categories."""
52+
"""Generate an overview plot with stats by files, SAST tools, and levels."""
5353
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, layout="constrained")
5454
by_files = self.result.stats_by_files()
5555
by_sasts = self.result.stats_by_sasts()
56-
by_categories = self.result.stats_by_categories()
56+
by_levels = self.result.stats_by_levels()
5757

5858
# Plot by files
5959
X_files, Y_files = [], []
@@ -64,10 +64,10 @@ def plot_overview(self) -> Figure:
6464
X_files.append(shorten_path(k))
6565
Y_files.append(v["count"])
6666

67-
COLORS_COUNT = {v: 0 for k, v in self.color_mapping.items()}
67+
COLORS_COUNT = {v: 0 for k, v in self.sast_color.items()}
6868

69-
for sast in v["sasts"]:
70-
color = self.color_mapping[sast]
69+
for sast_name in v["sasts"]:
70+
color = self.sast_color[sast_name]
7171
COLORS_COUNT[color] += 1
7272

7373
bars = []
@@ -95,41 +95,41 @@ def plot_overview(self) -> Figure:
9595
ax2.bar(
9696
X_sasts,
9797
Y_checkers,
98-
color=[self.color_mapping[s] for s in X_sasts],
98+
color=[self.sast_color[s] for s in X_sasts],
9999
)
100100
ax2.set_xticks(X_sasts, X_sasts, rotation=45, ha="right")
101101
ax2.set_title("Stats by SAST tools")
102102

103-
# Plot by categories
104-
X_categories = ["HIGH", "MEDIUM", "LOW"]
105-
for category in X_categories:
106-
if not by_categories.get(category):
103+
# Plot by levels
104+
X_levels = ["error", "warning", "note", "none"]
105+
for level in X_levels:
106+
if not by_levels.get(level):
107107
continue
108108

109-
sast_counts = by_categories[category]["sast_counts"]
109+
sast_counts = by_levels[level]["sast_counts"]
110110

111111
bars = []
112112
current_height = 0
113113
for sast_name, count in sorted(sast_counts.items()):
114-
color = self.color_mapping[sast_name]
114+
color = self.sast_color[sast_name]
115115
height = count
116116
if height > 0:
117-
bars.append((category, current_height + height, color))
117+
bars.append((level, current_height + height, color))
118118
current_height += height
119119

120-
for category_name, height, color in bars[::-1]:
121-
ax3.bar(category_name, height, color=color)
120+
for level_name, height, color in bars[::-1]:
121+
ax3.bar(level_name, height, color=color)
122122

123-
ax3.set_xticks(X_categories, X_categories, rotation=45, ha="right")
124-
ax3.set_title("Stats by categories")
123+
ax3.set_xticks(X_levels, X_levels, rotation=45, ha="right")
124+
ax3.set_title("Stats by levels")
125125

126126
fig.suptitle(
127127
f"Project {self.project_name}, {len(self.result.files)} files analyzed, {len(self.result.defects)} defects raised",
128128
fontsize=16,
129129
)
130-
labels = list(self.color_mapping.keys())
130+
labels = list(self.sast_color.keys())
131131
handles = [
132-
plt.Rectangle((0, 0), 1, 1, color=self.color_mapping[label])
132+
plt.Rectangle((0, 0), 1, 1, color=self.sast_color[label])
133133
for label in labels
134134
]
135135
plt.legend(handles, labels)
@@ -160,7 +160,7 @@ def plot_top_cwes(self) -> Figure:
160160
sast_counts,
161161
bottom=bottoms,
162162
label=sast_name,
163-
color=self.color_mapping.get(sast_name),
163+
color=self.sast_color.get(sast_name),
164164
)
165165
bottoms = [b + c for b, c in zip(bottoms, sast_counts, strict=False)]
166166

codesectools/sasts/all/parser.py

Lines changed: 28 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,13 @@ class AllSASTAnalysisResult:
1414
"""Represent the aggregated results from multiple SAST analyses on a single project."""
1515

1616
def __init__(self, name: str, analysis_results: dict[str, AnalysisResult]) -> None:
17-
"""Initialize an AllSASTAnalysisResult instance."""
17+
"""Initialize an AllSASTAnalysisResult instance.
18+
19+
Args:
20+
name: The name of the project.
21+
analysis_results: A dictionary of analysis results from various SAST tools.
22+
23+
"""
1824
self.name = name
1925
self.source_path = None
2026
self.analysis_results = analysis_results
@@ -34,22 +40,6 @@ def __init__(self, name: str, analysis_results: dict[str, AnalysisResult]) -> No
3440
self.files |= set(analysis_result.files)
3541
self.defects += analysis_result.defects
3642

37-
self.category_mapping = {}
38-
for sast_name in self.sast_names:
39-
sast = SASTS_ALL[sast_name]["sast"]
40-
for category_name, color in sast.color_mapping.items():
41-
if color.lower() == "red":
42-
self.category_mapping[(sast_name, category_name)] = "HIGH"
43-
elif color.lower() == "orange":
44-
self.category_mapping[(sast_name, category_name)] = "MEDIUM"
45-
elif color.lower() == "yellow":
46-
self.category_mapping[(sast_name, category_name)] = "LOW"
47-
48-
for defect in self.defects:
49-
defect.category = self.category_mapping.get(
50-
(defect.sast, defect.category), "LOW"
51-
)
52-
5343
def __repr__(self) -> str:
5444
"""Return a developer-friendly string representation of the aggregated result."""
5545
return f"""{self.__class__.__name__}(
@@ -78,9 +68,9 @@ def stats_by_files(self) -> dict:
7868
stats = {}
7969
for defect in self.defects:
8070
if defect.filepath_str not in stats.keys():
81-
stats[defect.filepath_str] = {"count": 1, "sasts": [defect.sast]}
71+
stats[defect.filepath_str] = {"count": 1, "sasts": [defect.sast_name]}
8272
else:
83-
stats[defect.filepath_str]["sasts"].append(defect.sast)
73+
stats[defect.filepath_str]["sasts"].append(defect.sast_name)
8474
stats[defect.filepath_str]["count"] += 1
8575

8676
return stats
@@ -89,23 +79,23 @@ def stats_by_sasts(self) -> dict:
8979
"""Calculate statistics on defects, grouped by SAST tool."""
9080
stats = {}
9181
for defect in self.defects:
92-
if defect.sast not in stats.keys():
93-
stats[defect.sast] = {"count": 1}
82+
if defect.sast_name not in stats.keys():
83+
stats[defect.sast_name] = {"count": 1}
9484
else:
95-
stats[defect.sast]["count"] += 1
85+
stats[defect.sast_name]["count"] += 1
9686

9787
return stats
9888

99-
def stats_by_categories(self) -> dict:
100-
"""Calculate statistics on defects, grouped by severity category."""
89+
def stats_by_levels(self) -> dict:
90+
"""Calculate statistics on defects, grouped by severity level."""
10191
stats = {}
10292
for defect in self.defects:
103-
if defect.category not in stats.keys():
104-
stats[defect.category] = {"count": 0, "sast_counts": {}}
93+
if defect.level not in stats.keys():
94+
stats[defect.level] = {"count": 0, "sast_counts": {}}
10595

106-
stats[defect.category]["count"] += 1
107-
sast_counts = stats[defect.category]["sast_counts"]
108-
sast_counts[defect.sast] = sast_counts.get(defect.sast, 0) + 1
96+
stats[defect.level]["count"] += 1
97+
sast_counts = stats[defect.level]["sast_counts"]
98+
sast_counts[defect.sast_name] = sast_counts.get(defect.sast_name, 0) + 1
10999
return stats
110100

111101
def stats_by_cwes(self) -> dict:
@@ -119,14 +109,14 @@ def stats_by_cwes(self) -> dict:
119109
stats[defect.cwe] = {
120110
"count": 1,
121111
"files": [defect.filepath_str],
122-
"sast_counts": {defect.sast: 1},
112+
"sast_counts": {defect.sast_name: 1},
123113
}
124114
else:
125115
stats[defect.cwe]["count"] += 1
126116
if defect.filepath_str not in stats[defect.cwe]["files"]:
127117
stats[defect.cwe]["files"].append(defect.filepath_str)
128-
stats[defect.cwe]["sast_counts"][defect.sast] = (
129-
stats[defect.cwe]["sast_counts"].get(defect.sast, 0) + 1
118+
stats[defect.cwe]["sast_counts"][defect.sast_name] = (
119+
stats[defect.cwe]["sast_counts"].get(defect.sast_name, 0) + 1
130120
)
131121
return stats
132122

@@ -144,7 +134,7 @@ def stats_by_scores(self) -> dict:
144134

145135
defects_same_cwe = 0
146136
for cwe in defects_cwes:
147-
cwes_sasts = {d.sast for d in defects if d.cwe == cwe}
137+
cwes_sasts = {d.sast_name for d in defects if d.cwe == cwe}
148138
if set(self.sast_names) == cwes_sasts:
149139
defects_same_cwe += 1
150140
else:
@@ -162,7 +152,7 @@ def stats_by_scores(self) -> dict:
162152
defects_same_location = 0
163153
defects_same_location_same_cwe = 0
164154
for _, defects_ in defect_locations.items():
165-
if set(defect.sast for defect in defects_) == set(self.sast_names):
155+
if set(defect.sast_name for defect in defects_) == set(self.sast_names):
166156
defects_same_location += 1
167157
defects_by_cwe = {}
168158
for defect in defects_:
@@ -171,14 +161,14 @@ def stats_by_scores(self) -> dict:
171161
defects_by_cwe[defect.cwe].append(defect)
172162

173163
for _, defects_ in defects_by_cwe.items():
174-
if set(defect.sast for defect in defects_) == set(
164+
if set(defect.sast_name for defect in defects_) == set(
175165
self.sast_names
176166
):
177167
defects_same_location_same_cwe += 1
178168
else:
179169
defects_same_location_same_cwe += (
180170
len(
181-
set(defect.sast for defect in defects_)
171+
set(defect.sast_name for defect in defects_)
182172
& set(self.sast_names)
183173
)
184174
- 1
@@ -224,7 +214,7 @@ def prepare_report_data(self) -> dict:
224214
for group in group_successive(defect.lines):
225215
start, end = group[0], group[-1]
226216
locations.append(
227-
(defect.sast, defect.cwe, defect.message, (start, end))
217+
(defect.sast_name, defect.cwe, defect.message, (start, end))
228218
)
229219

230220
report["files"][defect_file] = {
@@ -239,7 +229,7 @@ def prepare_report_data(self) -> dict:
239229
k: v
240230
for k, v in sorted(
241231
report["files"].items(),
242-
key=lambda item: (sum(v for v in item[1]["score"].values())),
232+
key=lambda item: sum(v for v in item[1]["score"].values()),
243233
reverse=True,
244234
)
245235
}

codesectools/sasts/all/report.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,7 +139,7 @@ def generate_single_defect(self, file_data: dict) -> tuple:
139139
else "None"
140140
)
141141
rows.append(
142-
(start, shortcut, defect.sast, cwe_link, defect.message)
142+
(start, shortcut, defect.sast_name, cwe_link, defect.message)
143143
)
144144
else:
145145
cwe_link = (

codesectools/sasts/all/sast.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,15 +19,24 @@ class AllSAST:
1919
def __init__(self) -> None:
2020
"""Initialize the AllSAST instance."""
2121
self.output_dir = USER_OUTPUT_DIR / self.name
22-
self.sasts: list[SAST] = []
22+
self.full_sasts: list[SAST] = []
23+
self.partial_sasts: list[SAST] = []
24+
self.any_sasts: list[SAST] = []
2325
for _, sast_data in SASTS_ALL.items():
2426
if sast_data["status"] == "full":
25-
self.sasts.append(sast_data["sast"]())
27+
self.full_sasts.append(sast_data["sast"]())
28+
self.partial_sasts.append(sast_data["sast"]())
29+
self.any_sasts.append(sast_data["sast"]())
30+
elif sast_data["status"] == "partial":
31+
self.partial_sasts.append(sast_data["sast"]())
32+
self.any_sasts.append(sast_data["sast"]())
33+
else:
34+
self.any_sasts.append(sast_data["sast"]())
2635

2736
self.sasts_by_lang = {}
2837
self.sasts_by_dataset = {}
2938

30-
for sast in self.sasts:
39+
for sast in self.full_sasts:
3140
for lang in sast.supported_languages:
3241
if self.sasts_by_lang.get(lang):
3342
self.sasts_by_lang[lang].append(sast)
@@ -45,7 +54,7 @@ def list_results(
4554
) -> set[str]:
4655
"""List the names of analysis results common to all enabled SAST tools."""
4756
output_dirs = set()
48-
for sast in self.sasts:
57+
for sast in self.partial_sasts:
4958
if not output_dirs:
5059
output_dirs = set(
5160
sast.list_results(project=project, dataset=dataset, limit=limit)

0 commit comments

Comments
 (0)