Skip to content

Commit f3b8626

Browse files
authored
Merge pull request #8 from OPPIDA/feat/sast-cppcheck
2 parents (5ef6410 + de2eeb6), commit f3b8626

27 files changed

Lines changed: 690 additions & 116 deletions

File tree

Makefile

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ test-force: ## Run tests in a Docker container while ignoring any stored state
2525

2626
test-debug: ## Spawn an interactive shell in the test container to debug
2727
@docker compose build
28-
@docker compose run --rm test /bin/bash
28+
@docker compose run --rm with-sast /bin/bash
2929

3030
docs-serve: ## Serve the documentation locally
3131
@mkdocs serve --livereload

codesectools/cli.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -160,8 +160,8 @@ def get_downloadable() -> dict[str, DownloadableRequirement | Dataset]:
160160

161161

162162
@cli.command(hidden=download_hidden)
163-
def download(name: download_arg_type = download_arg_value) -> None:
164-
"""Download any missing resources that are available for download."""
163+
def download(name: download_arg_type = download_arg_value, test: bool = False) -> None:
164+
"""Download and install any missing resources that are available for download."""
165165
if name is None:
166166
print("All downloadable resources have been retrieved.")
167167
else:
@@ -174,7 +174,7 @@ def download(name: download_arg_type = download_arg_value) -> None:
174174
if isinstance(downloadable, DownloadableRequirement):
175175
downloadable.download()
176176
else:
177-
downloadable.download_dataset()
177+
downloadable.download_dataset(test=test)
178178

179179

180180
cli.add_typer(build_all_sast_cli())

codesectools/datasets/BenchmarkJava/dataset.py

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66
"""
77

88
import csv
9+
import random
910
from pathlib import Path
1011
from typing import Self
1112

@@ -99,12 +100,28 @@ def __eq__(self, other: str | Self) -> bool:
99100
else:
100101
return False
101102

102-
def download_files(self: Self) -> None:
103-
"""Download the dataset files from the official Git repository."""
103+
def download_files(self: Self, test: bool = False) -> None:
104+
"""Download the dataset files from the official Git repository.
105+
106+
Clones the BenchmarkJava repository and, if in test mode, prunes it to a smaller size.
107+
108+
Args:
109+
test: If True, reduce the number of test files for faster testing.
110+
111+
"""
104112
git.Repo.clone_from(
105113
"https://github.com/OWASP-Benchmark/BenchmarkJava.git", self.directory
106114
)
107115

116+
if test:
117+
testcodes = list(
118+
(
119+
self.directory / "src/main/java/org/owasp/benchmark/testcode"
120+
).iterdir()
121+
)
122+
for to_delete_testcode in random.sample(testcodes, k=len(testcodes) - 50):
123+
to_delete_testcode.unlink()
124+
108125
def load_dataset(self) -> list[TestCode]:
109126
"""Load the BenchmarkJava dataset from its source files.
110127

codesectools/datasets/CVEfixes/dataset.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -42,7 +42,7 @@ def __init__(self, lang: str | None = None) -> None:
4242
self.max_repo_size = 100e6
4343
super().__init__(lang)
4444

45-
def download_files(self: Self) -> None:
45+
def download_files(self: Self, test: bool = False) -> None:
4646
"""Copy the dataset files from the package data directory to the user cache."""
4747
self.directory.mkdir(exist_ok=True, parents=True)
4848
license_file = DATA_DIR / self.name / "LICENSE"

codesectools/datasets/core/dataset.py

Lines changed: 22 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -94,16 +94,33 @@ def prompt_license_agreement(self) -> None:
9494
raise typer.Exit(code=1)
9595

9696
@abstractmethod
97-
def download_files(self) -> None:
98-
"""Download the raw dataset files."""
97+
def download_files(self, test: bool = False) -> None:
98+
"""Download the raw dataset files.
99+
100+
This method must be implemented by subclasses to define how the
101+
raw files for the dataset are obtained.
102+
103+
Args:
104+
test: If True, download a smaller subset of the dataset for testing.
105+
106+
"""
99107
pass
100108

101-
def download_dataset(self) -> None:
102-
"""Handle the full dataset download process, including license prompt and caching."""
109+
def download_dataset(self, test: bool = False) -> None:
110+
"""Handle the full dataset download process, including license prompt and caching.
111+
112+
This method orchestrates the download by first prompting for license
113+
agreement, then calling the `download_files` method, and finally creating
114+
a `.complete` file to mark the dataset as cached.
115+
116+
Args:
117+
test: If True, download a smaller subset of the dataset for testing.
118+
119+
"""
103120
self.prompt_license_agreement()
104121
with Progress() as progress:
105122
progress.add_task(f"Downloading [b]{self.name}[/b]...", total=None)
106-
self.download_files()
123+
self.download_files(test=test)
107124
(self.directory / ".complete").write_bytes(b"\x42")
108125
print(f"[b]{self.name}[/b] has been downloaded at {self.directory}.")
109126

codesectools/sasts/all/cli.py

Lines changed: 54 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -20,7 +20,8 @@
2020
from codesectools.sasts import SASTS_ALL
2121
from codesectools.sasts.all.graphics import ProjectGraphics
2222
from codesectools.sasts.all.sast import AllSAST
23-
from codesectools.sasts.core.sast import PrebuiltSAST
23+
from codesectools.sasts.core.sast import PrebuiltBuildlessSAST, PrebuiltSAST
24+
from codesectools.utils import group_successive
2425

2526

2627
def build_cli() -> typer.Typer:
@@ -72,11 +73,11 @@ def analyze(
7273
),
7374
],
7475
# Additional options
75-
artifact_dir: Annotated[
76+
artifacts: Annotated[
7677
Path | None,
7778
typer.Option(
7879
help="Pre-built artifacts directory (for PrebuiltSAST only)",
79-
metavar="ARTIFACT_DIR",
80+
metavar="ARTIFACTS",
8081
),
8182
] = None,
8283
# Common NOT REQUIRED option
@@ -90,27 +91,28 @@ def analyze(
9091
) -> None:
9192
"""Run analysis on the current project with all available SAST tools."""
9293
for sast in all_sast.sasts_by_lang.get(lang, []):
93-
if isinstance(sast, PrebuiltSAST) and artifact_dir is None:
94-
print(f"{sast.name} required pre-built artifacts for analysis")
94+
if isinstance(sast, PrebuiltBuildlessSAST) and artifacts is None:
9595
print(
96-
"Please provide the directory with artifacts (with --artifact-dir) to include this tool"
96+
f"[i]{sast.name} can use pre-built artifacts ({sast.artefact_name} {sast.artefact_type}) for more accurate analysis"
9797
)
98+
print("[i]Use the flag --artifacts to provide the artifacts")
99+
elif isinstance(sast, PrebuiltSAST) and artifacts is None:
100+
print(
101+
f"[b]Skipping {sast.name} because it requires pre-built artifacts ({sast.artefact_name} {sast.artefact_type})"
102+
)
103+
print("[b]Use the flag --artifacts to provide the artifacts")
98104
continue
99105

100106
output_dir = sast.output_dir / Path.cwd().name
101107
if output_dir.is_dir():
102108
if overwrite:
103109
shutil.rmtree(output_dir)
104-
sast.run_analysis(
105-
lang, Path.cwd(), output_dir, artifact_dir=artifact_dir
106-
)
110+
sast.run_analysis(lang, Path.cwd(), output_dir, artifacts=artifacts)
107111
else:
108112
print(f"Found existing analysis result at {output_dir}")
109113
print("Use --overwrite to overwrite it")
110114
else:
111-
sast.run_analysis(
112-
lang, Path.cwd(), output_dir, artifact_dir=artifact_dir
113-
)
115+
sast.run_analysis(lang, Path.cwd(), output_dir, artifacts=artifacts)
114116

115117
@cli.command(help="Benchmark a dataset using all SAST tools.")
116118
def benchmark(
@@ -323,20 +325,43 @@ def report(
323325
defect_table.add_column("SAST", justify="center")
324326
defect_table.add_column("CWE", justify="center")
325327
defect_table.add_column("Message")
326-
for defect in sorted(set(defect_data["raw"]), key=lambda d: d.location[0]):
327-
if location := defect.location:
328-
start, end = location
329-
shortcut = Text(f"{start}", style=Style(link=f"#L{start}"))
328+
rows = []
329+
for defect in defect_data["raw"]:
330+
groups = group_successive(defect.lines)
331+
if groups:
332+
for group in groups:
333+
start, end = group[0], group[-1]
334+
shortcut = Text(f"{start}", style=Style(link=f"#L{start}"))
335+
cwe_link = (
336+
Text(
337+
f"CWE-{defect.cwe.id}",
338+
style=Style(
339+
link=f"https://cwe.mitre.org/data/definitions/{defect.cwe.id}.html"
340+
),
341+
)
342+
if defect.cwe.id != -1
343+
else "None"
344+
)
345+
rows.append(
346+
(start, shortcut, defect.sast, cwe_link, defect.message)
347+
)
330348
else:
331-
shortcut = "None"
332-
cwe_link = Text(
333-
f"CWE-{defect.cwe.id}",
334-
style=Style(
335-
link=f"https://cwe.mitre.org/data/definitions/{defect.cwe.id}.html"
336-
),
337-
)
338-
defect_table.add_row(shortcut, defect.sast, cwe_link, defect.message)
349+
cwe_link = (
350+
Text(
351+
f"CWE-{defect.cwe.id}",
352+
style=Style(
353+
link=f"https://cwe.mitre.org/data/definitions/{defect.cwe.id}.html"
354+
),
355+
)
356+
if defect.cwe.id != -1
357+
else "None"
358+
)
359+
rows.append(
360+
(float("inf"), "None", defect.sast, cwe_link, defect.message)
361+
)
339362

363+
for row in sorted(rows, key=lambda r: r[0]):
364+
defect_table.add_row(*row[1:])
340365
defect_page.print(defect_table)
341366

342367
# Syntax
@@ -352,7 +377,11 @@ def report(
352377
for location in defect_data["locations"]:
353378
sast, cwe, message, (start, end) = location
354379
for i in range(start, end + 1):
355-
text = f"<b>{sast}</b>: <i>{message} (CWE-{cwe.id})</i>"
380+
text = (
381+
f"<b>{sast}</b>: <i>{message} (CWE-{cwe.id})</i>"
382+
if cwe.id != -1
383+
else f"<b>{sast}</b>: <i>{message}</i>"
384+
)
356385
if highlights.get(i):
357386
highlights[i].add(text)
358387
else:

codesectools/sasts/all/parser.py

Lines changed: 10 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -4,6 +4,7 @@
44

55
from codesectools.sasts import SASTS_ALL
66
from codesectools.sasts.core.parser import AnalysisResult
7+
from codesectools.utils import group_successive
78

89
if TYPE_CHECKING:
910
from codesectools.sasts.core.sast import SAST
@@ -149,12 +150,10 @@ def stats_by_scores(self) -> dict:
149150

150151
defect_locations = {}
151152
for defect in defects:
152-
if any(defect.location):
153-
start, end = defect.location
154-
for line in range(start, end + 1):
155-
if not defect_locations.get(line):
156-
defect_locations[line] = []
157-
defect_locations[line].append(defect)
153+
for line in defect.lines:
154+
if not defect_locations.get(line):
155+
defect_locations[line] = []
156+
defect_locations[line].append(defect)
158157

159158
defects_same_location = 0
160159
defects_same_location_same_cwe = 0
@@ -202,12 +201,11 @@ def prepare_report_data(self) -> dict:
202201

203202
locations = []
204203
for defect in defects:
205-
if any(defect.location):
206-
start, end = defect.location
207-
if start and end:
208-
locations.append(
209-
(defect.sast, defect.cwe, defect.message, (start, end))
210-
)
204+
for group in group_successive(defect.lines):
205+
start, end = group[0], group[-1]
206+
locations.append(
207+
(defect.sast, defect.cwe, defect.message, (start, end))
208+
)
211209

212210
report["defects"][defect_file] = {
213211
"score": scores[defect_file]["score"],

codesectools/sasts/core/cli.py

Lines changed: 24 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@
2323
GitRepoDatasetGraphics,
2424
ProjectGraphics,
2525
)
26-
from codesectools.sasts.core.sast import SAST, PrebuiltSAST
26+
from codesectools.sasts.core.sast import SAST, PrebuiltBuildlessSAST, PrebuiltSAST
2727

2828

2929
class CLIFactory:
@@ -139,17 +139,23 @@ def add_analyze(self: Self, help: str = "") -> None:
139139
140140
"""
141141
# PrebuiltSAST additional options
142-
if isinstance(self.sast, PrebuiltSAST):
143-
artifact_dir_default = typer.Option(
144-
help="Pre-built artifacts directory",
145-
metavar="ARTIFACT_DIR",
142+
if isinstance(self.sast, PrebuiltBuildlessSAST):
143+
artifacts_default = typer.Option(
144+
default=None,
145+
help=f"Pre-built artifacts ({self.sast.artefact_name} {self.sast.artefact_type}) for more accurate analysis",
146+
metavar="ARTIFACTS",
147+
)
148+
elif isinstance(self.sast, PrebuiltSAST):
149+
artifacts_default = typer.Option(
150+
help=f"Pre-built artifacts ({self.sast.artefact_name} {self.sast.artefact_type})",
151+
metavar="ARTIFACTS",
146152
)
147153
else:
148-
artifact_dir_default = typer.Option(
154+
artifacts_default = typer.Option(
149155
default=None,
150156
hidden=True,
151-
help="Pre-built artifacts directory (for PrebuiltSAST only)",
152-
metavar="ARTIFACT_DIR",
157+
help="Pre-built artifacts (for PrebuiltSAST only)",
158+
metavar="ARTIFACTS",
153159
)
154160

155161
@self.cli.command(help=help)
@@ -163,7 +169,7 @@ def analyze(
163169
),
164170
],
165171
# Additional REQUIRED options
166-
artifact_dir: Optional[Path] = artifact_dir_default,
172+
artifacts: Optional[Path] = artifacts_default,
167173
# Common NOT REQUIRED option
168174
overwrite: Annotated[
169175
bool,
@@ -177,23 +183,29 @@ def analyze(
177183
178184
Args:
179185
lang: The source code language to analyze.
180-
artifact_dir: The directory containing pre-built artifacts, required for PrebuiltSAST tools.
186+
artifacts: The path to pre-built artifacts, required for PrebuiltSAST tools.
181187
overwrite: If True, overwrite any existing analysis results for the project.
182188
183189
"""
190+
if isinstance(self.sast, PrebuiltBuildlessSAST) and artifacts is None:
191+
print(
192+
f"[i]{self.sast.name} can use pre-built artifacts ({self.sast.artefact_name} {self.sast.artefact_type}) for more accurate analysis"
193+
)
194+
print("[i]Use the flag --artifacts to provide the artifacts")
195+
184196
output_dir = self.sast.output_dir / Path.cwd().name
185197
if output_dir.is_dir():
186198
if overwrite:
187199
shutil.rmtree(output_dir)
188200
self.sast.run_analysis(
189-
lang, Path.cwd(), output_dir, artifact_dir=artifact_dir
201+
lang, Path.cwd(), output_dir, artifacts=artifacts
190202
)
191203
else:
192204
print(f"Found existing analysis result at {output_dir}")
193205
print("Use --overwrite to overwrite it")
194206
else:
195207
self.sast.run_analysis(
196-
lang, Path.cwd(), output_dir, artifact_dir=artifact_dir
208+
lang, Path.cwd(), output_dir, artifacts=artifacts
197209
)
198210

199211
def add_benchmark(self, help: str = "") -> None:

0 commit comments

Comments
 (0)