Skip to content

Commit d25f277

Browse files
authored
Merge pull request #6 from OPPIDA/fix/sast-various
2 parents aba3126 + 5dd9f8f commit d25f277

13 files changed

Lines changed: 85 additions & 74 deletions

File tree

codesectools/datasets/core/dataset.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,7 +9,6 @@
99
from __future__ import annotations
1010

1111
from abc import ABC, abstractmethod
12-
from pathlib import Path
1312
from typing import TYPE_CHECKING
1413

1514
import git
@@ -22,6 +21,7 @@
2221
from codesectools.utils import USER_CACHE_DIR
2322

2423
if TYPE_CHECKING:
24+
from pathlib import Path
2525
from typing import Self
2626

2727
from codesectools.sasts.core.parser import AnalysisResult, Defect
@@ -296,7 +296,7 @@ def validate(self, analysis_result: AnalysisResult) -> FileDatasetData:
296296
if not defect.cwe or defect.cwe.id == -1:
297297
continue
298298

299-
file_cwe_pair = (Path(defect.file).name, defect.cwe) # TODO: USE FULL PATH
299+
file_cwe_pair = (defect.filename, defect.cwe)
300300
if file_cwe_pair not in unique_reported_defects:
301301
unique_reported_defects[file_cwe_pair] = defect
302302

@@ -563,18 +563,18 @@ def validate(self, analysis_results: list[AnalysisResult]) -> GitRepoDatasetData
563563

564564
# 1. Process reported defects to get unique (file, cwe) pairs
565565
# and keep one original Defect object for each to retain metadata.
566-
unique_reported_defects: dict[tuple[str, CWE], Defect] = {}
566+
unique_reported_defects: dict[tuple[Path, CWE], Defect] = {}
567567
for defect in analysis_result.defects:
568568
if not defect.cwe or defect.cwe.id == -1:
569569
continue
570570

571-
file_cwe_pair = (defect.file_path, defect.cwe)
571+
file_cwe_pair = (defect.filepath, defect.cwe)
572572
if file_cwe_pair not in unique_reported_defects:
573573
unique_reported_defects[file_cwe_pair] = defect
574574

575575
# 2. Classify unique reported defects as TP or FP.
576-
tp_defects_map: dict[tuple[str, CWE], Defect] = {}
577-
fp_defects_map: dict[tuple[str, CWE], Defect] = {}
576+
tp_defects_map: dict[tuple[Path, CWE], Defect] = {}
577+
fp_defects_map: dict[tuple[Path, CWE], Defect] = {}
578578

579579
if repo.has_vuln:
580580
for (filename, cwe), defect in unique_reported_defects.items():

codesectools/sasts/all/cli.py

Lines changed: 10 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -157,13 +157,21 @@ def list_() -> None:
157157
table.add_row(
158158
dataset_full_name,
159159
"Dataset",
160-
", ".join(f"[b]{sast.name}[/b]" for sast in all_sast.sasts),
160+
", ".join(
161+
f"[b]{sast.name}[/b]"
162+
for sast in all_sast.sasts
163+
if dataset_full_name in sast.list_results(dataset=True)
164+
),
161165
)
162166
for project in all_sast.list_results(project=True):
163167
table.add_row(
164168
project,
165169
"Project",
166-
", ".join(f"[b]{sast.name}[/b]" for sast in all_sast.sasts),
170+
", ".join(
171+
f"[b]{sast.name}[/b]"
172+
for sast in all_sast.sasts
173+
if project in sast.list_results(project=True)
174+
),
167175
)
168176

169177
print(table)

codesectools/sasts/all/parser.py

Lines changed: 14 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -76,11 +76,11 @@ def stats_by_files(self) -> dict:
7676
"""Calculate statistics on defects, grouped by file."""
7777
stats = {}
7878
for defect in self.defects:
79-
if defect.file_path not in stats.keys():
80-
stats[defect.file_path] = {"count": 1, "sasts": [defect.sast]}
79+
if defect.filepath_str not in stats.keys():
80+
stats[defect.filepath_str] = {"count": 1, "sasts": [defect.sast]}
8181
else:
82-
stats[defect.file_path]["sasts"].append(defect.sast)
83-
stats[defect.file_path]["count"] += 1
82+
stats[defect.filepath_str]["sasts"].append(defect.sast)
83+
stats[defect.filepath_str]["count"] += 1
8484

8585
return stats
8686

@@ -117,13 +117,13 @@ def stats_by_cwes(self) -> dict:
117117
if defect.cwe not in stats:
118118
stats[defect.cwe] = {
119119
"count": 1,
120-
"files": [defect.file_path],
120+
"files": [defect.filepath_str],
121121
"sast_counts": {defect.sast: 1},
122122
}
123123
else:
124124
stats[defect.cwe]["count"] += 1
125-
if defect.file_path not in stats[defect.cwe]["files"]:
126-
stats[defect.cwe]["files"].append(defect.file_path)
125+
if defect.filepath_str not in stats[defect.cwe]["files"]:
126+
stats[defect.cwe]["files"].append(defect.filepath_str)
127127
stats[defect.cwe]["sast_counts"][defect.sast] = (
128128
stats[defect.cwe]["sast_counts"].get(defect.sast, 0) + 1
129129
)
@@ -133,9 +133,9 @@ def stats_by_scores(self) -> dict:
133133
"""Calculate a risk score for each file based on defect data."""
134134
defect_files = {}
135135
for defect in self.defects:
136-
if defect.file_path not in defect_files:
137-
defect_files[defect.file_path] = []
138-
defect_files[defect.file_path].append(defect)
136+
if defect.filepath_str not in defect_files:
137+
defect_files[defect.filepath_str] = []
138+
defect_files[defect.filepath_str].append(defect)
139139

140140
stats = {}
141141
for defect_file, defects in defect_files.items():
@@ -192,9 +192,9 @@ def prepare_report_data(self) -> dict:
192192

193193
defect_files = {}
194194
for defect in self.defects:
195-
if defect.file_path not in defect_files:
196-
defect_files[defect.file_path] = []
197-
defect_files[defect.file_path].append(defect)
195+
if defect.filepath_str not in defect_files:
196+
defect_files[defect.filepath_str] = []
197+
defect_files[defect.filepath_str].append(defect)
198198

199199
for defect_file, defects in defect_files.items():
200200
for k, v in scores[defect_file]["score"].items():
@@ -211,7 +211,7 @@ def prepare_report_data(self) -> dict:
211211

212212
report["defects"][defect_file] = {
213213
"score": scores[defect_file]["score"],
214-
"source_path": str(self.source_path / defect.file),
214+
"source_path": str(self.source_path / defect.filepath),
215215
"locations": locations,
216216
"raw": defects,
217217
}

codesectools/sasts/all/sast.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -62,7 +62,7 @@ def list_results(
6262
sast.list_results(project=project, dataset=dataset, limit=limit)
6363
)
6464
else:
65-
output_dirs &= set(
65+
output_dirs |= set(
6666
sast.list_results(project=project, dataset=dataset, limit=limit)
6767
)
6868
return output_dirs

codesectools/sasts/core/parser.py

Lines changed: 16 additions & 14 deletions
Original file line number | Diff line number | Diff line change
@@ -18,8 +18,9 @@ class Defect:
1818
1919
Attributes:
2020
sast (str): The name of the SAST tool that reported the defect.
21-
file (Path): The path to the file where the defect was found.
22-
file_path (str): The string representation of the file path.
21+
filepath (Path): The path to the file where the defect was found.
22+
filepath_str (str): The string representation of the file path.
23+
filename (str): The name of the file.
2324
checker (str): The name of the checker or rule that reported the defect.
2425
category (str): The category of the checker (e.g., security, performance).
2526
cwe (CWE): The CWE associated with the defect.
@@ -33,7 +34,7 @@ class Defect:
3334

3435
def __init__(
3536
self,
36-
file: Path,
37+
filepath: Path,
3738
checker: str,
3839
category: str,
3940
cwe: CWE,
@@ -44,7 +45,7 @@ def __init__(
4445
"""Initialize a Defect instance.
4546
4647
Args:
47-
file: The file path of the defect.
48+
filepath: The file path of the defect.
4849
checker: The name of the rule/checker.
4950
category: The category of the checker.
5051
cwe: The CWE associated with the defect.
@@ -53,8 +54,9 @@ def __init__(
5354
data: Raw data from the SAST tool for this defect.
5455
5556
"""
56-
self.file = file
57-
self.file_path = str(file)
57+
self.filepath = filepath
58+
self.filepath_str = str(filepath)
59+
self.filename = filepath.name
5860
self.checker = checker
5961
self.category = category
6062
self.cwe = cwe
@@ -206,9 +208,9 @@ def stats_by_checkers(self) -> dict:
206208
stats = {}
207209
for defect in self.defects:
208210
if defect.checker not in stats.keys():
209-
stats[defect.checker] = {"count": 1, "files": [defect.file_path]}
211+
stats[defect.checker] = {"count": 1, "files": [defect.filepath_str]}
210212
else:
211-
stats[defect.checker]["files"].append(defect.file_path)
213+
stats[defect.checker]["files"].append(defect.filepath_str)
212214
stats[defect.checker]["count"] += 1
213215

214216
return stats
@@ -250,11 +252,11 @@ def stats_by_files(self) -> dict:
250252
"""
251253
stats = {}
252254
for defect in self.defects:
253-
if defect.file_path not in stats.keys():
254-
stats[defect.file_path] = {"count": 1, "checkers": [defect.checker]}
255+
if defect.filepath_str not in stats.keys():
256+
stats[defect.filepath_str] = {"count": 1, "checkers": [defect.checker]}
255257
else:
256-
stats[defect.file_path]["checkers"].append(defect.checker)
257-
stats[defect.file_path]["count"] += 1
258+
stats[defect.filepath_str]["checkers"].append(defect.checker)
259+
stats[defect.filepath_str]["count"] += 1
258260

259261
return stats
260262

@@ -269,9 +271,9 @@ def stats_by_cwes(self) -> dict:
269271
stats = {}
270272
for defect in self.defects:
271273
if defect.cwe not in stats.keys():
272-
stats[defect.cwe] = {"count": 1, "files": [defect.file_path]}
274+
stats[defect.cwe] = {"count": 1, "files": [defect.filepath_str]}
273275
else:
274-
stats[defect.cwe]["files"].append(defect.file_path)
276+
stats[defect.cwe]["files"].append(defect.filepath_str)
275277
stats[defect.cwe]["count"] += 1
276278

277279
return stats

codesectools/sasts/core/sast/__init__.py

Lines changed: 9 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -187,17 +187,19 @@ def save_results(self, project_dir: Path, output_dir: Path, extra: dict) -> None
187187
parent_dir = path_from_root.parent
188188
filename = path_from_root.name
189189
if "*" not in filename:
190-
file_path = project_dir / parent_dir / filename
191-
if file_path.is_file():
192-
shutil.copy2(file_path, output_dir / filename)
190+
filepath = project_dir / parent_dir / filename
191+
if filepath.is_file():
192+
if not filepath == output_dir / filename:
193+
shutil.copy2(filepath, output_dir / filename)
193194
else:
194195
if required:
195196
missing_files.append(filename)
196197
else:
197-
file_paths = (project_dir / parent_dir).glob(filename)
198-
if file_paths:
199-
for file_path in file_paths:
200-
shutil.copy2(file_path, output_dir / file_path.name)
198+
filepaths = (project_dir / parent_dir).glob(filename)
199+
if filepaths:
200+
for filepath in filepaths:
201+
if not filepath == output_dir / filename:
202+
shutil.copy2(filepath, output_dir / filepath.name)
201203
else:
202204
if required:
203205
missing_files.append(filename)

codesectools/sasts/tools/Bearer/parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ def __init__(self, defect_data: dict, severity: str) -> None:
2828
2929
"""
3030
super().__init__(
31-
file=Path(defect_data["filename"]),
31+
filepath=Path(defect_data["filename"]),
3232
checker=defect_data["id"],
3333
category=severity,
3434
cwe=CWEs.from_id(int(defect_data["cwe_ids"][0])),

codesectools/sasts/tools/Coverity/parser.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,7 @@ def __init__(self, defect_data: dict) -> None:
7878
7979
"""
8080
super().__init__(
81-
file=Path(defect_data["file"]),
81+
filepath=Path(defect_data["file"]),
8282
checker=defect_data["checker"],
8383
category=None,
8484
cwe=CWEs.from_id(TYPE_TO_CWE.get(defect_data["type"], -1)),
@@ -158,7 +158,7 @@ def __init__(
158158

159159
self.time = int(self.metrics["time"])
160160

161-
self.files = list(map(lambda line: Path(line).name, captured_list.splitlines()))
161+
self.files = list(map(lambda line: str(Path(line)), captured_list.splitlines()))
162162

163163
file_count = 0
164164
for lang, pattern in LANGUAGES.items():
@@ -202,16 +202,16 @@ def load_from_output_dir(cls, output_dir: Path) -> Self:
202202
cmdout = json.load((output_dir / "cstools_output.json").open())
203203

204204
# Analysis metrics
205-
file_path = output_dir / "ANALYSIS.metrics.xml"
206-
if file_path.is_file():
207-
analysis_data = xmltodict.parse(file_path.open("rb"))
205+
filepath = output_dir / "ANALYSIS.metrics.xml"
206+
if filepath.is_file():
207+
analysis_data = xmltodict.parse(filepath.open("rb"))
208208
else:
209209
raise MissingFile(["ANALYSIS.metrics.xml"])
210210

211211
# Config
212-
file_path = output_dir / "coverity.yaml"
213-
if file_path.is_file():
214-
config_data = yaml.load(file_path.open("r"), Loader=yaml.Loader)
212+
filepath = output_dir / "coverity.yaml"
213+
if filepath.is_file():
214+
config_data = yaml.load(filepath.open("r"), Loader=yaml.Loader)
215215
else:
216216
config_data = None
217217

codesectools/sasts/tools/SemgrepCE/parser.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ def __init__(self, defect_data: dict) -> None:
3737
3838
"""
3939
super().__init__(
40-
file=Path(defect_data["path"]),
40+
filepath=Path(defect_data["path"]),
4141
checker=defect_data["check_id"].split(".")[-1],
4242
category=defect_data["extra"]["metadata"].get("impact", "NONE"),
4343
cwe=CWEs.from_string(defect_data["extra"]["metadata"].get("cwe", [""])[0]),

codesectools/sasts/tools/SnykCode/parser.py

Lines changed: 5 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -24,7 +24,7 @@ class SnykCodeIssue(Defect):
2424

2525
def __init__(
2626
self,
27-
file: Path,
27+
filepath: Path,
2828
checker: str,
2929
category: str,
3030
cwe: CWE,
@@ -35,7 +35,7 @@ def __init__(
3535
"""Initialize a SnykCodeIssue instance.
3636
3737
Args:
38-
file: The file path of the defect.
38+
filepath: The file path of the defect.
3939
checker: The name of the rule/checker.
4040
category: The category of the checker.
4141
cwe: The CWE associated with the defect.
@@ -44,7 +44,7 @@ def __init__(
4444
data: Raw data from the SAST tool for this defect.
4545
4646
"""
47-
super().__init__(file, checker, category, cwe, message, location, data)
47+
super().__init__(filepath, checker, category, cwe, message, location, data)
4848

4949

5050
class SnykCodeAnalysisResult(AnalysisResult):
@@ -85,7 +85,7 @@ def __init__(self, output_dir: Path, result_data: dict, cmdout: dict) -> None:
8585
continue
8686

8787
defect = SnykCodeIssue(
88-
file=Path(
88+
filepath=Path(
8989
result["locations"][0]["physicalLocation"]["artifactLocation"][
9090
"uri"
9191
]
@@ -112,7 +112,7 @@ def __init__(self, output_dir: Path, result_data: dict, cmdout: dict) -> None:
112112
)
113113
self.defects.append(defect)
114114

115-
self.files = list(set(d.file for d in self.defects))
115+
self.files = list(set(d.filepath_str for d in self.defects))
116116

117117
@classmethod
118118
def load_from_output_dir(cls, output_dir: Path) -> Self:

0 commit comments

Comments
 (0)