Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
85 changes: 71 additions & 14 deletions python/resolver.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,52 @@ def _add_format_metadata(result: dict, data: dict) -> None:
result[field] = data[field]


def _substitute_url_template(template: str, child_data: dict, captured_input: str) -> str:
"""Substitute {var} placeholders in a URL template.

Two substitution modes coexist:

1. Explicit variables — a `variables` dict on the child's data, mapping
variable name to `{"extract": "<regex>"}`. The extract regex is matched
against the captured input and capture group 1 (or group 0 if no
groups) supplies the variable's value. Example: CWE's `{num}` is
extracted from "CWE-79" via `^CWE-(\\d+)$` -> "79".

2. Implicit `{id}` — defaults to the captured input itself unless an
explicit `id` variable was already defined. Example: CVE's
`{id}` in "https://www.cve.org/CVERecord?id={id}" gets "CVE-2021-44228".

The two modes are layered: implicit-{id} fills in only if not already
set explicitly. Unrecognized {placeholders} are left as-is so they're
visible in output rather than silently swallowed.
"""
if "{" not in template:
return template

variables: dict[str, str] = {}

# Mode 1: explicit variables
for var_name, var_def in (child_data.get("variables") or {}).items():
extract_regex = var_def.get("extract") if isinstance(var_def, dict) else None
if not extract_regex:
continue
try:
m = re.match(extract_regex, captured_input)
except re.error:
continue
if m:
variables[var_name] = m.group(1) if m.groups() else m.group(0)

# Mode 2: implicit {id} default
variables.setdefault("id", captured_input)

# Apply substitution
url = template
for name, value in variables.items():
url = url.replace("{" + name + "}", value)
return url


def resolve(store: Store, secid_query: str, registry_dirs: list[str] = None) -> dict:
"""Resolve a SecID string. Returns the API response envelope."""
secid_query = secid_query.strip()
Expand Down Expand Up @@ -231,13 +277,21 @@ def _build_node_result(node: dict, subpath: Optional[str], version: Optional[str
result = {"secid": secid}
if child.get("weight"):
result["weight"] = child["weight"]
if child_data.get("url"):
result["url"] = child_data["url"]
_add_format_metadata(result, child_data)
result["data"] = {
"description": child.get("description", ""),
**{k: v for k, v in child_data.items() if k != "url"},
}
url_template = child_data.get("url")
if url_template:
# URL-bearing result: substitute {var} placeholders;
# do NOT include `data` block (canonical contract).
result["url"] = _substitute_url_template(url_template, child_data, subpath)
_add_format_metadata(result, child_data)
else:
# Description-only result: include `data` block
# with descriptive context (variables is internal,
# not exposed in the public response).
_add_format_metadata(result, child_data)
result["data"] = {
"description": child.get("description", ""),
**{k: v for k, v in child_data.items() if k not in ("url", "variables")},
}
return result
except re.error:
continue
Expand All @@ -253,13 +307,16 @@ def _build_node_result(node: dict, subpath: Optional[str], version: Optional[str
result = {"secid": secid}
if child.get("weight"):
result["weight"] = child["weight"]
if child_data.get("url"):
result["url"] = child_data["url"]
_add_format_metadata(result, child_data)
result["data"] = {
"description": child.get("description", ""),
**{k: v for k, v in child_data.items() if k != "url"},
}
url_template = child_data.get("url")
if url_template:
result["url"] = _substitute_url_template(url_template, child_data, version)
_add_format_metadata(result, child_data)
else:
_add_format_metadata(result, child_data)
result["data"] = {
"description": child.get("description", ""),
**{k: v for k, v in child_data.items() if k not in ("url", "variables")},
}
return result
except re.error:
continue
Expand Down
89 changes: 89 additions & 0 deletions python/test_smoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,95 @@ def test_secid_types_single_source():
)


# ---------------------------------------------------------------------------
# URL template substitution (added in Phase 2.5a)
# ---------------------------------------------------------------------------


def test_substitute_template_no_placeholders():
    """A template with no `{...}` placeholder comes back unchanged."""
    from resolver import _substitute_url_template

    substituted = _substitute_url_template("https://example.com/static", {}, "anything")
    assert substituted == "https://example.com/static"


def test_substitute_implicit_id():
    """With no explicit variables, `{id}` is filled with the full captured input.

    Models the CVE case, where the URL template ends in `?id={id}` and the
    captured input is the complete CVE identifier.
    """
    from resolver import _substitute_url_template

    template = "https://www.cve.org/CVERecord?id={id}"
    captured = "CVE-2021-44228"

    assert (
        _substitute_url_template(template, {}, captured)
        == "https://www.cve.org/CVERecord?id=CVE-2021-44228"
    )


def test_substitute_explicit_variable():
    """An explicit `{num}` is taken from group 1 of `variables.num.extract`.

    Models the CWE case: the extract regex pulls '79' out of 'CWE-79'.
    """
    from resolver import _substitute_url_template

    num_definition = {"extract": r"^CWE-(\d+)$", "description": "Numeric CWE ID"}
    template = "https://cwe.mitre.org/data/definitions/{num}.html"

    result = _substitute_url_template(template, {"variables": {"num": num_definition}}, "CWE-79")

    assert result == "https://cwe.mitre.org/data/definitions/79.html"


def test_substitute_multiple_variables():
    """Two extract regexes fill `{parent}` and `{sub}` in one template.

    Models ATT&CK sub-technique identifiers such as 'T1059.003'.
    """
    from resolver import _substitute_url_template

    extractors = {
        "parent": {"extract": r"^(T\d{4})\.\d{3}$"},
        "sub": {"extract": r"^T\d{4}\.(\d{3})$"},
    }
    template = "https://attack.mitre.org/techniques/{parent}/{sub}/"

    result = _substitute_url_template(template, {"variables": extractors}, "T1059.003")

    assert result == "https://attack.mitre.org/techniques/T1059/003/"


def test_substitute_implicit_id_with_explicit_others():
    """The implicit `{id}` default still applies when other variables are explicit.

    The two modes are layered rather than mutually exclusive, so a template
    using both `{id}` and `{num}` must have both filled in.
    """
    from resolver import _substitute_url_template

    capec_data = {"variables": {"num": {"extract": r"^CAPEC-(\d+)$"}}}
    template = "https://example.com/{id}-num{num}"

    # {id} comes from the whole input, {num} from the extract regex.
    result = _substitute_url_template(template, capec_data, "CAPEC-66")

    assert result == "https://example.com/CAPEC-66-num66"


def test_substitute_unrecognized_placeholder_left_visible():
    """A placeholder with no matching variable is kept verbatim in the output.

    Leaving it visible, instead of dropping it, makes registry mistakes
    easier to spot.
    """
    from resolver import _substitute_url_template

    template = "https://example.com/{unknown}/{id}"

    result = _substitute_url_template(template, {}, "test123")

    # Only {id} resolves (implicit default); {unknown} must survive untouched.
    assert result == "https://example.com/{unknown}/test123"


# ---------------------------------------------------------------------------
# resolve() basic invariants
# ---------------------------------------------------------------------------
Expand Down
Loading