Skip to content

Commit 9ef6841

Browse files
committed
Merge branch 'Memory' into bug-bounty-mode
Made-with: Cursor
2 parents 942fc02 + fbb1192 commit 9ef6841

12 files changed

Lines changed: 962 additions & 8 deletions

File tree

pyproject.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,10 @@ dependencies = [
2323
]
2424

2525
[project.optional-dependencies]
26+
memory = [
27+
"pgvector>=0.4.0",
28+
"sentence-transformers>=2.2.0",
29+
]
2630
dev = [
2731
"pytest==8.3.5",
2832
"pytest-asyncio==0.25.3",

src/secnodeapi/infra/config.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,21 @@
22
import os
33

44

5+
def get_semantic_memory_enabled() -> bool:
    """Return whether semantic memory is switched on via the environment.

    Reads ``SECNODE_SEMANTIC_MEMORY_ENABLED`` (default ``"false"``). The value
    is compared case-insensitively against ``true``, ``1`` and ``yes``;
    anything else, including an unset variable, means disabled.

    Returns:
        True when the variable holds one of the accepted truthy spellings.
    """
    raw = os.environ.get("SECNODE_SEMANTIC_MEMORY_ENABLED", "false")
    # Strip before comparing: values coming from .env files or shell exports
    # often carry a trailing newline or padding, which must not silently
    # disable the feature.
    return raw.strip().lower() in {"true", "1", "yes"}
11+
12+
13+
def get_embedding_model() -> str:
    """Return the embedding model name used for semantic memory.

    Reads ``SECNODE_EMBEDDING_MODEL``. When the variable is unset, empty, or
    only whitespace, the default ``sentence-transformers/all-MiniLM-L6-v2``
    is returned.

    Returns:
        The configured model identifier with surrounding whitespace removed,
        or the default model identifier.
    """
    default_model = "sentence-transformers/all-MiniLM-L6-v2"
    # A variable that is set but blank (e.g. `SECNODE_EMBEDDING_MODEL=`) would
    # otherwise yield "" and only fail much later when the model is loaded.
    configured = os.environ.get("SECNODE_EMBEDDING_MODEL", "").strip()
    return configured or default_model
18+
19+
520
def get_queue_backend() -> str:
621
return os.environ.get("SECNODE_QUEUE_BACKEND", "memory")
722

src/secnodeapi/memory/__init__.py

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""Memory subsystem."""
22
from typing import Optional
33

4+
from ..infra.config import get_embedding_model, get_postgres_dsn, get_semantic_memory_enabled
45
from .history.store import AttackHistoryStore
56
from .history.pg_store import PgAttackHistoryStore
67
from .metrics.store import SkillMetricsStore
@@ -43,4 +44,26 @@ def build_memory_service(
4344
raise ValueError(f"Unsupported backend: {backend}. Use 'memory' or 'postgres'.")
4445

4546

46-
__all__ = ["MemoryService", "build_memory_service"]
47+
def build_semantic_memory_service():
    """Build SemanticMemoryService when enabled via SECNODE_SEMANTIC_MEMORY_ENABLED.

    The pgvector backend imports its third-party dependencies lazily (on first
    use), so importing ``.semantic`` alone does not prove they are installed.
    Their presence is therefore probed up front, so that a missing dependency
    yields ``None`` here rather than an ImportError on the first add/search.

    Returns:
        SemanticMemoryService if enabled and dependencies available, else None.
    """
    if not get_semantic_memory_enabled():
        return None

    # Function-scope imports keep this edit self-contained; both are stdlib.
    import importlib.util
    import logging

    logger = logging.getLogger(__name__)

    required = ("asyncpg", "pgvector", "sentence_transformers")
    missing = [name for name in required if importlib.util.find_spec(name) is None]
    if missing:
        logger.warning(
            "Semantic memory is enabled but required packages are missing: %s",
            ", ".join(missing),
        )
        return None

    # Only the import is guarded: errors from configuration lookup or backend
    # construction are real failures and must not be mistaken for "not installed".
    try:
        from .semantic import PgVectorSemanticBackend, SemanticMemoryService
    except ImportError:
        logger.warning(
            "Semantic memory is enabled but the semantic package failed to import",
            exc_info=True,
        )
        return None

    backend = PgVectorSemanticBackend(
        dsn=get_postgres_dsn(),
        embedding_model=get_embedding_model(),
    )
    return SemanticMemoryService(backend=backend)
63+
64+
65+
__all__ = [
66+
"MemoryService",
67+
"build_memory_service",
68+
"build_semantic_memory_service",
69+
]
Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
"""Semantic memory subsystem for long-term cross-session learning."""
2+
from .contracts import (
3+
AuthFlow,
4+
BugBountyProgram,
5+
EndpointPattern,
6+
ExploitAttempt,
7+
ExploitChain,
8+
GlobalExploitPattern,
9+
SemanticMemoryRecord,
10+
SkillPerformance,
11+
StackFingerprint,
12+
TargetProfile,
13+
)
14+
from .backend import PgVectorSemanticBackend, SemanticMemoryBackend
15+
from .service import SemanticMemoryService
16+
17+
__all__ = [
18+
"AuthFlow",
19+
"BugBountyProgram",
20+
"EndpointPattern",
21+
"ExploitAttempt",
22+
"ExploitChain",
23+
"GlobalExploitPattern",
24+
"PgVectorSemanticBackend",
25+
"SemanticMemoryBackend",
26+
"SemanticMemoryRecord",
27+
"SemanticMemoryService",
28+
"SkillPerformance",
29+
"StackFingerprint",
30+
"TargetProfile",
31+
]
Lines changed: 210 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,210 @@
1+
"""Semantic memory backend abstraction and pgvector implementation."""
2+
from __future__ import annotations
3+
4+
import json
5+
import uuid
6+
from typing import Any, Dict, List, Optional, Protocol
7+
8+
from .contracts import SemanticMemoryRecord
9+
10+
# Embedding dimension for all-MiniLM-L6-v2
11+
EMBEDDING_DIM = 384
12+
13+
14+
def _get_embedding_function(model_name: str):
    """Lazy-load sentence-transformers to avoid startup cost when disabled.

    Args:
        model_name: Model identifier passed to ``SentenceTransformer``.

    Returns:
        A ``SentenceTransformer`` instance, or None when the
        ``sentence_transformers`` package cannot be imported.

    Raises:
        Whatever ``SentenceTransformer(model_name)`` raises while loading the
        model; those errors are deliberately not swallowed.
    """
    # Guard only the import. An ImportError raised while constructing the model
    # is a different failure and must not be reported as "package not installed".
    try:
        from sentence_transformers import SentenceTransformer
    except ImportError:
        return None
    return SentenceTransformer(model_name)
21+
22+
23+
class SemanticMemoryBackend(Protocol):
    """Protocol for semantic memory storage and retrieval.

    Structural interface: any object providing these three coroutines
    satisfies it. PgVectorSemanticBackend in this module is one implementation.
    """

    async def add(self, record: SemanticMemoryRecord) -> str:
        """Store ``record`` and return a string.

        In the pgvector implementation the returned string is the record id.
        """
        ...

    async def search(
        self,
        query: str,
        memory_types: Optional[List[str]] = None,
        filters: Optional[Dict[str, Any]] = None,
        limit: int = 20,
    ) -> List[SemanticMemoryRecord]:
        """Return up to ``limit`` records matching the text ``query``.

        ``memory_types`` and ``filters`` optionally narrow the result set; how
        they are interpreted is left to the implementation.
        """
        ...

    async def delete(self, record_id: str) -> bool:
        """Delete the record identified by ``record_id``.

        In the pgvector implementation the result is True when exactly one row
        was removed.
        """
        ...
35+
36+
37+
class PgVectorSemanticBackend:
    """Postgres + pgvector implementation of semantic memory.

    Records live in a ``semantic_memories`` table whose ``embedding`` column is
    a pgvector ``vector``. The connection pool, the ``vector`` extension and
    the schema are created lazily on the first call that needs the database.
    The embedding model is likewise loaded on first use.
    """

    def __init__(
        self,
        dsn: str,
        embedding_model: str = "sentence-transformers/all-MiniLM-L6-v2",
        embedding_dim: int = EMBEDDING_DIM,
    ) -> None:
        """Store configuration only; no connection or model is created here.

        Args:
            dsn: Postgres connection string handed to ``asyncpg.create_pool``.
            embedding_model: Model name handed to ``_get_embedding_function``.
            embedding_dim: Length of the vectors produced by the model; used
                for the ``vector(N)`` column type and to validate embeddings.
        """
        self._dsn = dsn
        self._embedding_model_name = embedding_model
        self._embedding_dim = embedding_dim
        self._model = None
        self._pool = None

    def _get_model(self):
        """Return the embedding model, loading it on first use.

        Raises:
            ImportError: when sentence-transformers is not available.
        """
        if self._model is None:
            self._model = _get_embedding_function(self._embedding_model_name)
        if self._model is None:
            raise ImportError(
                "sentence-transformers is required for semantic memory. "
                "Install with: uv sync --extra memory"
            )
        return self._model

    def _embed(self, text: str) -> List[float]:
        """Encode ``text`` into a list of floats of length ``embedding_dim``.

        Raises:
            ImportError: when the embedding model cannot be loaded.
            ValueError: when the model output length differs from the
                configured ``embedding_dim``.
        """
        # NOTE(review): encode() runs synchronously; add() and search() call it
        # directly from coroutines, so it occupies the event loop while running.
        vector = self._get_model().encode(text, convert_to_numpy=True).tolist()
        if len(vector) != self._embedding_dim:
            # Fail here with a clear message instead of a pgvector dimension
            # error surfacing from the database.
            raise ValueError(
                f"Embedding model {self._embedding_model_name!r} produced "
                f"{len(vector)} dimensions, expected {self._embedding_dim}"
            )
        return vector

    async def _ensure_pool(self):
        """Create the pool and schema once; later calls return immediately.

        The pool is stored on the instance only after the extension, table and
        indexes were created, so a failed setup is retried on the next call
        instead of leaving a pool without a schema behind.
        """
        if self._pool is not None:
            return
        import asyncpg
        from pgvector.asyncpg import register_vector

        # int() guarantees only digits are interpolated into the DDL below
        # (a type modifier cannot be sent as a bound parameter).
        dim = int(self._embedding_dim)

        pool = await asyncpg.create_pool(self._dsn, min_size=1, max_size=5)
        try:
            async with pool.acquire() as conn:
                await conn.execute("CREATE EXTENSION IF NOT EXISTS vector")
                await register_vector(conn)

                # Braces in the JSONB default are doubled because this is an
                # f-string. IF NOT EXISTS means an already existing table keeps
                # whatever vector size it was created with.
                await conn.execute(
                    f"""
                    CREATE TABLE IF NOT EXISTS semantic_memories (
                        id TEXT PRIMARY KEY,
                        memory_type TEXT NOT NULL,
                        content TEXT NOT NULL,
                        embedding vector({dim}),
                        metadata JSONB DEFAULT '{{}}',
                        confidence REAL DEFAULT 0.5,
                        verification_status TEXT DEFAULT 'unverified',
                        created_at TIMESTAMP DEFAULT NOW(),
                        expires_at TIMESTAMP
                    )
                    """
                )
                await conn.execute(
                    """
                    CREATE INDEX IF NOT EXISTS idx_semantic_memories_type
                    ON semantic_memories(memory_type)
                    """
                )
                await conn.execute(
                    """
                    CREATE INDEX IF NOT EXISTS idx_semantic_memories_created
                    ON semantic_memories(created_at DESC)
                    """
                )
        except BaseException:
            # Release the connections of the half-initialised pool, then
            # re-raise the original error unchanged.
            await pool.close()
            raise
        self._pool = pool

    async def add(self, record: SemanticMemoryRecord) -> str:
        """Embed ``record.content`` and insert the record.

        Returns:
            The id the row was stored under: ``record.id`` when set, otherwise
            a freshly generated UUID4 string.
        """
        await self._ensure_pool()
        record_id = record.id or str(uuid.uuid4())
        embedding = self._embed(record.content)

        from pgvector.asyncpg import register_vector

        async with self._pool.acquire() as conn:
            # Registered per acquisition: only the connection used during
            # schema setup was registered in _ensure_pool.
            await register_vector(conn)

            await conn.execute(
                """
                INSERT INTO semantic_memories
                (id, memory_type, content, embedding, metadata, confidence,
                 verification_status, created_at, expires_at)
                VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9)
                """,
                record_id,
                record.memory_type,
                record.content,
                embedding,
                json.dumps(record.metadata),
                record.confidence,
                record.verification_status,
                record.created_at,
                record.expires_at,
            )
        return record_id

    @staticmethod
    def _build_search_query(
        query_embedding: List[float],
        memory_types: Optional[List[str]],
        filters: Optional[Dict[str, Any]],
        limit: int,
    ):
        """Return ``(sql, params)`` for a similarity search.

        Every placeholder is allocated at the moment its value is appended to
        ``params``, so placeholder numbers and parameters cannot drift apart.

        Raises:
            TypeError: for a non-string filter key or an unsupported filter
                value type (supported: str, bool, int, float).
        """
        conditions = ["(expires_at IS NULL OR expires_at > NOW())"]
        params: List[Any] = [query_embedding]

        def bind(value: Any) -> str:
            params.append(value)
            return f"${len(params)}"

        if memory_types:
            placeholders = ", ".join([bind(memory_type) for memory_type in memory_types])
            conditions.append(f"memory_type IN ({placeholders})")

        for key, value in (filters or {}).items():
            if not isinstance(key, str):
                raise TypeError(f"Filter keys must be str, got {type(key).__name__}")
            if not isinstance(value, (str, bool, int, float)):
                raise TypeError(
                    f"Unsupported filter value for {key!r}: {type(value).__name__}"
                )
            # The key is bound as a parameter rather than spliced into the SQL
            # text; the ::text cast selects the (jsonb ->> text) operator.
            field = f"(metadata->>{bind(key)}::text)"
            if isinstance(value, bool):
                # bool is a subclass of int, so it must be tested before the
                # numeric branch. ->> renders JSON booleans as 'true'/'false'.
                conditions.append(f"{field} = {bind('true' if value else 'false')}")
            elif isinstance(value, str):
                conditions.append(f"{field} = {bind(value)}")
            else:
                conditions.append(f"{field}::numeric = {bind(value)}")

        where_clause = " AND ".join(conditions)
        limit_placeholder = bind(limit)
        sql = f"""
            SELECT id, memory_type, content, metadata, confidence,
                   verification_status, created_at, expires_at
            FROM semantic_memories
            WHERE {where_clause}
            ORDER BY embedding <=> $1
            LIMIT {limit_placeholder}
        """
        return sql, params

    @staticmethod
    def _decode_metadata(raw: Any) -> Dict[str, Any]:
        """Turn a stored ``metadata`` value into a dict.

        Without a custom type codec asyncpg returns JSONB as a JSON string, so
        strings are parsed; NULL or empty values become ``{}``.
        """
        if isinstance(raw, (str, bytes, bytearray)):
            raw = json.loads(raw)
        return raw or {}

    async def search(
        self,
        query: str,
        memory_types: Optional[List[str]] = None,
        filters: Optional[Dict[str, Any]] = None,
        limit: int = 20,
    ) -> List[SemanticMemoryRecord]:
        """Return unexpired records ordered by ``embedding <=> query`` distance.

        Args:
            query: Text that is embedded and compared against stored vectors.
            memory_types: When non-empty, only rows whose ``memory_type`` is in
                this list are returned.
            filters: Equality filters on top-level ``metadata`` keys. String
                values compare as text, int/float values compare numerically,
                bool values compare against JSON ``true``/``false``.
            limit: Maximum number of rows returned.

        Raises:
            TypeError: for an unsupported filter key or value type.
        """
        await self._ensure_pool()
        sql, params = self._build_search_query(
            self._embed(query), memory_types, filters, limit
        )

        from pgvector.asyncpg import register_vector

        async with self._pool.acquire() as conn:
            await register_vector(conn)
            rows = await conn.fetch(sql, *params)

        return [
            SemanticMemoryRecord(
                id=row["id"],
                memory_type=row["memory_type"],
                content=row["content"],
                metadata=self._decode_metadata(row["metadata"]),
                confidence=row["confidence"],
                verification_status=row["verification_status"],
                created_at=row["created_at"],
                expires_at=row["expires_at"],
            )
            for row in rows
        ]

    async def delete(self, record_id: str) -> bool:
        """Delete the row with ``record_id``.

        Returns:
            True when the command status reports exactly one deleted row.
        """
        await self._ensure_pool()
        async with self._pool.acquire() as conn:
            status = await conn.execute(
                "DELETE FROM semantic_memories WHERE id = $1",
                record_id,
            )
        return status == "DELETE 1"

    async def close(self) -> None:
        """Close the pool if one was created; safe to call more than once."""
        if self._pool is not None:
            await self._pool.close()
            self._pool = None

0 commit comments

Comments
 (0)