Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
dc4751e
Merge pull request #1 from GaabDevWeb/mysql_integration
GaabDevWeb Apr 12, 2026
56b79b0
Update kernel-status.svg - [Skip GitHub Action]
GaabDevWeb Apr 18, 2026
38a1542
chore: update README with banner image and clean up main.py whitespace
GaabDevWeb Apr 23, 2026
62e32b3
chore: replace PNG banner image with WEBP format in README and remove…
GaabDevWeb Apr 23, 2026
693581d
chore: remove compiled Python cache files from __pycache__ directories
GaabDevWeb Apr 23, 2026
07d5143
Refactor: New structure for SQL Table
gbdiniz Apr 24, 2026
c10c6cb
docs: overhaul README to enhance clarity and detail about KernelBot f…
GaabDevWeb Apr 24, 2026
6a1df1b
Merge pull request mysql-refactor
GaabDevWeb Apr 24, 2026
037f636
Refactor: Transition to MySQL backend and enhance documentation
GaabDevWeb Apr 24, 2026
d61cee0
Enhance retrieval policy and documentation
GaabDevWeb Apr 24, 2026
3a1003a
Refactor logging and enhance setup documentation
GaabDevWeb Apr 25, 2026
964a1ae
Update README.md to include a new banner and project status information
GaabDevWeb Apr 25, 2026
6fdde2a
Update README.md
GaabDevWeb Apr 25, 2026
6a3b331
Refactor README.md to enhance project status presentation and replace…
GaabDevWeb Apr 25, 2026
a324c54
Update README.md to adjust Spider-Man image size for better display
GaabDevWeb Apr 26, 2026
671b3ce
Adjust image width in README.md
GaabDevWeb Apr 26, 2026
273fa9b
Change h3 tag to strong and h3 for emphasis
GaabDevWeb Apr 26, 2026
aea7146
Refactor project structure and remove obsolete files
GaabDevWeb Apr 26, 2026
0ba0ad8
Merge branch 'develop' into mysql-refactor
GaabDevWeb Apr 26, 2026
21bd161
Merge pull request frommysql-refactor
GaabDevWeb Apr 26, 2026
5a471f1
Enhance project documentation and improve logging structure
GaabDevWeb Apr 26, 2026
124ae17
Merge pull request testes
GaabDevWeb Apr 26, 2026
d88cc20
chore: update .gitignore to exclude additional cache and build files
GaabDevWeb Apr 26, 2026
95d7267
Merge branch 'develop' of https://github.com/GaabDevWeb/KernelBot int…
GaabDevWeb Apr 26, 2026
d32f5de
Fix merge conflict in README.md
GaabDevWeb Apr 26, 2026
52045e7
chore: remove example environment file
GaabDevWeb Apr 26, 2026
966a061
Merge branch 'develop' of https://github.com/GaabDevWeb/KernelBot int…
GaabDevWeb Apr 26, 2026
01fa53e
Merge branch 'main' into develop
GaabDevWeb May 2, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 62 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
# =============================================================================
# ACL (KernelBot) — variáveis de ambiente
# Copie para .env e preencha os valores reais:
# copy .env.example .env (Windows PowerShell: Copy-Item .env.example .env)
# O ficheiro .env não deve ir para o Git (.gitignore).
# =============================================================================

# --- OpenRouter (obrigatório) ---
# Chave da API em https://openrouter.ai/keys — usada em todas as chamadas ao LLM.
OPENROUTER_API_KEY=

# --- MySQL (obrigatório para o índice BM25 e ingestão) ---
# Host do servidor MySQL (ex.: 127.0.0.1 ou localhost). Não use 127.0.0.0 — costuma dar timeout.
DB_HOST=
# Porta TCP (padrão MySQL: 3306).
DB_PORT=3306
# Nome da base de dados onde está a tabela `knowledge`.
DB_NAME=
# Utilizador com permissão de leitura (e escrita se correres ingest).
DB_USER=
DB_PASSWORD=

# --- Contexto global (/content sem disciplina) ---
# geral = BM25 em todos os silos e merge por score.
# all = nome alternativo para o mesmo modo (também aceite pelo código).
ACL_GLOBAL_CONTEXT=geral

# --- Contexto fixado (pin na sessão) ---
# Quantos turnos manter chunks fixados antes de voltar a buscar forte.
ACL_PINNED_MAX_TURNS=5
# Tamanho máximo em caracteres do texto fixado no pin.
ACL_PINNED_MAX_CHARS=24000
# Score normalizado abaixo do qual o pin é considerado "fraco" (legado / heurísticas de pin).
ACL_PINNED_WEAK_SCORE=0.4

# --- Política de retrieval (modo strict — ver engine/retrieval.py) ---
# Score BM25 bruto mínimo do melhor candidato; abaixo → hard stop (contexto insuficiente).
ACL_RETRIEVAL_MIN_SCORE=1.5
# Margem mínima entre 1.º e 2.º score; evita ambos muito próximos (ambiguidade).
ACL_RETRIEVAL_MIN_SCORE_MARGIN=0.15
# Cobertura mínima (0–1) dos termos informativos da query nos chunks escolhidos.
ACL_RETRIEVAL_MIN_COVERAGE=0.34
# Igual, mas termos "centrais" da query contam com peso 2× na métrica.
ACL_RETRIEVAL_MIN_COVERAGE_WEIGHTED=0.34
# Número mínimo de termos informativos na pergunta (strict).
ACL_RETRIEVAL_MIN_TERMS=2
# Quantos candidatos o SearchEngine devolve antes de build_decision (1–50).
ACL_RETRIEVAL_CANDIDATE_K=8
# Quantos chunks entram no prompt após passar os gates (1–20).
ACL_RETRIEVAL_TOP_K=4
# Máximo de chunks por fonte (slug) entre os selecionados — diversidade (1–10).
ACL_RETRIEVAL_MAX_CHUNKS_PER_SOURCE=2

# --- Logging ---
# text = linha legível; json = uma linha JSON por evento ACL (grep/agregadores).
# ACL_LOG_FORMAT=text
# ACL_LOG_LEVEL=INFO
# ACL_LOG_LEVEL=DEBUG # inclui evento retrieval_gates_inputs (métricas antes dos cortes)

# --- Não configurável por .env (definido em core/config.py) ---
# A lista de modelos OpenRouter, a URL base e o timeout HTTP estão em core/config.py; os textos
# de system/sticky vêm de ficheiros em core/systemPrompt/ — ajusta lá ou no código se precisares
# de outro stack de modelos.
46 changes: 0 additions & 46 deletions .github/workflows/main.yml

This file was deleted.

34 changes: 33 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,8 +1,40 @@
.env

.pytest_cache/

**/__pycache__/
*.py[cod]
*$py.class
*.so

cursor/
content/
.venv/
.venv_readme_check/
.ruff_cache/
.mypy_cache/
dist/
build/
*.egg-info/
scripts/
21 changes: 0 additions & 21 deletions LICENSE

This file was deleted.

75 changes: 20 additions & 55 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,55 +1,20 @@
# KernelBots (ACL)

Agente de contexto local com RAG (BM25) sobre Markdown em `content/`, interface em `templates/` e respostas em streaming via OpenRouter.

## Requisitos

- Python 3.10+
- Chave `OPENROUTER_API_KEY` no arquivo `.env` na raiz do repositório

## Instalação

```bash
pip install -r requirements.txt
```

## Executar

```bash
python main.py
```

Ou com Uvicorn:

```bash
uvicorn main:app --host 127.0.0.1 --port 8000
```

Abra `http://127.0.0.1:8000`.

## Estrutura

| Caminho | Função |
|--------|--------|
| `main.py` | Orquestração: logging, `SearchEngine`, watchdog, `create_app` |
| `core/` | Config (`Settings`), logging centralizado |
| `engine/` | BM25 (`SearchEngine`), `ContentWatcher`, `ContextManager`, `ChatProvider` |
| `api/` | Rotas FastAPI (`GET /`, `POST /chat`) |
| `app/` | `create_app()`, estado injetado em `app.state` |
| `content/` | Arquivos `.md` indexados |
| `templates/` | UI (Jinja2) |

## Testes

```bash
python -m pytest tests/ -v
```

## Logging

O projeto usa `logging` da biblioteca padrão com loggers prefixados `kernelbots.*` (ex.: `kernelbots.engine.search`, `kernelbots.api.chat`). Para logs estruturados em JSON no stdout, é possível estender `core/logging_config.py` com algo como `structlog` no mesmo ponto de configuração.

## Comandos no chat

- `/content …` — força uso da base local (com fallback para os primeiros chunks se não houver hit BM25).
- `/doc …` — injeta o conteúdo de `documentation.md` quando disponível no índice.
<div align="center">
<img src="frontend/assets/images/KernelBanner.webp" alt="Banner" width="100%" />
</div>

<br />

<div>
<img align="right" width="50%" src="frontend/assets/images/spiderMan.webp" alt="Work in Progress" />

<h3>Em obras (ou quase isso)</h3>

<p>O código já está performando mais que muito sênior por aí, mas a documentação ainda está sendo "indexada" pela minha produtividade.</p>

<p>
<strong>Se você é um recrutador:</strong> O código fala mais que mil palavras. Olhe a pasta <code>engine/</code>.<br />
<strong>Se você é um curioso:</strong> Volte em breve. Ou dê um <code>python main.py</code> e descubra.
</p>


<br clear="all" />
</div>
2 changes: 0 additions & 2 deletions SQL/instructions_for_creation.md

This file was deleted.

10 changes: 0 additions & 10 deletions SQL/schema.sql

This file was deleted.

11 changes: 7 additions & 4 deletions api/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,11 +81,10 @@ async def chat(request: Request) -> StreamingResponse:
log.info("🔄 Comando /reload recebido — reconstruindo índice BM25...")
services.search_engine.rebuild()
chunk_count = len(services.search_engine.chunks)
db_count = sum(1 for c in services.search_engine.chunks if c.get("source", "").startswith("db:"))
md_count = chunk_count - db_count
silo_count = len(services.search_engine.discipline_ids)
status = (
f"Índice reconstruído: {chunk_count} chunk(s) total "
f"({md_count} de arquivos .md + {db_count} do MySQL)."
f"({silo_count} silo(s) do MySQL)."
)
log.info("✅ /reload concluído — %s", status)

Expand All @@ -106,7 +105,11 @@ async def _reload_stream() -> AsyncGenerator[str, None]:
)

return StreamingResponse(
services.chat_provider.stream_response(built.messages, trace=built.trace),
services.chat_provider.stream_response(
built.messages,
trace=built.trace,
decision=built.decision,
),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
Expand Down
5 changes: 2 additions & 3 deletions app/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,7 @@ def create_app(services: AppServices) -> FastAPI:
async def lifespan(app: FastAPI):
log.info("🚀 ACL iniciado e pronto para receber requisições.")
yield
services.observer.stop()
services.observer.join()
log.info("🛑 Watchdog encerrado. Servidor finalizado.")
log.info("🛑 Servidor finalizado.")

app = FastAPI(title="ACL — Agente de Contexto Local", lifespan=lifespan)
app.state.services = services
Expand All @@ -41,4 +39,5 @@ async def lifespan(app: FastAPI):
app.mount("/src", StaticFiles(directory=str(src_dir)), name="src")

app.include_router(router)

return app
3 changes: 0 additions & 3 deletions app/state.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@

from dataclasses import dataclass

from watchdog.observers import Observer

from engine.chat_provider import ChatProvider
from engine.context import ContextManager
from engine.pinned_store import PinnedSessionStore
Expand All @@ -17,5 +15,4 @@ class AppServices:
search_engine: SearchEngine
context_manager: ContextManager
chat_provider: ChatProvider
observer: Observer
pinned_store: PinnedSessionStore
39 changes: 0 additions & 39 deletions content/doc/acl-overview.md

This file was deleted.

Loading