diff --git a/.env.example b/.env.example index 8898b64e..02da5b63 100644 --- a/.env.example +++ b/.env.example @@ -6,7 +6,7 @@ NODE_ENV=development # EdgeOps Console static embed (npm run build:console → dev/console/build) EDGEOPS_CONSOLE_PATH=dev/console/build # must be absolute path -EDGEOPS_CONSOLE_VERSION=v1.0.3 +EDGEOPS_CONSOLE_VERSION=v1.0.8 # EDGEOPS_CONSOLE_REPO=https://github.com/Datasance/edgeops-console # EDGEOPS_CONSOLE_FLAVOR=datasance diff --git a/.github/actions/set-build-env/action.yml b/.github/actions/set-build-env/action.yml index 7c831f55..5d0cfee2 100644 --- a/.github/actions/set-build-env/action.yml +++ b/.github/actions/set-build-env/action.yml @@ -8,7 +8,7 @@ runs: shell: bash run: | VERSION="${{ env.EDGEOPS_CONSOLE_VERSION }}" - if [ -z "$VERSION" ]; then VERSION="1.0.3"; fi + if [ -z "$VERSION" ]; then VERSION="1.0.8"; fi echo "EDGEOPS_CONSOLE_VERSION=$VERSION" >> "${GITHUB_ENV}" REPO="${{ env.EDGEOPS_CONSOLE_REPO }}" diff --git a/CHANGELOG.md b/CHANGELOG.md index 3bb86a1a..480ec2ed 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog -## [v3.8.0] - 2026-06-17 + +## [v3.8.0] - 2026-07-03 Controller v3.8 is a **greenfield** release aligned with **Edgelet**. There is **no upgrade path** from v3.7: use a fresh database and redeploy Controller + Edgelet together. @@ -42,7 +43,7 @@ Controller v3.8 is a **greenfield** release aligned with **Edgelet**. There is * #### WebSocket exec — multi-session - **Microservice exec REST removed** — `POST/DELETE /api/v3/microservices/:uuid/exec` and `…/system/:uuid/exec` no longer exist. Open exec with **direct WebSocket** only: `WS /api/v3/microservices/exec/:uuid` (or `…/system/exec/:uuid`). -- **3 concurrent exec sessions** per microservice (was 1 user exec WS per MS). +- **5 concurrent exec sessions** per microservice (was 1 user exec WS per MS). - **Per-session lifecycle** — closing one exec session deletes only that session row only (no microservice-level exec flag). 
- **`execEnabled` removed** — dropped `microservices.exec_enabled` column and agent MS list field; exec attach is poll-driven only (`GET /agent/exec/sessions`). - **Agent exec discovery** — new `GET /api/v3/agent/exec/sessions` when change tracking reports `execSessions: true`. @@ -108,12 +109,31 @@ Controller v3.8 is a **greenfield** release aligned with **Edgelet**. There is * - **K8s control plane:** hub **`iofog-router`** ConfigMap patches serialized via DB lock; K8s Service create/update/delete with LoadBalancer watch timeout. - **`service-bridge-config.js`** — full recompute of service-derived TCP bridge config per fog on reconcile (preserves router base config). - **SQLite single-node production hardening** — WAL + `busy_timeout` pragmas, reconcile task claim retry on `SQLITE_BUSY`, staggered startup for reconcile-heavy background jobs (`settings.jobStartupDelaySeconds`). -- **WebSocket exec & log session hardening** — quotas (**3 exec** / 3 log WS per resource), per-session exec lifecycle, 60s/120s pending timeouts, 8h exec max, 30s graceful drain, OTEL metrics, HA AMQP fail-fast, integration tests, swagger WS protocol docs, operator guide (`docs/operations/ws-sessions.md`). -- **Multi exec sessions** — `GET /api/v3/agent/exec/sessions`; agent exec WS `…/agent/exec/microservice/:uuid/:sessionId`; user ACTIVATION with `sessionId`; `MicroserviceExecSessions` table; `execMaxConcurrentPerResource` config (default **3**). +- **WebSocket exec & log session hardening** — quotas (**5 exec** / 5 log WS per resource), per-session exec lifecycle, 60s/120s pending timeouts, 8h exec max, 30s graceful drain, OTEL metrics, HA AMQP fail-fast, integration tests, swagger WS protocol docs, operator guide (`docs/operations/ws-sessions.md`). 
+- **Multi exec sessions** — `GET /api/v3/agent/exec/sessions`; agent exec WS `…/agent/exec/microservice/:uuid/:sessionId`; user ACTIVATION with `sessionId`; `MicroserviceExecSessions` table; `execMaxConcurrentPerResource` config (default **5**). - **WebSocket relay production** — unified **`WsRelayTransport`** abstraction; cross-replica exec/log relay backend selected at startup by **`nats.enabled`** (`NATS_ENABLED`): **AMQP** router pool (8 connections per replica, overflow recovery, sendable gating) when `false`, **NATS Core** pub/sub on platform hub (`controller-relay` account) when `true`. Fail-fast activation on both transports; log backpressure drops `LOG_LINE` under pressure. Config: `server.webSocket.relay.amqp.*`, `server.webSocket.relay.nats.*`. No new relay env var; HA swagger/docs updated per R112. +- **Transaction safety** — unified **`runInTransaction()`** write path for API, jobs, and WebSocket cleanup; **`fakeTransaction`** and **`bypassQueue`** removed; **`ReconcileOutbox`** transactional outbox with background drainer; SQLite priority write queue (`interactive` > `background`); mysql/postgres reconcile task claims use **`FOR UPDATE SKIP LOCKED`**; OTEL DB metrics and ops runbook (`docs/operations/database-transactions.md`). **Breaking: internal only** — no agent wire or public REST shape changes. +- **Pre-close transaction audit** — fixes SQLite hangs from nested `generateTransaction` (`certificate-service` → `SecretService` tx propagation), JTI cleanup job queue bypass, OAuth interaction OIDC reads outside tx, external-mode user IdP HTTP outside tx, service platform LoadBalancer watch outside long tx; extended grep gates and unit tests. Threads optional `transaction` through `cert.js` `loadCA` / `getCAFromK8sSecret` / `getCAFromInput` so fog platform reconcile no longer deadlocks on SQLite when signing site-server certs after router-site-ca. 
NATS hub ConfigMap cluster routes, StatefulSet rollout, and JWT bundle K8s patches moved outside DB transaction bodies in `nats-service.js` (phased reconcile + `afterCommit` deferral when called from `reconcileFog`). HashiCorp Vault HTTP for secret/configmap/registry create/update/delete deferred via `transaction.afterCommit` (`vault-transaction-helper.js`); DB rows use internal encryption during tx, vault store/delete after commit. Splits `FogPlatformService.reconcileFog` into phased background transactions (`prepare` → `certPrep` → NATS self-tx → `platform` → `finalize`) mirroring service-platform reconcile — no single tx spans cert generation, NATS, and router reconcile end-to-end. AMQP router cert provisioning in one transaction; agent CA endpoint without pointless DB tx; removed unused services-service TCP bridge K8s-in-tx helpers (operator CRUD uses enqueue + service-platform reconcile only); OIDC provider adapter routed through write queue. +- **unified ALS transaction context** — `runWithTransactionContext` registers existing Sequelize transactions in AsyncLocalStorage; `generateTransaction` uses it for explicit-tx and ALS-inject paths so nested `runInTransaction()` reuses the parent writer on SQLite instead of enqueueing a second transaction. +- **NATS phased reconcile** — `ensureNatsForFog` splits into `nats.ensure.certPrep` (certs + JetStream key), `nats.ensure.authPrep` (JWT bundle + sys-user creds), and `nats.ensure.topology` (instance, mounts, microservice) short background transactions; K8s hub routes remain post-tx. `reconcileFogNats` calls `ensureNatsForFogPhased` / `cleanupNatsForFogPhased` instead of one monolithic `fogPlatform.natsEnsure` tx. +- **enforcement grep gates** — extended `transaction-grep-gates.test.js` for managers-never-enqueue, cert utils branch, K8s-outside-tx (nats + service-platform), vault afterCommit, volume-mount association tx, phased fog platform labels, OIDC adapter queue routing, JTI cleanup via runner. 
+- **first-fog integration + close docs** — `test/src/integration/first-fog-reconcile-sqlite.test.js` (gate: `RUN_INTEGRATION=1`); ops/architecture docs for R126–R135; plan close checklist updated for integration + load probe. +- **unwrap internal `_`** — `iofog-service.js` internal helpers (`_handleRouterCertificates`, `_deleteFogRouter`, router/TCP/HAL/Bluetooth/NATS helpers) export plain functions; callers in `fog-platform-service` and sweep jobs pass explicit `transaction` from phased orchestrators. Public API entrypoints remain wrapped with `generateTransaction`. ### Fixed +- **Agent fog-token auth hang (SQLite)** — `checkFogToken` updated `lastActive` via `FogManager.updateLastActive` without passing the open Sequelize transaction on a single-connection pool (`pool.max: 1`), deadlocking the write queue after provision when Edgelet first called JWT-authenticated routes (`PATCH /agent/config`, `GET /agent/registries`, etc.). +- **WebSocket audit event logging (SQLite)** — `persistAuditEvent` (`PRIORITY_BACKGROUND`) reused a committed parent transaction from AsyncLocalStorage when `createWsConnectEvent` ran in `setImmediate` after the log-session handler committed, causing `commit has been called on this transaction` errors. Background `runInTransaction` on SQLite now always enqueues a fresh transaction. +- **WebSocket log/exec session deadlock (SQLite)** — log and exec handlers awaited NATS relay setup inside the open interactive transaction; relay hub lookup enqueues a background transaction on the single SQLite connection and deadlocked. Relay setup now runs in `setImmediate` after DB work commits; relay cleanup callbacks open fresh transactions instead of capturing the handler transaction. 
+- **WebSocket exec/log session cleanup race (postgres / NATS relay)** — concurrent teardown paths (pending timeout + disconnect, NATS CLOSE + CLOSE ack, relay callback + `ws.on('close')`) reused one Sequelize transaction via AsyncLocalStorage, causing `commit has been called on this transaction` on session row delete. Exec and log cleanup are deduplicated per `sessionId`, use fresh background transactions, pending timeouts only close sockets, and relay CLOSE acks no longer trigger DB teardown. +- **WebSocket exec/log cross-replica pairing** — pending timeouts no longer require a local `session.agent`; user pods mark `remoteAgentPaired` via relay delivery hooks and DB fallback (`agentConnected`). Agent pods publish **ACTIVATION** (exec) and **LOG_LINE** user notifications via NATS/AMQP relay when the user is on another replica. Same-replica log “agent connected” notify uses **LOG_LINE** (not `LOG_START` + embedded message). **`ws_pending_pairings`** and **`ws_pairing_duration_ms`** metrics are recorded from user connect through pairing completion or timeout. Cross-replica paired sessions use **max/idle duration** (not pending timeout) in periodic cleanup; agent disconnect on an agent-only pod relays **CLOSE** (exec) or **LOG_LINE** (log) to the user pod and detaches local state without deleting the DB row. +- **WebSocket cross-replica exec activation** — `setupExecMessageForwarding` read `shouldUseRelay` before `enableForSession`, so agent-only pods skipped relay **ACTIVATION** and user notify on first connect (log setup was already correct). Info logs added for log session user/agent disconnect, full cleanup, and local detach. +- **WebSocket exec/log orphan session cleanup (multi-replica HA)** — agent partial disconnect no longer triggers solely because relay is enabled; teardown uses DB `userConnected` (not stale `remoteUserPaired`). Full DB delete when both sides are gone. 
Concurrency limits raised to **5** per resource; `GET /agent/logs/sessions` / `GET /agent/exec/sessions` count or list only `userConnected: true` rows. Reconcile job immediately removes rows with both flags false. Same-replica user disconnect still full-cleans when the agent socket is local. +- **WebSocket protocol heartbeat (exec + log)** — Controller sends RFC 6455 **Ping** frames on all four session sockets (user/agent, exec/log) every **`WS_PING_INTERVAL`** (default 30s). Keeps idle agent log/exec streams alive through Edgelet read deadlines and ingress; browsers auto-respond with Pong. Application **`CONTROL/keepalive`** on user exec unchanged (EdgeOps Console contract). No server-side pong-timeout terminate in v1. +- **Volume mount manager transaction propagation** — `VolumeMountingManager.findOne` / `findAll` passed `transaction` as a second Sequelize argument instead of inside the options object, so NATS fog reconcile could create a volume mount in an open transaction then fail to link it (`nats-server-conf-* not found`). Reads now honor the parent transaction like `BaseManager`. +- **Volume mount service transaction propagation** — `VolumeMountService.linkVolumeMountEndpoint` / `unlinkVolumeMountEndpoint` passed `transaction` as a second Sequelize argument to `getFogs` / `addVolumeMount` / `removeVolumeMount` instead of inside the options object, causing NATS fog reconcile to hang when linking volume mounts after auth bootstrap. +- **Fog platform reconcile stale errors** — `reconcileFogPrepare` clears `lastError` when entering `Progressing` so prior `SQLITE_BUSY` does not mask current reconcile state. +- **Fog platform NATS provisioning hang (SQLite)** — `reconcileFogNats` calls `ensureNatsForFogDb` directly in a background transaction (no `ensureNatsForFog` re-wrap). 
`generateTransaction` inlines the active AsyncLocalStorage transaction instead of enqueueing nested `runInTransaction`; duck-typed Sequelize transaction detection; `NatsConnectionManager.findAllWithNats` and `VolumeMappingManager.findAll` pass `transaction` inside Sequelize options. Fixes deadlock after router cert prep when provisioning hub NATS on first fog. - NATS relay and AMQP router connection resolvers — **Remote CP** uses Edgelet bridge DNS then DB host only (no `*.svc.cluster.local`); **Kubernetes CP** uses `nats-server.{namespace}.svc.cluster.local` / `router.{namespace}.svc.cluster.local` with DB host fallback; connect failures log and throw aggregate errors for all attempts; relay log messages are transport-aware. - NATS relay **`controller-relay` creds loading** — read Opaque secret values as plain UTF-8 `.creds` text (matches `nats-service.js` and DB storage); fixes **`unable to parse credentials`** on hub connect when `NATS_ENABLED=true`. - NATS platform relay identity renamed to account/user **`controller`** with rules **`controller-account`** / **`controller-user`**; **`GET /nats/accounts/controller/users/controller/creds`** supported for operator cred export. @@ -124,6 +144,14 @@ Controller v3.8 is a **greenfield** release aligned with **Edgelet**. There is * - Central local CAs (`default-router-local-ca`, `default-nats-local-ca`) are ensured on first agent provision (or via operator direct import before first agent), not at Controller boot — allows custom local CAs before agent deployment. - Fog teardown drops obsolete per-fog **`nats-local-ca-*`** and **`router-local-ca-*`** secret names from cleanup lists. - OIDC discovery with **`AUTH_INSECURE_ALLOW_HTTP`** uses the supported `openid-client` insecure-request hook for local **`http://`** issuers. 
+- **Postgres OAuth/session expiry (TIMESTAMPTZ)** — greenfield postgres migration uses **`TIMESTAMPTZ`** for all temporal columns; Sequelize **`timezone: '+00:00'`** on postgres/mysql providers so auth interaction, BFF session, and certificate/heartbeat date comparisons are UTC-correct regardless of host **`TZ`**. OIDC adapter upsert uses **`conflictFields: ['model', 'record_id']`** for postgres **`ON CONFLICT`**. +- **Cross-DB TEXT column defaults** — removed **`DEFAULT`** from **`TEXT`** columns (`Fogs.warning_message`, `RbacRoles.kind`, `RbacRoleBindings.kind`) in sqlite/mysql/postgres greenfield migrations; Sequelize model **`defaultValue`** applies on insert (fixes MySQL **`ER_BLOB_CANT_HAVE_DEFAULT`** in strict mode). +- **Postgres reconcile outbox enqueue** — `ReconcileOutboxManager.enqueue` uses find-first dedup (postgres aborts transactions on duplicate insert); processed rows with the same idempotency key are re-opened for drain; insert races use a savepoint on postgres. +- **Fog delete reconcile** — platform worker runs delete when status phase is **`Deleting`** even if task reason is still spec/manual-retry; **`reconcileFog` skip → delete** fallback; delete enqueue preempts **`in_progress`** tasks; delete failures keep phase **`Deleting`**; shorter delete-task staleness reclaim (default 60s). +- **Fog delete NATS cleanup (postgres)** — `cleanupNatsForFog` reuses the caller transaction when provided (fixes postgres hang/deadlock from nested tx); NATS cleanup runs before microservice deletes in `_processDeleteCommand`. +- **MySQL `MicroserviceHealthChecks.interval`** — quote reserved column name as **`` `interval` ``** in mysql migration (fixes **`ER_PARSE_ERROR`** on greenfield install). +- **MySQL `MicroserviceExecSessions.session_id`** — use **`VARCHAR(255) UNIQUE`** instead of **`TEXT UNIQUE`** (fixes ignored **`ER_BLOB_KEY_WITHOUT_LENGTH`** and subsequent **`ER_NO_SUCH_TABLE`** on index creation); model aligned to **`STRING(255)`**. 
+- **MySQL RBAC TEXT unique keys** — remove inline **`TEXT UNIQUE`** on **`RbacRoles.name`** / **`RbacRoleBindings.name`** (keep **`UNIQUE KEY … (name(255))`**); prefix **`RbacServiceAccounts`** composite unique index with **`name(255)`** (fixes greenfield **`ER_FK_CANNOT_OPEN_PARENT`** / blob-key errors). - Embedded OAuth BFF builds the in-process issuer client from local metadata and trusts listener TLS material (**`TLS_PATH_*`** / **`TLS_BASE64_*`**) for token exchange — fixes **`fetch failed`** on **`GET /api/v3/user/oauth/authorize`** with self-signed HTTPS certs without **`NODE_EXTRA_CA_CERTS`**. - Provisioning key and **`GET /api/v3/agent/cert`** derive **`caCert`** from listener TLS material (**`TLS_PATH_*`** / **`TLS_BASE64_*`**) via shared **`tls-config`** — always base64-encoded for Edgelet trust store; fixes empty **`caCert`** when legacy **`SSL_CERT`** / **`INTERMEDIATE_CERT`** were unset. - Config keys **`auth.bootstrap.adminUsername`** / **`adminPassword`** renamed to **`auth.bootstrap.username`** / **`password`** (**`OIDC_BOOTSTRAP_ADMIN_*`** env vars unchanged). @@ -141,6 +169,14 @@ Controller v3.8 is a **greenfield** release aligned with **Edgelet**. There is * - Dual writers to router microservice bridge config from fog create/update and service create/update/delete — single full-recompute path on fog reconcile. - SQLite startup lock contention on single-controller deployments — WAL + `busy_timeout` pragmas on connect, `withDbBusyRetry` on fog/service/NATS task claims, staggered reconcile-heavy job startup. - **`reconcileFog` transaction parameter** — removed unused `options` argument so worker-decorated calls receive the transaction correctly. +- **NATS auth post-commit orchestration** — account/user rule reissue and application NATS orchestration run in background `PRIORITY_BACKGROUND` transactions after API commit; no longer inherit committed ALS transactions (`commit has been called on this transaction`). 
+- **NATS resolver bundle ordering** — hub + leaf JWT bundles rebuild only after reissue/revocation commits; outbox enqueue removed from eager `scheduleReissueFor*` paths. +- **Application NATS rule / disable** — `_scheduleApplicationNatsOrchestration` post-commit with guaranteed outbox enqueue on success (R139). +- **Microservice NATS PATCH** — normalized `natsConfig` gates enable/disable/rule change; resolver bundle uses fresh account JWT reads; idempotency keys include `authGeneration` / `microserviceUuid` (R137, R140, R142). +- **User rule fan-out** — `reissueForUserRule` covers all `NatsUserManager` rows by rule id including Bearer users; revocations propagate (R143). +- **Fog router MS upstream** — router microservice config built from live router DB + connections, not stale persisted JSON; upstream topology change forces persist (R144, R145). +- **Downstream fog fan-out** — upstream interior-router or server-NATS host/port change enqueues downstream platform reconcile (R146). +- **`upstreamNatsServers` preserve-on-omit** — PATCH omitting `upstreamNatsServers` preserves existing NATS upstream connections (R147). 
### Changed diff --git a/Dockerfile b/Dockerfile index 3cfe9399..c3232d23 100644 --- a/Dockerfile +++ b/Dockerfile @@ -5,7 +5,7 @@ FROM node:24-bookworm@sha256:fdddfb3e688158251943d52eba361de991548f6814007acba4917ae6b512d6be AS console-builder ARG EDGEOPS_CONSOLE_REPO=https://github.com/Datasance/edgeops-console -ARG EDGEOPS_CONSOLE_VERSION=v1.0.3 +ARG EDGEOPS_CONSOLE_VERSION=v1.0.8 ARG EDGEOPS_CONSOLE_FLAVOR=datasance RUN apt-get update \ @@ -48,9 +48,9 @@ RUN npm pack # ubi9/nodejs-24-minimal:latest — pin manifest list digest for reproducible multi-arch builds -FROM registry.access.redhat.com/ubi9/nodejs-24-minimal@sha256:cc7648f8e1c7d628e4334328a712f30ea0820787bb92836cc93e349674c689bf +FROM registry.access.redhat.com/ubi9/nodejs-24-minimal@sha256:5f1ac8eab93c93eb2227f4ee7822668b312ee292d122dddd580bee8f17359c2f -ARG EDGEOPS_CONSOLE_VERSION=v1.0.3 +ARG EDGEOPS_CONSOLE_VERSION=v1.0.8 ARG IMAGE_REGISTRY ARG OCI_SOURCE_REPO ARG CONTROLLER_DISTRIBUTION=iofog diff --git a/Makefile b/Makefile index 49685283..c02f42b7 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ # Local Docker build — mirrors CI/release build-args (see .github/actions/set-build-env). 
-# Override any variable: make build FLAVOR=iofog EDGEOPS_CONSOLE_VERSION=v1.0.3 +# Override any variable: make build FLAVOR=iofog EDGEOPS_CONSOLE_VERSION=v1.0.8 FLAVOR ?= datasance IMAGE_NAME ?= controller @@ -25,7 +25,7 @@ else $(error FLAVOR must be "datasance" or "iofog", got "$(FLAVOR)") endif -EDGEOPS_CONSOLE_VERSION ?= v1.0.3 +EDGEOPS_CONSOLE_VERSION ?= v1.0.8 IMAGE_REF = $(IMAGE_REGISTRY)/$(IMAGE_NAME):$(DOCKER_TAG) diff --git a/docs/architecture.md b/docs/architecture.md index e19adfd3..ebcb78ed 100644 --- a/docs/architecture.md +++ b/docs/architecture.md @@ -206,7 +206,7 @@ Full spec: [`.cursor/controllerv3.8/docs/15-fog-platform-reconcile.md`](../.curs ## WebSocket exec & log sessions -Interactive **exec** and **log streaming** use paired WebSocket sessions between operators (Bearer JWT), Controller, and Edgelet agents (fog token). Plan 16 hardens log sessions and shared WS infra (HA, drain, OTEL). **Plan 17** redesigns **microservice exec** to log-style multi-session flow (3 concurrent per MS, agent poll + session-scoped WS). **Plan 18** production-hardens cross-replica relay via **`WsRelayTransport`** — AMQP pool + recovery when `nats.enabled=false`, NATS Core when `nats.enabled=true` (R102–R113). **Edgelet agent wire change required** for exec only (see [edgelet-invariants.md §10.1](../.cursor/controllerv3.8/docs/edgelet-invariants.md)). +Interactive **exec** and **log streaming** use paired WebSocket sessions between operators (Bearer JWT), Controller, and Edgelet agents (fog token). Plan 16 hardens log sessions and shared WS infra (HA, drain, OTEL). **Plan 17** redesigns **microservice exec** to log-style multi-session flow (5 concurrent per MS, agent poll + session-scoped WS). **Plan 18** production-hardens cross-replica relay via **`WsRelayTransport`** — AMQP pool + recovery when `nats.enabled=false`, NATS Core when `nats.enabled=true` (R102–R113). 
**Edgelet agent wire change required** for exec only (see [edgelet-invariants.md §10.1](../.cursor/controllerv3.8/docs/edgelet-invariants.md)). ```mermaid flowchart TB @@ -283,14 +283,14 @@ sequenceDiagram | Topic | Normative value | |-------|-----------------| | MS exec entry | **Direct user WS** — no `POST …/microservices/…/exec` (R92, R94) | -| MS exec concurrency | **3** user exec WS per microservice (R93) | +| MS exec concurrency | **5** user exec WS per microservice | | MS exec lifecycle | **Per-session** — close deletes session row only; **no** `execEnabled=false` (R98) | | MS exec pending / max | **60s** pending for agent; **8h** max active session (Plan 16 carry-over) | | Agent exec discovery | `GET /agent/exec/sessions` on `execSessions` change flag (R95, R100) | | Agent exec WS | `/agent/exec/microservice/:uuid/:sessionId` only — legacy `/agent/exec/:uuid` removed (R96) | | User session notify | **ACTIVATION** (type 5) with `{ sessionId, microserviceUuid }` (R97) | | Fog debug provision | `POST/DELETE /iofog/:uuid/exec` unchanged; shell via `WS /microservices/system/exec/:debugMsUuid` (R99) | -| Log concurrency | **3** user log WS per microservice (or per fog for node logs) | +| Log concurrency | **5** user log WS per microservice (or per fog for node logs) | | Log limits | Tail max **5,000** lines; **120s** pending; **2h** idle | | Log content | Live relay only — no log line persistence; audit connect/disconnect | | HA relay | Cross-replica sessions **require** a **relay backend** (R112): **AMQP** router queues when `nats.enabled=false`; **NATS Core** subjects on hub when `nats.enabled=true`. Same-replica may use direct WS; **fail fast** close **1013** when active backend unavailable | @@ -369,7 +369,52 @@ For the full bilateral contract (including ControlPlane env vars and verificatio | Topic | v3.8 behavior | |-------|---------------| -| **Database** | Greenfield v3.8.0 schema — **new install only** (no v3.7 migrator). 
Supports **sqlite** (single-controller production), **mysql**, and **postgres** (multi-replica / HA). | +| **Database** | Greenfield v3.8.0 schema — **new install only** (no v3.7 migrator). Supports **sqlite** (single-controller production), **mysql**, and **postgres** (multi-replica / HA). All mutating paths use **`runInTransaction()`** (Plan 19, R114–R125). Plan **19-I** stabilization (R126–R135): unified ALS transaction context, phased NATS reconcile, grep gates, first-fog integration SLO. | + +### Database profiles (Plan 19 / 19-I) + +| Profile | Database | Controller replicas | Typical fleet size | Notes | +|---------|----------|---------------------|-------------------|-------| +| **Edge / PoT** | sqlite | 1 | ≤ **50** fogs (default warning threshold) | Single write queue; embedded OIDC | +| **Small production** | sqlite | 1 | 50–100 fogs | Supported within single-writer physics; soft warning logged above threshold | +| **Enterprise / HA** | mysql or postgres | 1+ | **100+** fogs recommended | Default for large fleets; `FOR UPDATE SKIP LOCKED` task claims; shared OIDC session store | + +**Enterprise default:** mysql/postgres for fleets above **100** fogs or any multi-replica deployment. sqlite remains supported for single-node edge deployments within Plan 19 SLOs (200 fogs acceptance profile). 
+ +```mermaid +flowchart LR + subgraph callers [Mutating callers] + API[REST / Agent API] + WS[WS session DB ops] + JOBS[Background jobs] + end + + subgraph runner [runInTransaction] + Q{provider?} + SQ[SQLite priority queue] + POOL[mysql/postgres pool] + TX[Real Sequelize transaction] + end + + subgraph outbox [ReconcileOutbox] + INS[Same-commit insert] + DRAIN[Outbox drainer] + end + + API --> runner + WS --> runner + JOBS --> runner + Q -->|sqlite| SQ --> TX + Q -->|mysql/pg| POOL --> TX + TX --> INS --> DRAIN +``` + +| Priority lane | Callers | +|---------------|---------| +| **interactive** | Agent routes, user RBAC API, WS session DB ops, OIDC/auth | +| **background** | Reconcile workers, outbox drainer, platform sweep, cleanup timers | + +Full operator runbook: [operations/database-transactions.md](operations/database-transactions.md). ### SQLite single-node production @@ -377,10 +422,16 @@ Small deployments with **one Controller process** may use SQLite as the producti | Topic | Behavior | |-------|----------| -| **When to use** | Single Controller, no DB HA requirement, edge/small-cluster PoT | -| **Concurrency** | WAL journal mode + `busy_timeout` pragmas on connect; connection pool size 1 | -| **Background jobs** | Reconcile-heavy jobs start after a configurable delay (`settings.jobStartupDelaySeconds`, default 3s) and stagger by 500ms to avoid restart lock bursts | -| **Task claims** | Fog/service/NATS reconcile task claims retry on `SQLITE_BUSY` (same retry budget as `TransactionDecorator`) | +| **When to use** | Single Controller, no DB HA requirement, edge/small-cluster PoT (≤ recommended fog count) | +| **Write path** | All mutations via `runInTransaction()` — **real** ACID transactions (no `fakeTransaction`); nested reuse via **`runWithTransactionContext`** ALS (R126–R128) | +| **Concurrency** | Global **priority write queue** (interactive before background); pool `max: 1`; WAL + `busy_timeout` pragmas | +| **First-fog SLO (R133)** | sqlite 
integration gate: first fog reconcile + concurrent operator login/list **< 2s**; `RUN_INTEGRATION=1 npm run test:integration:first-fog` | +| **Load close gate (R135)** | `node test/load/transaction-safety-load.js --fogs=50 --soak-minutes=5` — agent p99 < 200ms, operator p99 < 1s | +| **Busy retry** | Exponential backoff + jitter on `SQLITE_BUSY` inside queue task (configurable max attempts) | +| **Reconcile enqueue** | **`ReconcileOutbox`** — mutation + outbox row in same commit; drainer creates reconcile tasks | +| **Background jobs** | `priority: 'background'`; startup stagger (`settings.jobStartupDelaySeconds`, default 3s) + 500ms between jobs | +| **Task claims** | Same runner; busy retry on sqlite; mysql/postgres use `FOR UPDATE SKIP LOCKED` | +| **Load SLO** | 200 fogs / 40s poll / 10 operators / 30 min soak: agent poll p99 **< 200ms**; operator REST p99 **< 1s** | | **Persistence** | Mount a persistent volume for `controller_db.sqlite` and WAL sidecar files (`-wal`, `-shm`) | | **Backup** | Use SQLite backup API or copy DB + WAL files during a quiet window | | **HA path** | mysql/postgres + multiple Controller replicas — see [oidc-configuration.md](oidc-configuration.md) | @@ -415,4 +466,5 @@ Agent routes and WebSocket exec/logs for agents are **outside** OIDC — see [rb | [pki.md](pki.md) | Central CAs, cert renewal, NATS operator rotation | | [oidc-configuration.md](oidc-configuration.md) | Embedded/external auth modes and env vars | | [external-oidc-client-setup.md](external-oidc-client-setup.md) | External IdP client configuration | +| [operations/database-transactions.md](operations/database-transactions.md) | Transaction runner, OTEL metrics, SQLITE_BUSY runbook | | [CONTRIBUTING](../CONTRIBUTING) | Dual-mirror CI and development | diff --git a/docs/operations/database-transactions.md b/docs/operations/database-transactions.md new file mode 100644 index 00000000..38236e51 --- /dev/null +++ b/docs/operations/database-transactions.md @@ -0,0 +1,233 @@ +# 
Database transactions — operations guide + +## Overview + +Controller v3.8.0 routes **all mutating database work** through `runInTransaction()` in `src/helpers/transaction-runner.js`. Every commit uses a **real** Sequelize transaction — the legacy `fakeTransaction` workaround is removed. + +| Provider | Concurrency model | +|----------|-------------------| +| **sqlite** | Single connection (`pool.max: 1`); global **priority write queue** (interactive before background); WAL + `busy_timeout` | +| **mysql / postgres** | Connection pool (default max **10**); task claims use `FOR UPDATE SKIP LOCKED` | + +Reconcile work is scheduled via the **`ReconcileOutbox`** table — mutations and outbox inserts commit atomically; a background drainer creates reconcile task rows. + +--- + +## Architecture + +```mermaid +flowchart LR + subgraph callers [Mutating callers] + API[REST / agent API] + WS[WS session DB ops] + JOBS[Background jobs] + end + + subgraph runner [transaction-runner.js] + Q{provider?} + SQ[SQLite priority queue] + POOL[mysql/postgres pool tx] + RETRY[withDbBusyRetry] + end + + callers --> runner + Q -->|sqlite| SQ --> RETRY + Q -->|mysql/pg| POOL --> RETRY +``` + +**Priority lanes (sqlite only):** + +| Priority | Typical callers | +|----------|-----------------| +| **interactive** | Agent routes, RBAC API, WS session DB ops, OIDC/auth | +| **background** | Reconcile workers, outbox drainer, platform sweep, heartbeat, cleanup jobs | + +Interactive tasks are dequeued before background tasks. mysql/postgres skip the global queue and use the connection pool directly. + +### Unified transaction context + +Plan 19 routed all writes through `runInTransaction()`, but **`generateTransaction` handlers that received an explicit Sequelize `transaction` argument did not register that transaction in AsyncLocalStorage (ALS)**. Any nested callee that called `runInTransaction()` without the parent tx could enqueue a **second** sqlite writer → deadlock (`SQLITE_BUSY`, API hang ~30–60s). 
+ +| Mechanism | Role | +|-----------|------| +| **`runWithTransactionContext(transaction, priority, fn)`** | Runs `fn` with ALS set to the **existing** transaction — required whenever code already holds a Sequelize tx | +| **`generateTransaction`** | Uses `runWithTransactionContext` for explicit-tx args, ALS parent injection, and new top-level txs via `runInTransaction` | +| **`runInTransaction()` (no tx arg)** | Reuses parent tx from ALS when nested under any ancestor that used `runWithTransactionContext` (**interactive** priority only on SQLite) | +| **`runInTransaction({ priority: background })`** | Always enqueues a **fresh** SQLite transaction — never reuses ALS parent (avoids stale tx after handler commit + deferred audit) | + +**Rule for developers:** If your function runs inside an open transaction (API handler last arg, worker phase callback, etc.), nested work must either pass `transaction` through wrapped exports **or** rely on ALS via `runWithTransactionContext`. Do not call bare `runInTransaction()` from deep helpers expecting implicit join — the runner only reuses via ALS or an explicit tx parameter on `runInTransaction` itself. + +Phased reconcile and grep gates complement ALS: short txs for NATS/platform phases and CI checks for K8s/vault/I/O outside tx bodies. 
+ +--- + +## When to use which database + +| Profile | Recommended DB | Notes | +|---------|----------------|-------| +| Single Controller, ≤ **50** fogs (default threshold) | **sqlite** | Edge / PoT; mount persistent volume for `.sqlite` + `-wal` / `-shm` | +| Single Controller, **50–100+** fogs | **mysql** or **postgres** | Controller logs soft warning above threshold on sqlite | +| Multi-replica HA | **mysql** or **postgres** | Embedded OIDC requires shared DB; sqlite **not** supported for multi-replica | +| Enterprise production | **mysql** or **postgres** | Documented default for large fleets and multi-user load | + +| Fleet size | sqlite | mysql/postgres | +|------------|--------|----------------| +| ≤ 50 fogs | Recommended | Optional | +| 51–100 fogs | Supported with warning | Recommended | +| 100+ fogs | Possible but not recommended | **Required** for enterprise SLOs | +| Multi-replica | Not supported | **Required** | + +--- + +## Configuration + +| Setting | Default | Env override | +|---------|---------|--------------| +| `settings.sqliteEnterpriseFogWarningThreshold` | 50 | `SQLITE_ENTERPRISE_FOG_WARNING_THRESHOLD` | +| `settings.dbWriteQueueMaxDepth` | 256 | `DB_WRITE_QUEUE_MAX_DEPTH` | +| `settings.dbBusyRetryMaxAttempts` | 8 | `DB_BUSY_RETRY_MAX_ATTEMPTS` | +| `settings.dbBusyRetryBaseMs` | 25 | `DB_BUSY_RETRY_BASE_MS` | +| `settings.reconcileOutboxDrainerIntervalSeconds` | 1 | `RECONCILE_OUTBOX_DRAINER_INTERVAL_SECONDS` | +| `database.mysql.pool.max` | 10 | *(yaml only)* | +| `database.postgres.pool.max` | 10 | *(yaml only)* | + +See `src/config/config.yaml` and [architecture.md](../architecture.md) for pool and pragma settings. + +--- + +## SQLite write queue backpressure + +When total queued work (`interactive` + `background` lanes) exceeds `settings.dbWriteQueueMaxDepth` (default **256**), Controller logs an **error** once per overflow episode. **Interactive requests are not rejected** — the queue continues to drain in priority order. 
Operators should investigate background job pressure or migrate to mysql/postgres. + +--- + +## OTEL metrics + +Instruments are registered at startup in `src/helpers/db-metrics.js` (requires `ENABLE_TELEMETRY=true`). + +| Metric | Type | Labels | Suggested alert | +|--------|------|--------|-----------------| +| `db.transaction.duration` | histogram | `label`, `priority`, `provider` | p99 spike correlated with load | +| `db.write_queue.depth` | gauge | `priority` | **> 100 for 5 min** → investigate background pressure | +| `db.write_queue.wait_ms` | histogram | `priority` | Sustained high wait → scale DB or reduce background load | +| `db.busy_retries` | counter | `label` | **> 10/min** → lock contention | +| `db.connection.invalidated` | counter | `provider` | **Any increment** → investigate pool / connection errors | +| `db.sqlite.fog_count_warning` | counter | — | Fleet exceeded sqlite recommended size | + +### Alert thresholds (summary) + +1. **`db.write_queue.depth` > 100 for 5 minutes** — background jobs or agent poll load saturating the sqlite serializer; check reconcile worker intervals and fog count. +2. **`db.busy_retries` rate > 10/minute** — sqlite lock contention; verify WAL mode and consider mysql/postgres. +3. **`db.connection.invalidated` any increase** — connection pool error or mid-transaction kill; check DB connectivity and replica health. +4. **`db.sqlite.fog_count_warning` any increase** — migrate to mysql/postgres for enterprise scale. + +--- + +## Troubleshooting + +### SQLITE_BUSY / "cannot rollback - no transaction is active" + +Historically caused by raw transactions (jobs, WS cleanup) competing with the legacy `fakeTransaction` API path, which v3.8.0 removes. + +This should now be rare — busy retry with exponential backoff and a single priority queue serialize writes. If it persists: + +1. Check `db.write_queue.depth` and `db.busy_retries` +2. Confirm WAL mode: `PRAGMA journal_mode` → `wal` +3. Verify no long-running transaction (K8s and external I/O must run outside open transactions) +4. 
**Migrate to mysql/postgres** if fog count > threshold + +### Reconcile tasks not running + +1. Check `ReconcileOutbox` for rows with `processedAt IS NULL` +2. Verify outbox drainer job is running (logs on startup) +3. Check drainer `lastError` column + +### HA double reconcile (mysql/postgres) + +Should not occur with SKIP LOCKED claims. If observed, capture concurrent worker logs and verify claim tests pass. + +### API hangs ~60s then SQLITE_BUSY (SQLite) + +Typical on **`pool.max: 1`** when two writers compete for the single connection. + +| Symptom | Likely cause | Fix | +|---------|--------------|-----| +| Hang during **first fog create** / platform reconcile / **provisioning-key** (post-19-I-A) | Nested `runInTransaction()` without ALS parent (legacy explicit-tx path) or monolithic reconcile tx | Verify deployed: ALS via `runWithTransactionContext`; NATS phased txs; run `RUN_INTEGRATION=1 npm run test:integration:first-fog` | +| Hang during **first fog create** / platform reconcile / **provisioning-key** (pre-19-I) | Nested `generateTransaction`: wrapped callee called **without** `transaction` last arg (e.g. 
`SecretService.getSecretEndpoint(name)` inside `certificate-service.js`) | Pass parent `transaction` through all wrapped service calls; grep `certificate-service.js` for SecretService calls | +| **`SQLITE_BUSY`** on idle timer (JTI cleanup) | Job/manager raw Sequelize write bypassing `runInTransaction` | Route through `runInTransaction(..., { priority: 'background' })` | +| Hang on **OAuth interaction** login/MFA | OIDC adapter read/write while outer API tx holds connection | Mirror `auth-interaction-service.complete()`: adapter I/O **before** short DB tx | +| External IdP **login/refresh** slow under load | HTTP inside `generateTransaction` wrapper | External-mode HTTP outside tx; embedded DB paths unchanged | +| Hang on fog **NATS mode change** / fog delete (K8s CP) | Monolithic `ensureNatsForFogDb` held sqlite writer for seconds (certs + auth + mounts in one tx) | Split into `nats.ensure.certPrep` + `nats.ensure.authPrep` + `nats.ensure.topology` short txs; K8s via post-tx external helpers | +| Hang on **nats-reconcile-worker** | JWT bundle K8s patch inside reconcile tx | R-06 phased split — DB reconcile tx then external ConfigMap patch | +| Hang on **secret/configmap/registry** CRUD (vault enabled) | `SecretHelper` vault HTTP inside open Sequelize tx | Internal encrypt in tx; vault store/delete via `transaction.afterCommit` | + +**Rule:** When the caller already has `transaction`, every wrapped export must receive it as the **last argument** (`generateTransaction` reuses parent tx when `lastArg instanceof Transaction`). + +### Vault I/O outside transactions + +When HashiCorp Vault is enabled, secret/configmap/registry mutations must not perform vault HTTP while a Sequelize transaction holds the sqlite connection. 
+ +| Operation | In transaction | After commit | +|-----------|----------------|--------------| +| Create / update | Store **internal** encryption (or plaintext vault ref if already promoted) | `storeInVaultAndGetReference` + short DB patch to vault ref | +| Delete | DB row + FK cleanup | `SecretHelper.deleteSecret` | + +Helpers live in `src/helpers/vault-transaction-helper.js` (`scheduleVaultDeleteAfterCommit`, `scheduleVaultPromoteAfterCommit`). Model `beforeSave` hooks defer vault when `options.transaction` is set. Failures in deferred vault work are logged; committed DB state is not rolled back (orphan vault secrets are preferable to orphan DB rows). + +Without vault (`vaultManager.isEnabled()` false), behavior is unchanged — internal encryption only. + +--- + +## Enforcement + +Mechanical **grep gates** in `test/src/helpers/transaction-grep-gates.test.js` fail CI when transaction regressions reappear. Run: + +```bash +nvm use 24 +npm test -- --grep "grep gates" +``` + +| Gate | What it checks | +|------|----------------| +| **fakeTransaction** | Zero hits anywhere under `src/` | +| **bypassQueue** | Zero hits anywhere under `src/` | +| **sequelize.transaction** | Allowed only in `transaction-runner.js` | +| **Managers never enqueue** | Zero `runInTransaction` in `src/data/managers/` — managers accept `transaction` from callers only | +| **Certificate tx propagation** | `certificate-service.js` passes `transaction` to every `SecretService.getSecretEndpoint` call | +| **Cert utils branch** | `src/utils/cert.js` — `loadCA` / `getCAFromK8sSecret` branch on `transaction ?` before nested `runInTransaction`; `getCAFromInput` forwards `transaction` | +| **K8s outside tx (nats-service)** | DB helpers (`ensureNatsForFog*Db`, `cleanupNatsForFogDb`, `_reconcileResolverArtifactsOnceDb`) contain no `K8sClient` — K8s via external helpers after commit | +| **K8s outside tx (service-platform-service)** | Labeled `servicePlatform.*` tx blocks contain no `K8sClient` — hub router 
/ LB sync via `applyK8sHubRouterPlan`, `reconcileK8sServiceExternal`, etc. | +| **Vault outside tx** | Secret/configmap/registry delete paths use `scheduleVaultDeleteAfterCommit` / `scheduleVaultPromoteAfterCommit`, not inline vault HTTP | +| **Volume-mount associations** | Sequelize `{ transaction }` inside association options, not as a trailing positional arg | +| **Fog platform phased reconcile** | Separate `fogPlatform.*` labels; no monolithic `fogPlatform.natsEnsure` | +| **OIDC adapter** | All adapter reads/writes through `runInTransaction` with `oidc.adapter.*` labels | +| **JTI cleanup** | `fog-token-cleanup-job.js` routes through `runInTransaction`, not bare manager call | + +When a gate fails, fix the **minimal** violation (pass parent `transaction`, move I/O outside the tx body, or split phases) — do not disable the gate. + +### OIDC provider adapter (R-16) + +`src/data/adapters/oidc-provider-adapter.js` routes all `AuthOidcProviderState` reads/writes through `runInTransaction` with **interactive** priority (background for expiry purge). OAuth BFF interaction handlers in `auth-interaction-service.js` keep adapter I/O outside short user DB transactions. Do not call adapter methods from inside another open `runInTransaction` body. + +### Certificate / secret propagation checklist + +When calling from within `reconcileFog`, `_handleRouterCertificates`, or any open transaction: + +- `CertificateService.*` → internal `SecretService.*` must pass `transaction` +- `storeCA` / `generateCertificate` in `src/utils/cert.js` must pass `transaction` to `SecretService.createSecretEndpoint` + +--- + +## Backup notes + +- **sqlite:** Back up the database file **and** `-wal` / `-shm` sidecars together, or checkpoint WAL before copy. +- **mysql/postgres:** Use standard provider backup tools; ensure migrations are at v3.8.0 before restore. 
+ +--- + +## Related docs + +- [architecture.md](../architecture.md) — data layer overview +- [oidc-configuration.md](../oidc-configuration.md) — HA session store requires mysql/postgres + +--- diff --git a/docs/operations/ws-sessions.md b/docs/operations/ws-sessions.md index 34e268a1..56ec6402 100644 --- a/docs/operations/ws-sessions.md +++ b/docs/operations/ws-sessions.md @@ -18,7 +18,7 @@ Controller exposes **interactive exec** and **log streaming** over WebSocket on | **User auth** | Bearer JWT via `Authorization` header or `?token=` query param (browser Console). RBAC: `execSessions`, `logs`, `systemExecSessions`, `systemLogs`. | | **Agent auth** | Fog token on `/api/v3/agent/exec/*` and `/api/v3/agent/logs/*` — OIDC does **not** apply to agent routes. | -> **Plan 17 (MS exec):** Open exec with **direct WebSocket** — `wss://…/api/v3/microservices/exec/:uuid` (app MS) or `…/system/exec/:uuid` (system MS). **No** `POST …/exec` before connect. Up to **3** concurrent exec sessions per microservice. Agent discovers sessions via `GET /api/v3/agent/exec/sessions` and connects `WS /api/v3/agent/exec/microservice/:uuid/:sessionId`. Fog node debug: `POST/DELETE /api/v3/iofog/:uuid/exec` provisions the debug system MS, then **`WS …/microservices/system/exec/:debugMsUuid`** (not the app exec path). Full spec: [17-multi-exec-sessions.md](../.cursor/controllerv3.8/docs/17-multi-exec-sessions.md). +> **Plan 17 (MS exec):** Open exec with **direct WebSocket** — `wss://…/api/v3/microservices/exec/:uuid` (app MS) or `…/system/exec/:uuid` (system MS). **No** `POST …/exec` before connect. Up to **5** concurrent exec sessions per microservice. Agent discovers sessions via `GET /api/v3/agent/exec/sessions` and connects `WS /api/v3/agent/exec/microservice/:uuid/:sessionId`. Fog node debug: `POST/DELETE /api/v3/iofog/:uuid/exec` provisions the debug system MS, then **`WS …/microservices/system/exec/:debugMsUuid`** (not the app exec path). 
Full spec: [17-multi-exec-sessions.md](../.cursor/controllerv3.8/docs/17-multi-exec-sessions.md). ### Ingress log redaction (required) @@ -118,13 +118,28 @@ spec: --- +## Connection keepalive + +Controller uses **two layers** for long-lived exec/log WebSockets: + +| Layer | Mechanism | Peers | +|-------|-----------|-------| +| **WebSocket protocol** | Server sends RFC 6455 **Ping** frames every **`server.webSocket.pingInterval`** (default **30s**, env **`WS_PING_INTERVAL`**) | All four sockets: user + agent on exec and log paths (Edgelet, potctl, EdgeOps Console browser) | +| **Application (exec user only)** | MessagePack **`CONTROL`** with payload **`keepalive`** | EdgeOps Console exec terminal — Controller echoes **`keepalive`** to the user socket; browsers cannot send native WS ping frames | + +Log streaming does **not** use application-level keepalive (Console `LogViewer` is receive-only). Quiet `follow=true` log sessions rely on WS protocol ping to keep the agent and browser legs alive through ingress and Edgelet read deadlines. + +`server.webSocket.pongTimeout` is reserved for future server-side watchdog use; v1 does **not** terminate sessions on missed pongs (Console exec owns the 10s app-level watchdog). + +--- + ## Scale SLO (R88) | Metric | Target | |--------|--------| | Concurrent WS per replica | **500** (`WS_REPLICA_MAX_CONCURRENT_WS`) | | p99 exec pairing latency | **< 5s** | -| Exec sessions per microservice | **3** concurrent user WS (Plan 17) | +| Exec sessions per microservice | **5** concurrent user WS | Run the load probe locally: @@ -134,7 +149,7 @@ node test/load/ws-pairing-load.js --pairs 500 node test/load/ws-pairing-load.js --multi-ms 100 ``` -The `--multi-ms` mode creates **3 exec sessions per microservice** (100 MS × 3 = 300 pairs) to validate multi-session pairing latency under the same p99 SLO. 
+The `--multi-ms` mode creates **5 exec sessions per microservice** (100 MS × 5 = 500 pairs) to validate multi-session pairing latency under the same p99 SLO. **AMQP profile** (`nats.enabled=false`): run the probe above on a dev machine — it exercises in-process `ExecSessionManager` pairing only (no router required). Record p99 from stdout; target **< 5000 ms**. @@ -170,10 +185,10 @@ Enable `ENABLE_TELEMETRY=true`. Key metrics (`src/websocket/ws-metrics.js`): | Session | Limit | |---------|-------| -| Exec user WS per microservice | **3** (Plan 17 — direct WS; no POST/DELETE MS exec REST) | +| Exec user WS per microservice | **5** (direct WS; no POST/DELETE MS exec REST) | | Exec pending (user waits for agent) | **60s** | | Exec max duration | **8h** | -| Log user WS per microservice/fog | **3** | +| Log user WS per microservice/fog | **5** | | Log pending (user waits for agent) | **120s** | | Log idle | **2h** | | Log tail max lines | **5000** | diff --git a/docs/swagger.yaml b/docs/swagger.yaml index abd65f4d..0fd14abc 100755 --- a/docs/swagger.yaml +++ b/docs/swagger.yaml @@ -726,10 +726,20 @@ paths: get: tags: - Cluster - summary: Lists all cluster controllers + summary: Lists cluster controllers (active replicas by default) operationId: listClusterControllers security: - authToken: [] + parameters: + - in: query + name: includeInactive + description: >- + When true, include inactive controller rows (historical replicas from + past rollouts). Default false returns only active replicas. + required: false + schema: + type: boolean + default: false responses: "200": description: Success @@ -2767,14 +2777,14 @@ paths: Pairing: user connects directly via WebSocket; Controller creates a per-session row in `MicroserviceExecSessions` and relays STDIN/STDOUT/STDERR by `sessionId`. - No prior REST enable step is required (Plan 17). Max **3** concurrent exec sessions + No prior REST enable step is required (Plan 17). 
Max **5** concurrent exec sessions per microservice (close code **1008** when quota exceeded). On connect, Controller sends **ACTIVATION** (type 5) with JSON `{ sessionId, microserviceUuid }` in the `data` field and top-level `sessionId`, followed by STDERR "waiting for agent…". Relay frames use `execId` equal to `sessionId`. - **HA (R112):** Multi-replica deployments require a **relay backend** selected at startup by **`nats.enabled`**: AMQP router queues when `false` (default), NATS Core pub/sub on the platform hub when `true`. Cross-replica sessions fail fast with close code **1013** when the active relay backend is unavailable. See `docs/operations/ws-sessions.md`. + **HA:** Multi-replica deployments require a **relay backend** selected at startup by **`nats.enabled`**: AMQP router queues when `false` (default), NATS Core pub/sub on the platform hub when `true`. Cross-replica sessions fail fast with close code **1013** when the active relay backend is unavailable. See `docs/operations/ws-sessions.md`. See `#/components/schemas/WsExecMessageTypes` and `#/components/schemas/WsCloseCodes`. operationId: userMicroserviceExecWebSocket @@ -2830,7 +2840,7 @@ paths: description: | Upgrades to binary MessagePack log stream (agent → user after pairing). - **RBAC:** `logs`. Max **3** concurrent user log WS per microservice. + **RBAC:** `logs`. Max **5** concurrent user log WS per microservice. Query params control tail behaviour (see parameters). Live relay only — no log persistence. operationId: userMicroserviceLogsWebSocket @@ -2922,7 +2932,7 @@ paths: tags: - WebSocketSessions summary: User fog (node) log streaming WebSocket - description: Node-level logs; **RBAC** `systemLogs`. Max **3** concurrent sessions per fog. + description: Node-level logs; **RBAC** `systemLogs`. Max **5** concurrent sessions per fog. 
operationId: userFogLogsWebSocket parameters: - in: path @@ -6056,6 +6066,406 @@ paths: description: RoleBinding Not Found "500": description: Internal Server Error + /network-topology/summary: + get: + tags: + - NetworkTopology + summary: Network topology summary counts + operationId: getNetworkTopologySummary + security: + - authToken: [] + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/NetworkTopologySummaryResponse" + "401": + description: Not Authorized + "500": + description: Internal Server Error + /network-topology/router/overview: + get: + tags: + - NetworkTopology + summary: Router topology overview + operationId: getRouterTopologyOverview + security: + - authToken: [] + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/RouterTopologyOverviewResponse" + "401": + description: Not Authorized + "500": + description: Internal Server Error + /network-topology/nats/overview: + get: + tags: + - NetworkTopology + summary: NATS topology overview + operationId: getNatsTopologyOverview + security: + - authToken: [] + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/NatsTopologyOverviewResponse" + "401": + description: Not Authorized + "500": + description: Internal Server Error + /network-topology/router/nodes: + get: + tags: + - NetworkTopology + summary: List router topology nodes + operationId: listRouterTopologyNodes + security: + - authToken: [] + parameters: + - $ref: "#/components/parameters/NetworkTopologyLimitParam" + - $ref: "#/components/parameters/NetworkTopologyOffsetParam" + - $ref: "#/components/parameters/NetworkTopologyRouterRoleParam" + - $ref: "#/components/parameters/NetworkTopologyDeploymentTargetParam" + - $ref: "#/components/parameters/NetworkTopologySearchParam" + responses: + "200": + description: Success + content: + application/json: + schema: + 
$ref: "#/components/schemas/RouterTopologyNodeListResponse" + "400": + description: Bad Request + "401": + description: Not Authorized + "500": + description: Internal Server Error + /network-topology/nats/nodes: + get: + tags: + - NetworkTopology + summary: List NATS topology nodes + operationId: listNatsTopologyNodes + security: + - authToken: [] + parameters: + - $ref: "#/components/parameters/NetworkTopologyLimitParam" + - $ref: "#/components/parameters/NetworkTopologyOffsetParam" + - $ref: "#/components/parameters/NetworkTopologyNatsRoleParam" + - $ref: "#/components/parameters/NetworkTopologyDeploymentTargetParam" + - $ref: "#/components/parameters/NetworkTopologySearchParam" + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/NatsTopologyNodeListResponse" + "400": + description: Bad Request + "401": + description: Not Authorized + "500": + description: Internal Server Error + /network-topology/router/nodes/{id}: + get: + tags: + - NetworkTopology + summary: Get a router topology node + operationId: getRouterTopologyNode + security: + - authToken: [] + parameters: + - in: path + name: id + required: true + schema: + type: string + description: Router node id (`default-router` or agent iofog UUID) + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/RouterTopologyNodeDetail" + "401": + description: Not Authorized + "404": + description: Not Found + "500": + description: Internal Server Error + /network-topology/nats/nodes/{id}: + get: + tags: + - NetworkTopology + summary: Get a NATS topology node + operationId: getNatsTopologyNode + security: + - authToken: [] + parameters: + - in: path + name: id + required: true + schema: + type: string + description: NATS node id (`default-nats-hub` or agent iofog UUID) + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: 
"#/components/schemas/NatsTopologyNodeDetail" + "401": + description: Not Authorized + "404": + description: Not Found + "500": + description: Internal Server Error + /network-topology/router/nodes/{id}/connections: + get: + tags: + - NetworkTopology + summary: Get router node connections + operationId: getRouterTopologyNodeConnections + security: + - authToken: [] + parameters: + - in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/TopologyNodeConnectionsResponse" + "401": + description: Not Authorized + "404": + description: Not Found + "500": + description: Internal Server Error + /network-topology/nats/nodes/{id}/connections: + get: + tags: + - NetworkTopology + summary: Get NATS node connections + operationId: getNatsTopologyNodeConnections + security: + - authToken: [] + parameters: + - in: path + name: id + required: true + schema: + type: string + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/TopologyNodeConnectionsResponse" + "401": + description: Not Authorized + "404": + description: Not Found + "500": + description: Internal Server Error + /network-topology/router/connections: + get: + tags: + - NetworkTopology + summary: List router topology connections + operationId: listRouterTopologyConnections + security: + - authToken: [] + parameters: + - $ref: "#/components/parameters/NetworkTopologyLimitParam" + - $ref: "#/components/parameters/NetworkTopologyOffsetParam" + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/RouterTopologyConnectionListResponse" + "400": + description: Bad Request + "401": + description: Not Authorized + "500": + description: Internal Server Error + /network-topology/nats/connections: + get: + tags: + - NetworkTopology + summary: List NATS topology connections + operationId: 
listNatsTopologyConnections + security: + - authToken: [] + parameters: + - $ref: "#/components/parameters/NetworkTopologyLimitParam" + - $ref: "#/components/parameters/NetworkTopologyOffsetParam" + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/NatsTopologyConnectionListResponse" + "400": + description: Bad Request + "401": + description: Not Authorized + "500": + description: Internal Server Error + /network-topology/router/subgraph: + get: + tags: + - NetworkTopology + summary: Get router topology subgraph + operationId: getRouterTopologySubgraph + security: + - authToken: [] + parameters: + - in: query + name: center + required: true + schema: + type: string + - in: query + name: depth + schema: + type: integer + minimum: 1 + maximum: 2 + default: 1 + - $ref: "#/components/parameters/NetworkTopologyLimitParam" + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/TopologySubgraphResponse" + "400": + description: Bad Request + "401": + description: Not Authorized + "404": + description: Not Found + "500": + description: Internal Server Error + /network-topology/nats/subgraph: + get: + tags: + - NetworkTopology + summary: Get NATS topology subgraph + operationId: getNatsTopologySubgraph + security: + - authToken: [] + parameters: + - in: query + name: center + required: true + schema: + type: string + - in: query + name: depth + schema: + type: integer + minimum: 1 + maximum: 2 + default: 1 + - $ref: "#/components/parameters/NetworkTopologyLimitParam" + responses: + "200": + description: Success + content: + application/json: + schema: + $ref: "#/components/schemas/TopologySubgraphResponse" + "400": + description: Bad Request + "401": + description: Not Authorized + "404": + description: Not Found + "500": + description: Internal Server Error + /router: + get: + tags: + - Router + summary: Gets the default network router configuration + 
operationId: getDefaultRouter + security: + - authToken: [] + responses: + "200": + description: Success + headers: + X-Timestamp: + description: FogController server timestamp + schema: + type: number + content: + application/json: + schema: + $ref: "#/components/schemas/DefaultRouterResponse" + "401": + description: Not Authorized + "404": + description: Default router not found + "500": + description: Internal Server Error + put: + tags: + - Router + summary: Creates or updates the default network router + operationId: upsertDefaultRouter + security: + - authToken: [] + requestBody: + required: true + content: + application/json: + schema: + $ref: "#/components/schemas/DefaultRouterUpsertRequest" + responses: + "200": + description: Success + headers: + X-Timestamp: + description: FogController server timestamp + schema: + type: number + content: + application/json: + schema: + $ref: "#/components/schemas/DefaultRouterRecordResponse" + "400": + description: Bad Request + "401": + description: Not Authorized + "500": + description: Internal Server Error /serviceaccounts: get: tags: @@ -7056,6 +7466,10 @@ tags: description: Manage RBAC roles - name: RoleBindings description: Manage RBAC role bindings + - name: Router + description: Manage the default router configuration + - name: NetworkTopology + description: Read-only router and NATS network topology for visualization - name: ServiceAccounts description: Manage RBAC service accounts servers: @@ -7070,6 +7484,51 @@ components: type: http scheme: bearer description: Edgelet fog provisioning token (agent routes) + parameters: + NetworkTopologyLimitParam: + in: query + name: limit + required: false + schema: + type: integer + minimum: 1 + maximum: 500 + default: 100 + NetworkTopologyOffsetParam: + in: query + name: offset + required: false + schema: + type: integer + minimum: 0 + default: 0 + NetworkTopologyDeploymentTargetParam: + in: query + name: deploymentTarget + required: false + schema: + type: string + 
enum: [kubernetes, remote, edgelet] + NetworkTopologySearchParam: + in: query + name: search + required: false + schema: + type: string + NetworkTopologyRouterRoleParam: + in: query + name: role + required: false + schema: + type: string + enum: [default, edge, interior] + NetworkTopologyNatsRoleParam: + in: query + name: role + required: false + schema: + type: string + enum: [hub, leaf, server] responses: AuthRateLimitExceeded: description: Too many authentication requests from this IP address @@ -7160,7 +7619,7 @@ components: codes: - { code: 1000, reason: Normal closure, when: Session ended cleanly } - { code: 1001, reason: Server draining, when: SIGTERM / k8s preStop (R85) } - - { code: 1008, reason: Policy violation, when: RBAC deny, exec/log quota (3 per MS), invalid tail params, pending timeout, sessionId mismatch } + - { code: 1008, reason: Policy violation, when: RBAC deny, exec/log quota (5 per MS), invalid tail params, pending timeout, sessionId mismatch } - { code: 1013, reason: Router unavailable for cross-replica session, when: AMQP router down (R84) } EventRecord: type: object @@ -10419,4 +10878,298 @@ components: properties: host: type: string - additionalProperties: false \ No newline at end of file + additionalProperties: false + DefaultRouterResponse: + type: object + properties: + host: + type: string + messagingPort: + type: integer + edgeRouterPort: + type: integer + nullable: true + interRouterPort: + type: integer + nullable: true + required: + - host + - messagingPort + DefaultRouterUpsertRequest: + type: object + required: + - host + properties: + host: + type: string + messagingPort: + type: integer + minimum: 1 + maximum: 65535 + description: Defaults to 5671 when omitted + edgeRouterPort: + type: integer + minimum: 1 + maximum: 65535 + description: Defaults to 45671 when omitted + interRouterPort: + type: integer + minimum: 1 + maximum: 65535 + description: Defaults to 55671 when omitted + additionalProperties: true + 
DefaultRouterRecordResponse: + type: object + properties: + id: + type: integer + isEdge: + type: boolean + messagingPort: + type: integer + edgeRouterPort: + type: integer + nullable: true + interRouterPort: + type: integer + nullable: true + host: + type: string + isDefault: + type: boolean + iofogUuid: + type: string + nullable: true + createdAt: + type: string + format: date-time + updatedAt: + type: string + format: date-time + required: + - id + - isEdge + - messagingPort + - host + - isDefault + NetworkTopologyDeploymentTarget: + type: string + enum: [kubernetes, remote, edgelet] + NetworkTopologyConnection: + type: object + properties: + id: + type: integer + source: + type: string + dest: + type: string + required: [id, source, dest] + NetworkTopologyNodeBase: + type: object + properties: + id: + type: string + iofogUuid: + type: string + nullable: true + fogName: + type: string + nullable: true + host: + type: string + nullable: true + deploymentTarget: + $ref: "#/components/schemas/NetworkTopologyDeploymentTarget" + displayName: + type: string + role: + type: string + mode: + type: string + required: [id, deploymentTarget, displayName, role, mode] + RouterTopologyNode: + allOf: + - $ref: "#/components/schemas/NetworkTopologyNodeBase" + NatsTopologyNode: + allOf: + - $ref: "#/components/schemas/NetworkTopologyNodeBase" + RouterTopologyNodeDetail: + allOf: + - $ref: "#/components/schemas/RouterTopologyNode" + - type: object + properties: + messagingPort: + type: integer + edgeRouterPort: + type: integer + nullable: true + interRouterPort: + type: integer + nullable: true + isDefault: + type: boolean + NatsTopologyNodeDetail: + allOf: + - $ref: "#/components/schemas/NatsTopologyNode" + - type: object + properties: + serverPort: + type: integer + nullable: true + leafPort: + type: integer + nullable: true + clusterPort: + type: integer + nullable: true + mqttPort: + type: integer + nullable: true + httpPort: + type: integer + nullable: true + jsStorageSize: 
+ type: string + nullable: true + jsMemoryStoreSize: + type: string + nullable: true + isHub: + type: boolean + NetworkTopologySummaryResponse: + type: object + properties: + controlPlane: + type: string + enum: [kubernetes, remote] + router: + type: object + properties: + totalNodes: + type: integer + totalConnections: + type: integer + byRole: + type: object + additionalProperties: + type: integer + nats: + type: object + properties: + totalNodes: + type: integer + totalConnections: + type: integer + byRole: + type: object + additionalProperties: + type: integer + TopologySpokeGroup: + type: object + properties: + upstreamOf: + type: string + role: + type: string + count: + type: integer + required: [upstreamOf, role, count] + RouterTopologyOverviewResponse: + type: object + properties: + defaultNode: + $ref: "#/components/schemas/RouterTopologyNode" + nullable: true + interiorNodes: + type: array + items: + $ref: "#/components/schemas/RouterTopologyNode" + spokeGroups: + type: array + items: + $ref: "#/components/schemas/TopologySpokeGroup" + NatsTopologyOverviewResponse: + type: object + properties: + defaultNode: + $ref: "#/components/schemas/NatsTopologyNode" + nullable: true + serverNodes: + type: array + items: + $ref: "#/components/schemas/NatsTopologyNode" + spokeGroups: + type: array + items: + $ref: "#/components/schemas/TopologySpokeGroup" + PaginatedListMeta: + type: object + properties: + total: + type: integer + limit: + type: integer + offset: + type: integer + required: [total, limit, offset] + RouterTopologyNodeListResponse: + allOf: + - $ref: "#/components/schemas/PaginatedListMeta" + - type: object + properties: + nodes: + type: array + items: + $ref: "#/components/schemas/RouterTopologyNode" + NatsTopologyNodeListResponse: + allOf: + - $ref: "#/components/schemas/PaginatedListMeta" + - type: object + properties: + nodes: + type: array + items: + $ref: "#/components/schemas/NatsTopologyNode" + RouterTopologyConnectionListResponse: + allOf: + - 
$ref: "#/components/schemas/PaginatedListMeta" + - type: object + properties: + connections: + type: array + items: + $ref: "#/components/schemas/NetworkTopologyConnection" + NatsTopologyConnectionListResponse: + allOf: + - $ref: "#/components/schemas/PaginatedListMeta" + - type: object + properties: + connections: + type: array + items: + $ref: "#/components/schemas/NetworkTopologyConnection" + TopologyNodeConnectionsResponse: + type: object + properties: + upstream: + type: array + items: + $ref: "#/components/schemas/NetworkTopologyConnection" + downstream: + type: array + items: + $ref: "#/components/schemas/NetworkTopologyConnection" + TopologySubgraphResponse: + type: object + properties: + nodes: + type: array + items: + oneOf: + - $ref: "#/components/schemas/RouterTopologyNode" + - $ref: "#/components/schemas/NatsTopologyNode" + connections: + type: array + items: + $ref: "#/components/schemas/NetworkTopologyConnection" \ No newline at end of file diff --git a/package.json b/package.json index 048c2f59..53eda8fe 100644 --- a/package.json +++ b/package.json @@ -5,6 +5,8 @@ "main": "./src/main.js", "author": "Eclipse ioFog Project", "contributors": [ + "Emirhan Durmus ", + "Alpaslan Doğan ", "Kilton Hopkins ", "Saeid Rezaei Baghbidi", "Alexandre de Wergifosse", @@ -20,9 +22,7 @@ "Eugene Pankov", "Maksim Chepelev", "Tetiana Yatsiuk", - "Sergey Valevich", - "Emirhan Durmus ", - "Alpaslan Doğan " + "Sergey Valevich" ], "license": "EPL-2.0", "engines": { @@ -57,7 +57,9 @@ "test": "node scripts/run-test.js test", "test:all": "node scripts/run-test.js test-all", "test:k8s-client": "node scripts/run-test.js test test/integration/k8s-client-integration.test.js", + "test:integration:first-fog": "RUN_INTEGRATION=1 node scripts/run-test.js test test/src/integration/first-fog-reconcile-sqlite.test.js", "test:ws-load": "node test/load/ws-pairing-load.js", + "test:load:tx": "node test/load/transaction-safety-load.js --fogs 200 --soak-minutes 5", "precli-tests": "npm run 
lint", "cli-tests": "node scripts/run-test.js cli-tests", "precoverage": "npm run lint", diff --git a/scripts/build-console-dev.js b/scripts/build-console-dev.js index 118b572f..45235b97 100644 --- a/scripts/build-console-dev.js +++ b/scripts/build-console-dev.js @@ -9,7 +9,7 @@ const CONSOLE_DIR = path.join(DEV_DIR, 'console') const BUILD_OUT = path.join(CONSOLE_DIR, 'build') const REPO = process.env.EDGEOPS_CONSOLE_REPO || 'https://github.com/Datasance/edgeops-console' -const VERSION = process.env.EDGEOPS_CONSOLE_VERSION || 'v1.0.3' +const VERSION = process.env.EDGEOPS_CONSOLE_VERSION || 'v1.0.8' const FLAVOR = process.env.EDGEOPS_CONSOLE_FLAVOR || 'datasance' function normalizeTag (version) { diff --git a/scripts/check-dockerfile-digests.sh b/scripts/check-dockerfile-digests.sh new file mode 100755 index 00000000..01f0a282 --- /dev/null +++ b/scripts/check-dockerfile-digests.sh @@ -0,0 +1,249 @@ +#!/usr/bin/env bash +# Compare digest-pinned base images in a Dockerfile against registry tags via skopeo. +set -euo pipefail + +usage() { + cat <<'EOF' +Usage: check-dockerfile-digests.sh [OPTIONS] DOCKERFILE + +Check digest-pinned base images in a Dockerfile against registry tags. + +Options: + -h, --help Show this help + --min-archs N Minimum platform count for multi-arch (default: 2) + --require PLATFORMS Comma-separated required platforms (e.g. 
linux/amd64,linux/arm64) + +Environment: + MIN_ARCHES Same as --min-archs + REQUIRE_PLATFORMS Same as --require + +Requires: skopeo, jq +EOF +} + +DOCKERFILE="" +MIN_ARCHES="${MIN_ARCHES:-2}" +REQUIRE_PLATFORMS="${REQUIRE_PLATFORMS:-}" + +while [[ $# -gt 0 ]]; do + case "$1" in + -h|--help) usage; exit 0 ;; + --min-archs) MIN_ARCHES="$2"; shift 2 ;; + --require) REQUIRE_PLATFORMS="$2"; shift 2 ;; + -*) echo "error: unknown option $1" >&2; exit 1 ;; + *) + if [[ -n "$DOCKERFILE" ]]; then + echo "error: only one Dockerfile path allowed" >&2 + exit 1 + fi + DOCKERFILE="$1" + shift + ;; + esac +done + +DOCKERFILE="${DOCKERFILE:-Dockerfile}" + +if ! command -v skopeo >/dev/null 2>&1; then + cat >&2 <<'EOF' +error: skopeo is required but not found in PATH. + +Install: + macOS: brew install skopeo + Fedora: dnf install skopeo + Ubuntu: apt install skopeo + +Skopeo reads registry auth from ~/.docker/config.json (docker login / podman login). +EOF + exit 1 +fi + +if ! command -v jq >/dev/null 2>&1; then + echo "error: jq is required" >&2 + exit 1 +fi + +if [[ ! 
-f "$DOCKERFILE" ]]; then + echo "error: Dockerfile not found: $DOCKERFILE" >&2 + exit 1 +fi + +COMMENT_TAG_RE='^[[:space:]]*#[[:space:]]*(.+)[[:space:]]—[[:space:]]*pin manifest list digest' + +resolve_tag() { + local image_ref="$1" prev_line="$2" + + if [[ "$image_ref" == *:* ]]; then + echo "${image_ref##*:}" + return 0 + fi + + if [[ "$prev_line" =~ $COMMENT_TAG_RE ]]; then + local hinted="${BASH_REMATCH[1]}" + if [[ "$hinted" == *:* ]]; then + echo "${hinted##*:}" + else + echo "latest" + fi + return 0 + fi + + echo "latest" +} + +image_without_tag() { + local image_ref="$1" + if [[ "$image_ref" == *:* ]]; then + echo "${image_ref%%:*}" + else + echo "$image_ref" + fi +} + +platforms_from_raw() { + jq -r ' + if (.manifests // .Manifests) then + [(.manifests // .Manifests)[] + | (.platform // .Platform) + | select(.architecture != null and .architecture != "unknown") + | "\(.os)/\(.architecture)" + ] | unique | .[] + elif (.architecture // .Architecture) then + "\(.os // .Os)/\(.architecture // .Architecture)" + else + empty + end + ' +} + +is_index() { + case "$1" in + application/vnd.docker.distribution.manifest.list.v2+json|application/vnd.oci.image.index.v1+json) + return 0 + ;; + *) + return 1 + ;; + esac +} + +report_multi_arch() { + local label="$1" raw_file="$2" + local media_type platforms count + + media_type="$(jq -r '.mediaType // .MediaType // empty' "$raw_file")" + + if is_index "$media_type"; then + platforms="$(platforms_from_raw < "$raw_file" | sort -u)" + count="$(printf '%s\n' "$platforms" | sed '/^$/d' | wc -l | tr -d ' ')" + echo " ${label}: multi-arch yes (${count} platforms)" + while IFS= read -r platform; do + [[ -n "$platform" ]] && echo " - ${platform}" + done <<< "$platforms" + + if [[ "$count" -lt "$MIN_ARCHES" ]]; then + echo " WARNING: ${label} has fewer than ${MIN_ARCHES} platforms." 
+ return 1 + fi + else + local single + single="$(platforms_from_raw < "$raw_file" | head -n1)" + echo " ${label}: multi-arch no (single platform: ${single:-unknown})" + return 1 + fi + + if [[ -n "$REQUIRE_PLATFORMS" ]]; then + IFS=',' read -ra required <<< "$REQUIRE_PLATFORMS" + for req in "${required[@]}"; do + req="$(echo "$req" | xargs)" + if ! printf '%s\n' "$platforms" | grep -qx "$req"; then + echo " WARNING: ${label} missing required platform: ${req}" + return 1 + fi + done + fi + + return 0 +} + +skopeo_tag_ref() { + local image_ref="$1" tag="$2" + local image + image="$(image_without_tag "$image_ref")" + echo "docker://${image}:${tag}" +} + +skopeo_digest_ref() { + local image_ref="$1" digest="$2" + echo "docker://$(image_without_tag "$image_ref")@${digest}" +} + +skopeo_digest() { + # Override host OS/arch so multi-arch tags resolve to the manifest-list digest on macOS too. + skopeo inspect "$1" \ + --override-os linux \ + --override-arch amd64 \ + --format '{{.Digest}}' +} + +skopeo_raw_to_file() { + local dest="$1" + skopeo inspect --raw "$2" > "$dest" +} + +exit_code=0 +seen=() +prev_line="" + +while IFS= read -r line || [[ -n "$line" ]]; do + if [[ "$line" =~ ^FROM[[:space:]]+([^[:space:]]+)@sha256:([a-f0-9]{64}) ]]; then + image_ref="${BASH_REMATCH[1]}" + pinned="sha256:${BASH_REMATCH[2]}" + + if [[ " ${seen[*]:-} " == *" ${image_ref} "* ]]; then + prev_line="$line" + continue + fi + seen+=("$image_ref") + + tag="$(resolve_tag "$image_ref" "$prev_line")" + tag_ref="$(skopeo_tag_ref "$image_ref" "$tag")" + digest_ref="$(skopeo_digest_ref "$image_ref" "$pinned")" + + echo "==> ${image_ref} (tag: ${tag})" + echo " pinned: ${pinned}" + + latest_digest="$(skopeo_digest "$tag_ref")" + echo " latest: ${latest_digest}" + + latest_raw="$(mktemp)" + pinned_raw="$(mktemp)" + skopeo_raw_to_file "$latest_raw" "$tag_ref" + skopeo_raw_to_file "$pinned_raw" "$digest_ref" + + if [[ "$pinned" == "$latest_digest" ]]; then + echo " status: OK" + else + echo " status: 
OUTDATED" + echo + echo " Suggestion — update Dockerfile pin to latest multi-arch manifest list:" + echo " FROM ${image_ref}@${latest_digest}" + echo + exit_code=1 + fi + + report_multi_arch "Pinned digest" "$pinned_raw" || exit_code=1 + report_multi_arch "Latest tag" "$latest_raw" || exit_code=1 + + rm -f "$latest_raw" "$pinned_raw" + echo + fi + prev_line="$line" +done < "$DOCKERFILE" + +if [[ "${#seen[@]}" -eq 0 ]]; then + echo "error: no digest-pinned FROM lines in ${DOCKERFILE}" >&2 + exit 1 +fi + +exit "$exit_code" diff --git a/scripts/test.js b/scripts/test.js index 2b1eb19c..69bb5240 100644 --- a/scripts/test.js +++ b/scripts/test.js @@ -13,6 +13,9 @@ function test (useReporter, extraArgs) { } options.env = setDbEnvVars(options.env) + if (process.env.RUN_INTEGRATION) { + options.env.RUN_INTEGRATION = process.env.RUN_INTEGRATION + } const mochaBin = require.resolve('mocha/bin/mocha.js') const mochaReporterOptions = '--reporter mocha-junit-reporter --reporter-options mochaFile=./unit-results.xml' diff --git a/src/config/config.yaml b/src/config/config.yaml index 3ee59f6b..53851641 100644 --- a/src/config/config.yaml +++ b/src/config/config.yaml @@ -23,15 +23,15 @@ server: timeout: 3600000 # Legacy idle fallback (ms); exec uses execMaxDurationMs maxConnections: 100 # Maximum connections per session cleanupInterval: 30000 # Session cleanup interval (30 seconds) - execPendingTimeoutMs: 60000 # Exec: user wait for agent (R81) - execMaxDurationMs: 28800000 # Exec: max active session 8h (R81) - execMaxConcurrentPerResource: 3 # Exec: max user WS per MS (R92) - logPendingTimeoutMs: 120000 # Log: user wait for agent (R82) - logIdleTimeoutMs: 7200000 # Log: idle session 2h (R82) - logMaxConcurrentPerResource: 3 # Log: max user WS per MS or fog (R82) - logTailMaxLines: 5000 # Log: tail query param max (R82) - replicaMaxConcurrentWs: 500 # Scale SLO per replica (R88) - drainTimeoutMs: 30000 # Graceful drain on SIGTERM/preStop (R85) + execPendingTimeoutMs: 60000 # Exec: 
user wait for agent + execMaxDurationMs: 28800000 # Exec: max active session 8h + execMaxConcurrentPerResource: 5 # Exec: max user WS per MS + logPendingTimeoutMs: 120000 # Log: user wait for agent + logIdleTimeoutMs: 7200000 # Log: idle session 2h + logMaxConcurrentPerResource: 5 # Log: max user WS per MS or fog + logTailMaxLines: 5000 # Log: tail query param max + replicaMaxConcurrentWs: 500 # Scale SLO per replica + drainTimeoutMs: 30000 # Graceful drain on SIGTERM/preStop relay: amqp: poolSize: 8 @@ -42,7 +42,7 @@ server: maxPendingMessages: 8192 publishTimeoutMs: 5000 ha: - crossReplicaRequiresAmqp: true # Cross-replica exec/log relay requires AMQP router (R84) + crossReplicaRequiresAmqp: true # Cross-replica exec/log relay requires AMQP router failFastOnRouterUnavailable: true security: maxConnectionsPerIp: 10 @@ -88,6 +88,7 @@ settings: natsReconcileWorkerIntervalSeconds: 3 # NATS reconcile worker poll interval in seconds (default: 3) fogPlatformReconcileWorkerIntervalSeconds: 3 # Platform reconcile worker poll interval in seconds (default: 3) fogPlatformReconcileTaskStalenessSeconds: 300 # Stale fog platform task reclaim in seconds (default: 300 = 5 minutes) + fogPlatformDeleteReconcileTaskStalenessSeconds: 60 # Stale delete task reclaim in seconds (default: 60) fogPlatformReconcileMaxAttempts: 10 # Permanent fail threshold for fog platform tasks (default: 10) fogPlatformReconcileBackoffBaseSeconds: 5 # Exponential backoff base for fog platform tasks (default: 5) fogPlatformSweepIntervalSeconds: 900 # Platform drift sweep interval in seconds (default: 900 = 15 minutes) @@ -95,12 +96,22 @@ settings: hubRouterConfigLockTimeoutSeconds: 120 # Hub router ConfigMap lock wait in seconds (default: 120) serviceLoadBalancerWatchTimeoutSeconds: 300 # LoadBalancer IP watch timeout in service reconcile (default: 300) jobStartupDelaySeconds: 3 # Delay before reconcile-heavy background jobs start (default: 3) + reconcileOutboxDrainerIntervalSeconds: 1 # Outbox drainer poll
interval in seconds (default: 1) + reconcileOutboxDrainerBatchSize: 32 # Max outbox rows per drainer batch (default: 32) wsSessionReconcileIntervalSeconds: 60 # WS exec/log stale DB row reconcile interval (R89) + sqliteEnterpriseFogWarningThreshold: 50 # Log + metric when sqlite fog count exceeds (R124) + dbWriteQueueMaxDepth: 256 # SQLite write queue depth before error log (R123) + dbBusyRetryMaxAttempts: 8 # SQLITE_BUSY retry attempts per transaction (Plan 19) + dbBusyRetryBaseMs: 25 # Exponential backoff base for busy retry (Plan 19) # Database Configuration database: provider: sqlite # Database provider (sqlite/mysql/postgres) - # mysql: + mysql: + pool: + max: 10 + min: 0 + idle: 20000 # host: "" # MySQL host # port: 3306 # MySQL port # username: "" # MySQL username @@ -108,7 +119,11 @@ database: # databaseName: "" # MySQL database name # useSSL: false # Use SSL for MySQL connection # sslCA: "" # MySQL SSL CA in base64 encoded string - # postgres: + postgres: + pool: + max: 10 + min: 0 + idle: 20000 # host: "" # PostgreSQL host # port: 5432 # PostgreSQL port # username: "" # PostgreSQL username @@ -165,7 +180,7 @@ flavor: distribution: datasance rbacApiVersion: datasance.com/v3 serviceAnnotationTag: service.iofog.org/tag - componentLabelDomain: iofog.org/component + # componentLabelDomain: derived from distribution (datasance.com/component | iofog.org/component) # Bridge Ports Configuration for Services bridgePorts: diff --git a/src/config/env-mapping.js b/src/config/env-mapping.js index 31844774..999a79d2 100644 --- a/src/config/env-mapping.js +++ b/src/config/env-mapping.js @@ -68,6 +68,7 @@ module.exports = { CONTROLLER_CLEANUP_INTERVAL: 'settings.controllerCleanupInterval', FOG_PLATFORM_RECONCILE_WORKER_INTERVAL_SECONDS: 'settings.fogPlatformReconcileWorkerIntervalSeconds', FOG_PLATFORM_RECONCILE_TASK_STALENESS_SECONDS: 'settings.fogPlatformReconcileTaskStalenessSeconds', + FOG_PLATFORM_DELETE_RECONCILE_TASK_STALENESS_SECONDS: 
'settings.fogPlatformDeleteReconcileTaskStalenessSeconds', FOG_PLATFORM_RECONCILE_MAX_ATTEMPTS: 'settings.fogPlatformReconcileMaxAttempts', FOG_PLATFORM_RECONCILE_BACKOFF_BASE_SECONDS: 'settings.fogPlatformReconcileBackoffBaseSeconds', FOG_PLATFORM_SWEEP_INTERVAL_SECONDS: 'settings.fogPlatformSweepIntervalSeconds', @@ -75,6 +76,12 @@ module.exports = { HUB_ROUTER_CONFIG_LOCK_TIMEOUT_SECONDS: 'settings.hubRouterConfigLockTimeoutSeconds', SERVICE_LOAD_BALANCER_WATCH_TIMEOUT_SECONDS: 'settings.serviceLoadBalancerWatchTimeoutSeconds', JOB_STARTUP_DELAY_SECONDS: 'settings.jobStartupDelaySeconds', + RECONCILE_OUTBOX_DRAINER_INTERVAL_SECONDS: 'settings.reconcileOutboxDrainerIntervalSeconds', + RECONCILE_OUTBOX_DRAINER_BATCH_SIZE: 'settings.reconcileOutboxDrainerBatchSize', + SQLITE_ENTERPRISE_FOG_WARNING_THRESHOLD: 'settings.sqliteEnterpriseFogWarningThreshold', + DB_WRITE_QUEUE_MAX_DEPTH: 'settings.dbWriteQueueMaxDepth', + DB_BUSY_RETRY_MAX_ATTEMPTS: 'settings.dbBusyRetryMaxAttempts', + DB_BUSY_RETRY_BASE_MS: 'settings.dbBusyRetryBaseMs', // Database Configuration DB_PROVIDER: 'database.provider', diff --git a/src/config/flavor.js b/src/config/flavor.js index 615473bf..8110ec6e 100644 --- a/src/config/flavor.js +++ b/src/config/flavor.js @@ -6,12 +6,28 @@ const DEFAULT_SERVICE_ANNOTATION_TAG = 'service.iofog.org/tag' const DEFAULT_COMPONENT_LABEL_DOMAIN = 'iofog.org/component' const DEFAULT_APP_LABEL = 'iofog' +const DISTRIBUTION_COMPONENT_LABEL = { + datasance: 'datasance.com/component', + iofog: 'iofog.org/component' +} + function getRbacApiVersion () { return process.env.RBAC_API_VERSION || config.get('flavor.rbacApiVersion', DEFAULT_RBAC_API_VERSION) } +function getConfiguredDistribution () { + if (process.env.CONTROLLER_DISTRIBUTION) { + return process.env.CONTROLLER_DISTRIBUTION + } + const fromConfig = config.get('flavor.distribution') + if (fromConfig != null && fromConfig !== '') { + return fromConfig + } + return null +} + function getControllerDistribution () 
{ - return process.env.CONTROLLER_DISTRIBUTION || config.get('flavor.distribution', DEFAULT_CONTROLLER_DISTRIBUTION) + return getConfiguredDistribution() || DEFAULT_CONTROLLER_DISTRIBUTION } function getServiceAnnotationTag () { @@ -19,7 +35,18 @@ function getServiceAnnotationTag () { } function getComponentLabelKey () { - return process.env.COMPONENT_LABEL_DOMAIN || config.get('flavor.componentLabelDomain', DEFAULT_COMPONENT_LABEL_DOMAIN) + if (process.env.COMPONENT_LABEL_DOMAIN) { + return process.env.COMPONENT_LABEL_DOMAIN + } + const fromConfig = config.get('flavor.componentLabelDomain') + if (fromConfig != null && fromConfig !== '') { + return fromConfig + } + const distribution = getConfiguredDistribution() + if (distribution != null && DISTRIBUTION_COMPONENT_LABEL[distribution]) { + return DISTRIBUTION_COMPONENT_LABEL[distribution] + } + return DEFAULT_COMPONENT_LABEL_DOMAIN } function getAppLabelKey () { diff --git a/src/config/rbac-resources.yaml b/src/config/rbac-resources.yaml index 6c98ab84..515b1cad 100644 --- a/src/config/rbac-resources.yaml +++ b/src/config/rbac-resources.yaml @@ -248,6 +248,53 @@ resources: GET: [get] PUT: [update] + networkTopology: + basePath: /api/v3/network-topology + routes: + - path: /api/v3/network-topology/summary + methods: + GET: [list] + - path: /api/v3/network-topology/router/overview + methods: + GET: [list] + - path: /api/v3/network-topology/nats/overview + methods: + GET: [list] + - path: /api/v3/network-topology/router/nodes + methods: + GET: [list] + - path: /api/v3/network-topology/nats/nodes + methods: + GET: [list] + - path: /api/v3/network-topology/router/nodes/:id + methods: + GET: [get] + resourceNameParam: id + - path: /api/v3/network-topology/nats/nodes/:id + methods: + GET: [get] + resourceNameParam: id + - path: /api/v3/network-topology/router/nodes/:id/connections + methods: + GET: [get] + resourceNameParam: id + - path: /api/v3/network-topology/nats/nodes/:id/connections + methods: + GET: [get] + 
resourceNameParam: id + - path: /api/v3/network-topology/router/connections + methods: + GET: [list] + - path: /api/v3/network-topology/nats/connections + methods: + GET: [list] + - path: /api/v3/network-topology/router/subgraph + methods: + GET: [list] + - path: /api/v3/network-topology/nats/subgraph + methods: + GET: [list] + # NATS (granular resources) natsOperator: basePath: /api/v3/nats diff --git a/src/config/rbac-system-roles.js b/src/config/rbac-system-roles.js index e9051fea..3dbe4ee1 100644 --- a/src/config/rbac-system-roles.js +++ b/src/config/rbac-system-roles.js @@ -40,7 +40,7 @@ module.exports = { rules: [ { apiGroups: [''], - resources: ['microservices', 'systemMicroservices', 'fogs', 'applications', 'systemApplications', 'applicationTemplates', 'services', 'router', 'natsAccounts', 'natsUsers', 'natsAccountRules', 'natsUserRules', 'catalog', 'registries', 'secrets', 'configMaps', 'volumeMounts', 'tunnels', 'certificates', 'capabilities', 'cluster', 'serviceAccounts', 'events', 'users', 'authUsers', 'authGroups', 'config', 'controller', 'execSessions', 'systemExecSessions', 'logs', 'systemLogs'], + resources: ['microservices', 'systemMicroservices', 'fogs', 'applications', 'systemApplications', 'applicationTemplates', 'services', 'router', 'networkTopology', 'natsAccounts', 'natsUsers', 'natsAccountRules', 'natsUserRules', 'catalog', 'registries', 'secrets', 'configMaps', 'volumeMounts', 'tunnels', 'certificates', 'capabilities', 'cluster', 'serviceAccounts', 'events', 'users', 'authUsers', 'authGroups', 'config', 'controller', 'execSessions', 'systemExecSessions', 'logs', 'systemLogs'], verbs: ['*'] }, { @@ -67,7 +67,7 @@ module.exports = { }, { apiGroups: [''], - resources: ['fogs', 'router', 'tunnels', 'users', 'authUsers', 'authGroups', 'config', 'roles', 'roleBindings', 'systemMicroservices', 'systemApplications', 'systemExecSessions', 'systemLogs', 'cluster', 'natsOperator', 'natsBootstrap', 'natsHub'], + resources: ['fogs', 'router', 
'networkTopology', 'tunnels', 'users', 'authUsers', 'authGroups', 'config', 'roles', 'roleBindings', 'systemMicroservices', 'systemApplications', 'systemExecSessions', 'systemLogs', 'cluster', 'natsOperator', 'natsBootstrap', 'natsHub'], verbs: ['get', 'list'] } ] @@ -84,7 +84,7 @@ module.exports = { rules: [ { apiGroups: [''], - resources: ['microservices', 'fogs', 'applications', 'systemMicroservices', 'systemApplications', 'applicationTemplates', 'services', 'router', 'natsOperator', 'natsBootstrap', 'natsHub', 'natsAccounts', 'natsUsers', 'natsAccountRules', 'natsUserRules', 'catalog', 'registries', 'secrets', 'configMaps', 'volumeMounts', 'certificates', 'capabilities', 'cluster', 'serviceAccounts', 'users', 'authUsers', 'authGroups', 'config', 'controller', 'roles', 'roleBindings'], + resources: ['microservices', 'fogs', 'applications', 'systemMicroservices', 'systemApplications', 'applicationTemplates', 'services', 'router', 'networkTopology', 'natsOperator', 'natsBootstrap', 'natsHub', 'natsAccounts', 'natsUsers', 'natsAccountRules', 'natsUserRules', 'catalog', 'registries', 'secrets', 'configMaps', 'volumeMounts', 'certificates', 'capabilities', 'cluster', 'serviceAccounts', 'users', 'authUsers', 'authGroups', 'config', 'controller', 'roles', 'roleBindings'], verbs: ['get', 'list'] } ] diff --git a/src/controllers/cluster-controller.js b/src/controllers/cluster-controller.js index 75277634..1700e3da 100644 --- a/src/controllers/cluster-controller.js +++ b/src/controllers/cluster-controller.js @@ -1,23 +1,25 @@ const ClusterControllerService = require('../services/cluster-controller-service') +const { parseBoolean } = require('../config/parse-boolean') const listClusterControllersEndPoint = async function (req) { - return ClusterControllerService.listClusterControllers(false) + const includeInactive = parseBoolean(req.query && req.query.includeInactive, false) + return ClusterControllerService.listClusterControllers(includeInactive) } const 
getClusterControllerEndPoint = async function (req) { const uuid = req.params.uuid - return ClusterControllerService.getClusterController(uuid, false) + return ClusterControllerService.getClusterController(uuid) } const updateClusterControllerEndPoint = async function (req) { const uuid = req.params.uuid const data = req.body - return ClusterControllerService.updateClusterController(uuid, data, false) + return ClusterControllerService.updateClusterController(uuid, data) } const deleteClusterControllerEndPoint = async function (req) { const uuid = req.params.uuid - return ClusterControllerService.deleteClusterController(uuid, false) + return ClusterControllerService.deleteClusterController(uuid) } module.exports = { diff --git a/src/controllers/network-topology-controller.js b/src/controllers/network-topology-controller.js new file mode 100644 index 00000000..7feb5444 --- /dev/null +++ b/src/controllers/network-topology-controller.js @@ -0,0 +1,69 @@ +const NetworkTopologyService = require('../services/network-topology-service') + +const getSummaryEndPoint = async function (req) { + return NetworkTopologyService.getSummary(req) +} + +const getRouterOverviewEndPoint = async function (req) { + return NetworkTopologyService.getRouterOverview(req) +} + +const getNatsOverviewEndPoint = async function (req) { + return NetworkTopologyService.getNatsOverview(req) +} + +const listRouterNodesEndPoint = async function (req) { + return NetworkTopologyService.listRouterNodes(req) +} + +const listNatsNodesEndPoint = async function (req) { + return NetworkTopologyService.listNatsNodes(req) +} + +const getRouterNodeEndPoint = async function (req) { + return NetworkTopologyService.getRouterNode(req) +} + +const getNatsNodeEndPoint = async function (req) { + return NetworkTopologyService.getNatsNode(req) +} + +const getRouterNodeConnectionsEndPoint = async function (req) { + return NetworkTopologyService.getRouterNodeConnections(req) +} + +const getNatsNodeConnectionsEndPoint = async 
function (req) { + return NetworkTopologyService.getNatsNodeConnections(req) +} + +const listRouterConnectionsEndPoint = async function (req) { + return NetworkTopologyService.listRouterConnections(req) +} + +const listNatsConnectionsEndPoint = async function (req) { + return NetworkTopologyService.listNatsConnections(req) +} + +const getRouterSubgraphEndPoint = async function (req) { + return NetworkTopologyService.getRouterSubgraph(req) +} + +const getNatsSubgraphEndPoint = async function (req) { + return NetworkTopologyService.getNatsSubgraph(req) +} + +module.exports = { + getSummaryEndPoint, + getRouterOverviewEndPoint, + getNatsOverviewEndPoint, + listRouterNodesEndPoint, + listNatsNodesEndPoint, + getRouterNodeEndPoint, + getNatsNodeEndPoint, + getRouterNodeConnectionsEndPoint, + getNatsNodeConnectionsEndPoint, + listRouterConnectionsEndPoint, + listNatsConnectionsEndPoint, + getRouterSubgraphEndPoint, + getNatsSubgraphEndPoint +} diff --git a/src/data/adapters/oidc-provider-adapter.js b/src/data/adapters/oidc-provider-adapter.js index e2a28464..9cc0358d 100644 --- a/src/data/adapters/oidc-provider-adapter.js +++ b/src/data/adapters/oidc-provider-adapter.js @@ -1,6 +1,7 @@ 'use strict' const { Op } = require('sequelize') +const { runInTransaction, PRIORITY_INTERACTIVE, PRIORITY_BACKGROUND } = require('../../helpers/transaction-runner') function isExpired (expiresAt) { return expiresAt && expiresAt <= new Date() @@ -24,97 +25,123 @@ class OidcProviderAdapter { const AuthOidcProviderState = this.getStateModel() const expiresAt = expiresIn ? 
new Date(Date.now() + expiresIn * 1000) : null - await AuthOidcProviderState.upsert({ - model: this.name, - recordId: id, - payload: JSON.stringify(payload), - expiresAt, - grantId: payload.grantId || null, - uid: payload.uid || null, - userCode: payload.userCode || null, - consumed: false, - consumedAt: null - }) + await runInTransaction(async (transaction) => { + await AuthOidcProviderState.upsert({ + model: this.name, + recordId: id, + payload: JSON.stringify(payload), + expiresAt, + grantId: payload.grantId || null, + uid: payload.uid || null, + userCode: payload.userCode || null, + consumed: false, + consumedAt: null + }, { transaction, conflictFields: ['model', 'record_id'] }) + }, { priority: PRIORITY_INTERACTIVE, label: 'oidc.adapter.upsert' }) } async find (id) { const AuthOidcProviderState = this.getStateModel() - const row = await AuthOidcProviderState.findOne({ - where: { - model: this.name, - recordId: id - } - }) - if (!row || isExpired(row.expiresAt)) { - return undefined - } + return runInTransaction(async (transaction) => { + const row = await AuthOidcProviderState.findOne({ + where: { + model: this.name, + recordId: id + }, + transaction + }) + + if (!row || isExpired(row.expiresAt)) { + return undefined + } - return rowToPayload(row) + return rowToPayload(row) + }, { priority: PRIORITY_INTERACTIVE, label: 'oidc.adapter.find' }) } async findByUserCode (userCode) { const AuthOidcProviderState = this.getStateModel() - const row = await AuthOidcProviderState.findOne({ - where: { - model: this.name, - userCode - } - }) - if (!row || isExpired(row.expiresAt)) { - return undefined - } + return runInTransaction(async (transaction) => { + const row = await AuthOidcProviderState.findOne({ + where: { + model: this.name, + userCode + }, + transaction + }) + + if (!row || isExpired(row.expiresAt)) { + return undefined + } - return rowToPayload(row) + return rowToPayload(row) + }, { priority: PRIORITY_INTERACTIVE, label: 'oidc.adapter.findByUserCode' }) } 
async findByUid (uid) { const AuthOidcProviderState = this.getStateModel() - const row = await AuthOidcProviderState.findOne({ - where: { - model: this.name, - uid - } - }) - if (!row || isExpired(row.expiresAt)) { - return undefined - } + return runInTransaction(async (transaction) => { + const row = await AuthOidcProviderState.findOne({ + where: { + model: this.name, + uid + }, + transaction + }) + + if (!row || isExpired(row.expiresAt)) { + return undefined + } - return rowToPayload(row) + return rowToPayload(row) + }, { priority: PRIORITY_INTERACTIVE, label: 'oidc.adapter.findByUid' }) } async consume (id) { const AuthOidcProviderState = this.getStateModel() - await AuthOidcProviderState.update({ - consumed: true, - consumedAt: new Date() - }, { - where: { - model: this.name, - recordId: id - } - }) + + await runInTransaction(async (transaction) => { + await AuthOidcProviderState.update({ + consumed: true, + consumedAt: new Date() + }, { + where: { + model: this.name, + recordId: id + }, + transaction + }) + }, { priority: PRIORITY_INTERACTIVE, label: 'oidc.adapter.consume' }) } async destroy (id) { const AuthOidcProviderState = this.getStateModel() - await AuthOidcProviderState.destroy({ - where: { - model: this.name, - recordId: id - } - }) + + await runInTransaction(async (transaction) => { + await AuthOidcProviderState.destroy({ + where: { + model: this.name, + recordId: id + }, + transaction + }) + }, { priority: PRIORITY_INTERACTIVE, label: 'oidc.adapter.destroy' }) } async revokeByGrantId (grantId) { const AuthOidcProviderState = this.getStateModel() - await AuthOidcProviderState.destroy({ - where: { - grantId - } - }) + + await runInTransaction(async (transaction) => { + await AuthOidcProviderState.destroy({ + where: { + grantId + }, + transaction + }) + }, { priority: PRIORITY_INTERACTIVE, label: 'oidc.adapter.revokeByGrantId' }) } } @@ -124,13 +151,17 @@ function createOidcProviderAdapterFactory (getStateModel) { async function 
purgeExpiredOidcProviderStates (getStateModel) { const AuthOidcProviderState = getStateModel() - await AuthOidcProviderState.destroy({ - where: { - expiresAt: { - [Op.lte]: new Date() - } - } - }) + + await runInTransaction(async (transaction) => { + await AuthOidcProviderState.destroy({ + where: { + expiresAt: { + [Op.lte]: new Date() + } + }, + transaction + }) + }, { priority: PRIORITY_BACKGROUND, label: 'oidc.adapter.purgeExpired' }) } module.exports = { diff --git a/src/data/managers/base-manager.js b/src/data/managers/base-manager.js index f7e7f325..2f3660e2 100644 --- a/src/data/managers/base-manager.js +++ b/src/data/managers/base-manager.js @@ -1,7 +1,6 @@ const AppHelper = require('../../helpers/app-helper') const Errors = require('../../helpers/errors') -// TODO [when transactions concurrency issue fixed]: Transactions should be used always module.exports = class BaseManager { getEntity () { throw new Error('Not implemented getEntity method in manager') @@ -12,24 +11,20 @@ module.exports = class BaseManager { object = object || {} - const options = transaction.fakeTransaction - ? { - where: object - } - : { - where: object, - transaction - } - - return this.getEntity().findAll(options) + return this.getEntity().findAll({ + where: object, + transaction + }) } findAllWithAttributes (where, attributes, transaction) { + AppHelper.checkTransaction(transaction) + return this.getEntity().findAll({ where, - attributes - }, - { transaction }) + attributes, + transaction + }) } async findOne (object, transaction) { @@ -37,36 +32,22 @@ module.exports = class BaseManager { object = object || {} - const options = transaction.fakeTransaction - ? { - where: object - } - : { - where: object, - transaction - } - - return this.getEntity().findOne(options) + return this.getEntity().findOne({ + where: object, + transaction + }) } async create (object, transaction) { AppHelper.checkTransaction(transaction) - const options = transaction.fakeTransaction - ? 
{} - : { transaction } - - return this.getEntity().create(object, options) + return this.getEntity().create(object, { transaction }) } async bulkCreate (arr, transaction) { AppHelper.checkTransaction(transaction) - const options = transaction.fakeTransaction - ? {} - : { transaction } - - return this.getEntity().bulkCreate(arr, options) + return this.getEntity().bulkCreate(arr, { transaction }) } async delete (data, transaction) { @@ -74,16 +55,10 @@ module.exports = class BaseManager { data = data || {} - const options = transaction.fakeTransaction - ? { - where: data - } - : { - where: data, - transaction - } - - return this.getEntity().destroy(options) + return this.getEntity().destroy({ + where: data, + transaction + }) } async update (whereData, newData, transaction) { @@ -91,26 +66,16 @@ module.exports = class BaseManager { whereData = whereData || {} - const options = transaction.fakeTransaction - ? { - where: whereData - } - : { - where: whereData, - transaction - } - - return this.getEntity().update(newData, options) + return this.getEntity().update(newData, { + where: whereData, + transaction + }) } async upsert (data, transaction) { AppHelper.checkTransaction(transaction) - const options = transaction.fakeTransaction - ? 
{} - : { transaction } - - return this.getEntity().upsert(data, options) + return this.getEntity().upsert(data, { transaction }) } async updateOrCreate (whereData, data, transaction) { diff --git a/src/data/managers/certificate-manager.js b/src/data/managers/certificate-manager.js index f55bf126..933a3c31 100644 --- a/src/data/managers/certificate-manager.js +++ b/src/data/managers/certificate-manager.js @@ -11,11 +11,9 @@ class CertificateManager extends BaseManager { } async createCertificateRecord (certData, transaction) { - // First find the secret by name to get its ID const secret = await SecretManager.findOne({ name: certData.name }, transaction) if (secret) { - // Link the certificate to the secret certData.secretId = secret.id } @@ -25,17 +23,11 @@ class CertificateManager extends BaseManager { async findCertificatesByCA (caId, transaction) { AppHelper.checkTransaction(transaction) - const options = transaction.fakeTransaction - ? { - where: { signedById: caId }, - include: ['secret'] - } - : { - where: { signedById: caId }, - include: ['secret'], - transaction - } - return this.getEntity().findAll(options) + return this.getEntity().findAll({ + where: { signedById: caId }, + include: ['secret'], + transaction + }) } async findExpiringCertificates (days = 30, transaction) { @@ -44,65 +36,42 @@ class CertificateManager extends BaseManager { const expirationDate = new Date() expirationDate.setDate(expirationDate.getDate() + days) - const options = transaction.fakeTransaction - ? 
{ - where: { - validTo: { [Op.lt]: expirationDate } - }, - include: ['signingCA'] - } - : { - where: { - validTo: { [Op.lt]: expirationDate } - }, - include: ['signingCA'], - transaction - } - return this.getEntity().findAll(options) + return this.getEntity().findAll({ + where: { + validTo: { [Op.lt]: expirationDate } + }, + include: ['signingCA'], + transaction + }) } async findCertificateByName (name, transaction) { AppHelper.checkTransaction(transaction) - const options = transaction.fakeTransaction - ? { - where: { name }, - include: ['signingCA', 'secret'] - } - : { - where: { name }, - include: ['signingCA', 'secret'], - transaction - } - return this.getEntity().findOne(options) + return this.getEntity().findOne({ + where: { name }, + include: ['signingCA', 'secret'], + transaction + }) } async findAllCAs (transaction) { AppHelper.checkTransaction(transaction) - const options = transaction.fakeTransaction - ? { - where: { isCA: true }, - include: ['secret'] - } - : { - where: { isCA: true }, - include: ['secret'], - transaction - } - return this.getEntity().findAll(options) + return this.getEntity().findAll({ + where: { isCA: true }, + include: ['secret'], + transaction + }) } async findAllCertificates (transaction) { AppHelper.checkTransaction(transaction) - const options = transaction.fakeTransaction - ? { include: ['signingCA', 'secret'] } - : { - include: ['signingCA', 'secret'], - transaction - } - return this.getEntity().findAll(options) + return this.getEntity().findAll({ + include: ['signingCA', 'secret'], + transaction + }) } async deleteCertificate (name, transaction) { @@ -112,20 +81,15 @@ class CertificateManager extends BaseManager { async updateCertificate (id, updates, transaction) { AppHelper.checkTransaction(transaction) - // Find existing certificate - const options = transaction.fakeTransaction - ? 
{ where: { id } } - : { - where: { id }, - transaction - } - const cert = await this.getEntity().findOne(options) + const cert = await this.getEntity().findOne({ + where: { id }, + transaction + }) if (!cert) { throw new Error(`Certificate with id ${id} not found`) } - // Update certificate return this.update({ id }, updates, transaction) } @@ -134,38 +98,24 @@ class CertificateManager extends BaseManager { const currentDate = new Date() - const options = transaction.fakeTransaction - ? { - where: { - validTo: { [Op.lt]: currentDate } - }, - include: ['signingCA', 'secret'] - } - : { - where: { - validTo: { [Op.lt]: currentDate } - }, - include: ['signingCA', 'secret'], - transaction - } - return this.getEntity().findAll(options) + return this.getEntity().findAll({ + where: { + validTo: { [Op.lt]: currentDate } + }, + include: ['signingCA', 'secret'], + transaction + }) } async getCertificateChain (certId, transaction) { AppHelper.checkTransaction(transaction) const chain = [] - const options = transaction.fakeTransaction - ? { - where: { id: certId }, - include: ['signingCA', 'secret'] - } - : { - where: { id: certId }, - include: ['signingCA', 'secret'], - transaction - } - let currentCert = await this.getEntity().findOne(options) + let currentCert = await this.getEntity().findOne({ + where: { id: certId }, + include: ['signingCA', 'secret'], + transaction + }) if (!currentCert) { return chain @@ -173,12 +123,12 @@ class CertificateManager extends BaseManager { chain.push(currentCert) - // Traverse up the chain of signing CAs while (currentCert.signingCA) { - const parentOptions = transaction.fakeTransaction - ? 
{ where: { id: currentCert.signedById }, include: ['signingCA', 'secret'] } - : { where: { id: currentCert.signedById }, include: ['signingCA', 'secret'], transaction } - currentCert = await this.getEntity().findOne(parentOptions) + currentCert = await this.getEntity().findOne({ + where: { id: currentCert.signedById }, + include: ['signingCA', 'secret'], + transaction + }) if (currentCert) { chain.push(currentCert) @@ -193,48 +143,30 @@ class CertificateManager extends BaseManager { async findCertificatesForRenewal (days = 30, transaction) { AppHelper.checkTransaction(transaction) - // Calculate the date range - we want certificates that expire between now and (now + days) const now = new Date() const futureDate = new Date() futureDate.setDate(futureDate.getDate() + days) - const options = transaction.fakeTransaction - ? { - where: { - validTo: { - [Op.gt]: now, - [Op.lt]: futureDate - } - }, - include: ['signingCA', 'secret'] - } - : { - where: { - validTo: { - [Op.gt]: now, - [Op.lt]: futureDate - } - }, - include: ['signingCA', 'secret'], - transaction + return this.getEntity().findAll({ + where: { + validTo: { + [Op.gt]: now, + [Op.lt]: futureDate } - return this.getEntity().findAll(options) + }, + include: ['signingCA', 'secret'], + transaction + }) } async getCertificateChildren (caId, transaction) { AppHelper.checkTransaction(transaction) - const options = transaction.fakeTransaction - ? 
{ - where: { signedById: caId }, - include: ['secret'] - } - : { - where: { signedById: caId }, - include: ['secret'], - transaction - } - return this.getEntity().findAll(options) + return this.getEntity().findAll({ + where: { signedById: caId }, + include: ['secret'], + transaction + }) } } diff --git a/src/data/managers/config-map-manager.js b/src/data/managers/config-map-manager.js index 0b61b396..a96bbeb6 100644 --- a/src/data/managers/config-map-manager.js +++ b/src/data/managers/config-map-manager.js @@ -1,5 +1,6 @@ const BaseManager = require('./base-manager') -const SecretHelper = require('../../helpers/secret-helper') +const vaultManager = require('../../vault/vault-manager') +const { scheduleVaultDeleteAfterCommit } = require('../../helpers/vault-transaction-helper') const models = require('../models') const ConfigMap = models.ConfigMap @@ -31,8 +32,7 @@ class ConfigMapManager extends BaseManager { existing.useVault = useVault !== null ? useVault : existing.useVault // Save the instance - this triggers beforeSave hook which handles encryption/vault - const options = transaction.fakeTransaction ? 
{} : { transaction } - await existing.save(options) + await existing.save({ transaction }) return existing } @@ -61,16 +61,12 @@ class ConfigMapManager extends BaseManager { } async deleteConfigMap (name, transaction) { - // Get ConfigMap to check if it's in vault const configMap = await this.findOne({ name }, transaction) - if (configMap && configMap.useVault) { - // Delete from vault if it was stored there - const vaultManager = require('../../vault/vault-manager') - if (vaultManager.isEnabled()) { - await SecretHelper.deleteSecret(name, 'configmap') - } + const useVault = configMap && configMap.useVault + await this.delete({ name }, transaction) + if (useVault && vaultManager.isEnabled()) { + scheduleVaultDeleteAfterCommit(transaction, name, 'configmap') } - return this.delete({ name }, transaction) } } diff --git a/src/data/managers/event-manager.js b/src/data/managers/event-manager.js index 302c0b9c..3c2abd64 100644 --- a/src/data/managers/event-manager.js +++ b/src/data/managers/event-manager.js @@ -85,9 +85,7 @@ class EventManager extends BaseManager { offset: Number(offset) // Ensure it's a number } - if (!transaction.fakeTransaction) { - options.transaction = transaction - } + options.transaction = transaction const { count, rows } = await Event.findAndCountAll(options) @@ -126,9 +124,7 @@ class EventManager extends BaseManager { } // If days = 0, where clause is empty, so all events will be deleted - if (!transaction.fakeTransaction) { - options.transaction = transaction - } + options.transaction = transaction const deletedCount = await Event.destroy(options) return deletedCount diff --git a/src/data/managers/fog-platform-reconcile-task-manager.js b/src/data/managers/fog-platform-reconcile-task-manager.js index 820b655c..8fbcee3f 100644 --- a/src/data/managers/fog-platform-reconcile-task-manager.js +++ b/src/data/managers/fog-platform-reconcile-task-manager.js @@ -1,13 +1,18 @@ const BaseManager = require('./base-manager') const models = 
require('../models') const config = require('../../config') -const databaseProvider = require('../providers/database-factory') const { Op } = require('sequelize') const { FOG_PLATFORM_REASONS } = require('../../schemas/fog-platform-spec') const { withDbBusyRetry } = require('../../helpers/db-busy-retry') +const { claimNextReconcileTask } = require('../../helpers/db-dialect') const ACTIVE_STATUSES = ['pending', 'in_progress'] +const FOG_TASK_SELECT_SQL = `SELECT id, fog_uuid AS fogUuid, reason, spec_generation AS specGeneration, + status, leader_uuid AS leaderUuid, claimed_at AS claimedAt, next_attempt_at AS nextAttemptAt, + attempts, last_error AS lastError, created_at AS createdAt, updated_at AS updatedAt + FROM :table` + class FogPlatformReconcileTaskManager extends BaseManager { getEntity () { return models.FogPlatformReconcileTask @@ -18,12 +23,6 @@ class FogPlatformReconcileTaskManager extends BaseManager { } async enqueueFogPlatformReconcileTask (options = {}, transaction) { - if (transaction.fakeTransaction) { - return databaseProvider.sequelize.transaction((t) => - this.enqueueFogPlatformReconcileTask(options, t) - ) - } - const fogUuid = options.fogUuid if (!fogUuid) { throw new Error('fogUuid is required to enqueue fog platform reconcile task') @@ -42,6 +41,20 @@ class FogPlatformReconcileTaskManager extends BaseManager { }) if (existing) { + if (reason === 'delete' && existing.status === 'in_progress') { + await Entity.update({ + reason: 'delete', + specGeneration, + status: 'pending', + leaderUuid: null, + claimedAt: null, + nextAttemptAt: null, + attempts: 0, + lastError: null + }, { where: { id: existing.id }, transaction }) + return this.findOne({ id: existing.id }, transaction) + } + const update = { specGeneration } if (reason === 'delete' || existing.reason !== 'delete') { update.reason = reason @@ -68,50 +81,24 @@ class FogPlatformReconcileTaskManager extends BaseManager { } async _claimNextFogTaskInternal (controllerUuid, stalenessSeconds) { - 
const sequelize = databaseProvider.sequelize const T = stalenessSeconds != null ? stalenessSeconds : config.get('settings.fogPlatformReconcileTaskStalenessSeconds', 300) + const deleteT = config.get('settings.fogPlatformDeleteReconcileTaskStalenessSeconds', 60) const staleThreshold = new Date(Date.now() - T * 1000) - const Entity = this.getEntity() + const deleteStaleThreshold = new Date(Date.now() - deleteT * 1000) const now = new Date() - return sequelize.transaction(async (transaction) => { - const task = await Entity.findOne({ - where: { - status: { [Op.in]: ACTIVE_STATUSES }, - [Op.or]: [ - { nextAttemptAt: null }, - { nextAttemptAt: { [Op.lte]: now } } - ], - [Op.and]: [{ - [Op.or]: [ - { leaderUuid: null }, - { claimedAt: { [Op.lt]: staleThreshold } } - ] - }] - }, - order: [['id', 'ASC']], - limit: 1, - transaction - }) - if (!task) return null - - const [affected] = await Entity.update( - { leaderUuid: controllerUuid, claimedAt: new Date(), status: 'in_progress' }, - { - where: { - id: task.id, - [Op.or]: [ - { leaderUuid: null }, - { claimedAt: { [Op.lt]: staleThreshold } } - ] - }, - transaction - } - ) - if (affected === 0) return null - return this.findOne({ id: task.id }, transaction) + return claimNextReconcileTask({ + Entity: this.getEntity(), + controllerUuid, + staleThreshold, + deleteStaleThreshold, + now, + activeStatuses: ACTIVE_STATUSES, + includeNextAttemptFilter: true, + selectSql: FOG_TASK_SELECT_SQL, + reloadTask: (id, transaction) => this.findOne({ id }, transaction) }) } diff --git a/src/data/managers/fog-used-token-manager.js b/src/data/managers/fog-used-token-manager.js index aa534227..65961669 100644 --- a/src/data/managers/fog-used-token-manager.js +++ b/src/data/managers/fog-used-token-manager.js @@ -1,19 +1,13 @@ const models = require('../models') const logger = require('../../logger') +const AppHelper = require('../../helpers/app-helper') const { Op } = require('sequelize') class FogUsedTokenManager { - /** - * Store a JTI (JWT 
ID) to mark it as used - * @param {string} jti - The JWT ID - * @param {string} fogUuid - The UUID of the fog node - * @param {number} exp - The expiration timestamp - * @param {Object} transaction - Sequelize transaction - * @returns {Promise} - */ static async storeJti (jti, fogUuid, exp, transaction) { + AppHelper.checkTransaction(transaction) + try { - // Input validation if (!jti || typeof jti !== 'string') { throw new Error('JTI must be a non-empty string') } @@ -21,27 +15,17 @@ class FogUsedTokenManager { throw new Error('Fog UUID must be a non-empty string') } - // Ensure exp is a valid integer (Unix timestamp) const expiryTime = parseInt(exp, 10) if (isNaN(expiryTime) || expiryTime <= 0) { throw new Error('Expiration timestamp must be a positive integer') } - // Prepare the data object - const tokenData = { + await models.FogUsedToken.create({ jti, iofogUuid: fogUuid, expiryTime - } - - // Create the record with or without transaction - if (!transaction || transaction.fakeTransaction) { - await models.FogUsedToken.create(tokenData) - } else { - await models.FogUsedToken.create(tokenData, { transaction }) - } + }, { transaction }) } catch (error) { - // Check if it's a duplicate JTI error if (error.name === 'SequelizeUniqueConstraintError' && error.fields && error.fields.jti) { logger.warn(`JTI already exists: ${jti}`) throw new Error('JWT token already used') @@ -52,27 +36,14 @@ class FogUsedTokenManager { } } - /** - * Check if a JTI has already been used - * @param {string} jti - The JWT ID to check - * @param {Object} transaction - Sequelize transaction - * @returns {Promise} True if the JTI has been used, false otherwise - */ static async isJtiUsed (jti, transaction) { + AppHelper.checkTransaction(transaction) + try { - let token - if (!transaction || transaction.fakeTransaction) { - // If no transaction or fake transaction, query without transaction - token = await models.FogUsedToken.findOne({ - where: { jti } - }) - } else { - // Use the provided 
transaction - token = await models.FogUsedToken.findOne({ - where: { jti }, - transaction - }) - } + const token = await models.FogUsedToken.findOne({ + where: { jti }, + transaction + }) return !!token } catch (error) { logger.error(`Failed to check JTI: ${error.message}`) @@ -80,19 +51,18 @@ class FogUsedTokenManager { } } - /** - * Clean up expired JTIs - * @returns {Promise} Number of deleted tokens - */ - static async cleanupExpiredJtis () { + static async cleanupExpiredJtis (transaction) { + AppHelper.checkTransaction(transaction) + try { - const now = Math.floor(Date.now() / 1000) // Convert to Unix timestamp (seconds) + const now = Math.floor(Date.now() / 1000) const result = await models.FogUsedToken.destroy({ where: { expiryTime: { [Op.lt]: now } - } + }, + transaction }) logger.debug(`Cleaned up ${result} expired JTIs`) return result diff --git a/src/data/managers/hub-router-config-lock-manager.js b/src/data/managers/hub-router-config-lock-manager.js index 6b7d64ea..687fcff0 100644 --- a/src/data/managers/hub-router-config-lock-manager.js +++ b/src/data/managers/hub-router-config-lock-manager.js @@ -1,6 +1,5 @@ const BaseManager = require('./base-manager') const models = require('../models') -const databaseProvider = require('../providers/database-factory') const config = require('../../config') const LOCK_ROW_ID = 1 @@ -10,12 +9,6 @@ class HubRouterConfigLockManager extends BaseManager { return models.HubRouterConfigLock } - _getModelOptions (transaction) { - return transaction && transaction.fakeTransaction - ? 
{} - : { transaction } - } - _isUniqueConstraintError (error) { return error && error.name === 'SequelizeUniqueConstraintError' } @@ -41,12 +34,6 @@ class HubRouterConfigLockManager extends BaseManager { } async tryAcquire (controllerUuid, timeoutSeconds, transaction) { - if (transaction.fakeTransaction) { - return databaseProvider.sequelize.transaction((t) => - this.tryAcquire(controllerUuid, timeoutSeconds, t) - ) - } - await this.initializeLock(transaction) const stalenessSeconds = this._getStalenessSeconds(timeoutSeconds) diff --git a/src/data/managers/iofog-manager.js b/src/data/managers/iofog-manager.js index 988a0a78..f328640f 100644 --- a/src/data/managers/iofog-manager.js +++ b/src/data/managers/iofog-manager.js @@ -62,14 +62,14 @@ class FogManager extends BaseManager { }) } - // no transaction required here, used by agent-last-active decorator - updateLastActive (uuid, timestamp) { + updateLastActive (uuid, timestamp, transaction) { return Fog.update({ lastActive: timestamp }, { where: { uuid - } + }, + transaction }) } } diff --git a/src/data/managers/iofog-public-key-manager.js b/src/data/managers/iofog-public-key-manager.js index 7e88a80e..ae5069a9 100644 --- a/src/data/managers/iofog-public-key-manager.js +++ b/src/data/managers/iofog-public-key-manager.js @@ -7,71 +7,40 @@ class FogPublicKeyManager extends BaseManager { return FogPublicKey } - // Find public key by fog UUID findByFogUuid (fogUuid, transaction) { - const options = transaction.fakeTransaction - ? { - where: { - iofogUuid: fogUuid - } - } - : { - where: { - iofogUuid: fogUuid - }, - transaction - } - - return FogPublicKey.findOne(options) + return FogPublicKey.findOne({ + where: { + iofogUuid: fogUuid + }, + transaction + }) } - // Update or create public key for a fog updateOrCreate (fogUuid, publicKey, transaction) { - const options = transaction.fakeTransaction - ? 
{ - where: { - iofogUuid: fogUuid - } - } - : { + return FogPublicKey.findOne({ + where: { + iofogUuid: fogUuid + }, + transaction + }).then((existingKey) => { + if (existingKey) { + return FogPublicKey.update({ + publicKey + }, { where: { iofogUuid: fogUuid }, transaction - } - - return FogPublicKey.findOne(options).then((existingKey) => { - if (existingKey) { - const updateOptions = transaction.fakeTransaction - ? { - where: { - iofogUuid: fogUuid - } - } - : { - where: { - iofogUuid: fogUuid - }, - transaction - } - - return FogPublicKey.update({ - publicKey - }, updateOptions) - } else { - const createOptions = transaction.fakeTransaction - ? {} - : { transaction } - - return FogPublicKey.create({ - iofogUuid: fogUuid, - publicKey - }, createOptions) + }) } + + return FogPublicKey.create({ + iofogUuid: fogUuid, + publicKey + }, { transaction }) }) } - // Delete public key by fog UUID deleteByFogUuid (fogUuid, transaction) { return this.delete({ iofogUuid: fogUuid }, transaction) } diff --git a/src/data/managers/nats-connection-manager.js b/src/data/managers/nats-connection-manager.js index 94d6fa46..83346863 100644 --- a/src/data/managers/nats-connection-manager.js +++ b/src/data/managers/nats-connection-manager.js @@ -22,8 +22,9 @@ class NatsConnectionManager extends BaseManager { required: true } ], - where - }, { transaction }) + where, + transaction + }) } } diff --git a/src/data/managers/nats-instance-manager.js b/src/data/managers/nats-instance-manager.js index 2703185e..29eabb5b 100644 --- a/src/data/managers/nats-instance-manager.js +++ b/src/data/managers/nats-instance-manager.js @@ -8,7 +8,10 @@ class NatsInstanceManager extends BaseManager { } findByFog (iofogUuid, transaction) { - return NatsInstance.findOne({ where: { iofogUuid } }, { transaction }) + return NatsInstance.findOne({ + where: { iofogUuid }, + transaction + }) } } diff --git a/src/data/managers/nats-reconcile-task-manager.js b/src/data/managers/nats-reconcile-task-manager.js index 
89569e1b..3df3adfb 100644 --- a/src/data/managers/nats-reconcile-task-manager.js +++ b/src/data/managers/nats-reconcile-task-manager.js @@ -1,9 +1,16 @@ const BaseManager = require('./base-manager') const models = require('../models') const config = require('../../config') -const databaseProvider = require('../providers/database-factory') -const { Op } = require('sequelize') const { withDbBusyRetry } = require('../../helpers/db-busy-retry') +const { claimNextReconcileTask } = require('../../helpers/db-dialect') + +const ACTIVE_STATUSES = ['pending', 'in_progress'] + +const NATS_TASK_SELECT_SQL = `SELECT id, reason, application_id AS applicationId, + account_rule_id AS accountRuleId, user_rule_id AS userRuleId, fog_uuids AS fogUuids, + status, leader_uuid AS leaderUuid, claimed_at AS claimedAt, + created_at AS createdAt, updated_at AS updatedAt + FROM :table` class NatsReconcileTaskManager extends BaseManager { getEntity () { @@ -15,39 +22,19 @@ class NatsReconcileTaskManager extends BaseManager { } async _claimNextInternal (controllerUuid, stalenessSeconds) { - const sequelize = databaseProvider.sequelize const T = stalenessSeconds != null ? 
stalenessSeconds : config.get('settings.natsReconcileTaskStalenessSeconds', 900) const staleThreshold = new Date(Date.now() - T * 1000) - const Entity = this.getEntity() - return sequelize.transaction(async (transaction) => { - const task = await Entity.findOne({ - where: { - status: { [Op.in]: ['pending', 'in_progress'] }, - [Op.or]: [ - { leaderUuid: null }, - { claimedAt: { [Op.lt]: staleThreshold } } - ] - }, - order: [['id', 'ASC']], - limit: 1, - transaction - }) - if (!task) return null - const [affected] = await Entity.update( - { leaderUuid: controllerUuid, claimedAt: new Date(), status: 'in_progress' }, - { - where: { - id: task.id, - [Op.or]: [ - { leaderUuid: null }, - { claimedAt: { [Op.lt]: staleThreshold } } - ] - }, - transaction - } - ) - if (affected === 0) return null - return this.findOne({ id: task.id }, transaction) + const now = new Date() + + return claimNextReconcileTask({ + Entity: this.getEntity(), + controllerUuid, + staleThreshold, + now, + activeStatuses: ACTIVE_STATUSES, + includeNextAttemptFilter: false, + selectSql: NATS_TASK_SELECT_SQL, + reloadTask: (id, transaction) => this.findOne({ id }, transaction) }) } } diff --git a/src/data/managers/rbac-cache-version-manager.js b/src/data/managers/rbac-cache-version-manager.js index 3ad6013e..b92fbd07 100644 --- a/src/data/managers/rbac-cache-version-manager.js +++ b/src/data/managers/rbac-cache-version-manager.js @@ -8,11 +8,6 @@ class RbacCacheVersionManager extends BaseManager { return RbacCacheVersion } - /** - * Get current cache version - * @param {Object} transaction - Database transaction - * @returns {Promise} Current version number - */ async getVersion (transaction) { const cacheVersion = await this.findOne({ id: 1 }, transaction) if (!cacheVersion) { @@ -22,9 +17,7 @@ class RbacCacheVersionManager extends BaseManager { } _getModelOptions (transaction) { - return transaction && transaction.fakeTransaction - ? 
{} - : { transaction } + return { transaction } } _extractAffectedRows (updateResult) { @@ -79,12 +72,6 @@ class RbacCacheVersionManager extends BaseManager { } } - /** - * Increment cache version - * This should be called whenever any RBAC resource (Role, RoleBinding, ServiceAccount) is modified - * @param {Object} transaction - Database transaction - * @returns {Promise} - */ async incrementVersion (transaction) { try { const updateResult = await this._incrementVersionAtomic(transaction) @@ -113,16 +100,9 @@ class RbacCacheVersionManager extends BaseManager { } } - /** - * Initialize cache version row if it doesn't exist - * This is called on server startup to ensure the row exists - * @param {Object} transaction - Database transaction (optional) - * @returns {Promise} - */ async initializeVersion (transaction) { const cacheVersion = await this.findOne({ id: 1 }, transaction) if (!cacheVersion) { - // Create initial version row await this.create({ id: 1, version: 1 }, transaction) } } diff --git a/src/data/managers/rbac-role-manager.js b/src/data/managers/rbac-role-manager.js index 69b16e47..1ed8dc01 100644 --- a/src/data/managers/rbac-role-manager.js +++ b/src/data/managers/rbac-role-manager.js @@ -104,8 +104,7 @@ class RbacRoleManager extends BaseManager { resourceNames: rule.resourceNames || null })) - const bulkCreateOptions = transaction.fakeTransaction ? {} : { transaction } - await RbacRoleRule.bulkCreate(rules, bulkCreateOptions) + await RbacRoleRule.bulkCreate(rules, { transaction }) } // Increment cache version to invalidate caches on all instances @@ -145,10 +144,10 @@ class RbacRoleManager extends BaseManager { // Update rules if provided if (roleData.rules && Array.isArray(roleData.rules)) { // Delete existing rules - const destroyOptions = transaction.fakeTransaction - ? 
{ where: { roleId: role.id } } - : { where: { roleId: role.id }, transaction } - await RbacRoleRule.destroy(destroyOptions) + await RbacRoleRule.destroy({ + where: { roleId: role.id }, + transaction + }) // Create new rules const rules = roleData.rules.map(rule => ({ @@ -159,8 +158,7 @@ class RbacRoleManager extends BaseManager { resourceNames: rule.resourceNames || null })) - const bulkCreateOptions = transaction.fakeTransaction ? {} : { transaction } - await RbacRoleRule.bulkCreate(rules, bulkCreateOptions) + await RbacRoleRule.bulkCreate(rules, { transaction }) } // Increment cache version to invalidate caches on all instances @@ -215,10 +213,10 @@ class RbacRoleManager extends BaseManager { return null } - const findAllOptions = transaction.fakeTransaction - ? { where: { roleId: role.id } } - : { where: { roleId: role.id }, transaction } - const rules = await RbacRoleRule.findAll(findAllOptions) + const rules = await RbacRoleRule.findAll({ + where: { roleId: role.id }, + transaction + }) return { id: role.id, diff --git a/src/data/managers/rbac-service-account-manager.js b/src/data/managers/rbac-service-account-manager.js index 5adc6475..0c2b4571 100644 --- a/src/data/managers/rbac-service-account-manager.js +++ b/src/data/managers/rbac-service-account-manager.js @@ -54,10 +54,11 @@ class RbacServiceAccountManager extends BaseManager { if (!application) { throw new Errors.NotFoundError(`Application '${appName}' not found`) } - const options = transaction.fakeTransaction - ? { where: { applicationId: application.id, name }, include: serviceAccountIncludeApplication } - : { where: { applicationId: application.id, name }, include: serviceAccountIncludeApplication, transaction } - const sa = await RbacServiceAccount.findOne(options) + const sa = await RbacServiceAccount.findOne({ + where: { applicationId: application.id, name }, + include: serviceAccountIncludeApplication, + transaction + }) return sa ? 
mapToResponse(sa) : null } @@ -168,7 +169,7 @@ class RbacServiceAccountManager extends BaseManager { const updated = await RbacServiceAccount.findByPk(sa.id, { include: serviceAccountIncludeApplication, - transaction: transaction.fakeTransaction ? undefined : transaction + transaction }) return mapToResponse(updated) } @@ -216,10 +217,11 @@ class RbacServiceAccountManager extends BaseManager { } where.applicationId = application.id } - const findOptions = transaction.fakeTransaction - ? { where, include: serviceAccountIncludeApplication } - : { where, include: serviceAccountIncludeApplication, transaction } - const list = await RbacServiceAccount.findAll(findOptions) + const list = await RbacServiceAccount.findAll({ + where, + include: serviceAccountIncludeApplication, + transaction + }) return list.map(sa => mapToResponse(sa)) } } diff --git a/src/data/managers/reconcile-outbox-manager.js b/src/data/managers/reconcile-outbox-manager.js new file mode 100644 index 00000000..78fd21f3 --- /dev/null +++ b/src/data/managers/reconcile-outbox-manager.js @@ -0,0 +1,155 @@ +const BaseManager = require('./base-manager') +const models = require('../models') +const { buildIdempotencyKey } = require('../../helpers/reconcile-outbox-keys') + +class ReconcileOutboxManager extends BaseManager { + getEntity () { + return models.ReconcileOutbox + } + + _isUniqueConstraintError (error) { + return error && error.name === 'SequelizeUniqueConstraintError' + } + + _serializePayload (payload) { + return JSON.stringify(payload) + } + + _parsePayload (row) { + if (!row || row.payload == null) { + return null + } + return JSON.parse(row.payload) + } + + async _reopenProcessedRow (existing, kind, serializedPayload, transaction) { + await this.update({ id: existing.id }, { + kind, + payload: serializedPayload, + processedAt: null, + lastError: null + }, transaction) + return this.findOne({ id: existing.id }, transaction) + } + + async _resolveExistingEnqueue (existing, kind, serializedPayload, 
transaction) { + if (existing.processedAt == null) { + if (existing.payload !== serializedPayload) { + await this.update({ id: existing.id }, { payload: serializedPayload }, transaction) + return this.findOne({ id: existing.id }, transaction) + } + return existing + } + return this._reopenProcessedRow(existing, kind, serializedPayload, transaction) + } + + async enqueue (kind, payload, idempotencyKey, transaction) { + const serializedPayload = this._serializePayload(payload) + const existing = await this.findOne({ idempotencyKey }, transaction) + if (existing) { + return this._resolveExistingEnqueue(existing, kind, serializedPayload, transaction) + } + + const sequelize = models.sequelize + const useSavepoint = sequelize.getDialect() === 'postgres' + const savepointName = useSavepoint ? `sp_outbox_${Math.random().toString(36).slice(2, 10)}` : null + + try { + if (useSavepoint) { + await sequelize.query(`SAVEPOINT ${savepointName}`, { transaction }) + } + const row = await this.create({ + kind, + payload: serializedPayload, + idempotencyKey, + processedAt: null, + lastError: null + }, transaction) + if (useSavepoint) { + await sequelize.query(`RELEASE SAVEPOINT ${savepointName}`, { transaction }) + } + return row + } catch (error) { + if (this._isUniqueConstraintError(error)) { + if (useSavepoint) { + await sequelize.query(`ROLLBACK TO SAVEPOINT ${savepointName}`, { transaction }) + await sequelize.query(`RELEASE SAVEPOINT ${savepointName}`, { transaction }) + } + const raced = await this.findOne({ idempotencyKey }, transaction) + if (raced) { + return this._resolveExistingEnqueue(raced, kind, serializedPayload, transaction) + } + } + throw error + } + } + + async enqueueFogPlatform (payload, transaction) { + const idempotencyKey = buildIdempotencyKey('fog_platform', payload) + return this.enqueue('fog_platform', payload, idempotencyKey, transaction) + } + + async enqueueServicePlatform (payload, transaction) { + const idempotencyKey = 
buildIdempotencyKey('service_platform', payload) + return this.enqueue('service_platform', payload, idempotencyKey, transaction) + } + + async enqueueNats (payload, transaction) { + if (payload && payload.triggerReconcile === false) { + return null + } + const { triggerReconcile, ...rest } = payload || {} + const idempotencyKey = buildIdempotencyKey('nats', rest) + return this.enqueue('nats', rest, idempotencyKey, transaction) + } + + async claimUnprocessed (limit, transaction) { + const sequelize = models.sequelize + const dialect = sequelize.getDialect() + const Entity = this.getEntity() + const safeLimit = Math.max(1, limit || 1) + + if (dialect === 'sqlite') { + return Entity.findAll({ + where: { processedAt: null }, + order: [['id', 'ASC']], + limit: safeLimit, + transaction + }) + } + + const tableName = Entity.getTableName() + const quotedTable = dialect === 'postgres' ? `"${tableName}"` : `\`${tableName}\`` + const rows = await sequelize.query( + `SELECT id, kind, payload, idempotency_key AS idempotencyKey, created_at AS createdAt, processed_at AS processedAt, last_error AS lastError + FROM ${quotedTable} + WHERE processed_at IS NULL + ORDER BY id ASC + LIMIT :limit + FOR UPDATE SKIP LOCKED`, + { + replacements: { limit: safeLimit }, + type: sequelize.QueryTypes.SELECT, + transaction + } + ) + + return rows.map((row) => Entity.build(row, { isNewRecord: false })) + } + + async markProcessed (id, transaction) { + await this.update({ id }, { processedAt: new Date(), lastError: null }, transaction) + return this.findOne({ id }, transaction) + } + + async markFailed (id, errorMessage, transaction) { + await this.update({ id }, { lastError: errorMessage }, transaction) + return this.findOne({ id }, transaction) + } + + parsePayload (row) { + return this._parsePayload(row) + } +} + +module.exports = new ReconcileOutboxManager() diff --git a/src/data/managers/registry-manager.js b/src/data/managers/registry-manager.js index 4bd066fd..3eb490d0 100644 --- 
a/src/data/managers/registry-manager.js +++ b/src/data/managers/registry-manager.js @@ -1,6 +1,6 @@ const BaseManager = require('./base-manager') -const SecretHelper = require('../../helpers/secret-helper') const vaultManager = require('../../vault/vault-manager') +const { scheduleVaultDeleteAfterCommit } = require('../../helpers/vault-transaction-helper') const models = require('../models') const Registry = models.Registry @@ -11,14 +11,11 @@ class RegistryManager extends BaseManager { async delete (data, transaction) { const registry = await this.findOne(data || {}, transaction) + const result = await super.delete(data, transaction) if (registry && vaultManager.isEnabled()) { - try { - await SecretHelper.deleteSecret('registry-' + registry.id, 'registry') - } catch (err) { - // Ignore 404 or other errors (e.g. password was never stored in vault) - } + scheduleVaultDeleteAfterCommit(transaction, 'registry-' + registry.id, 'registry') } - return super.delete(data, transaction) + return result } } diff --git a/src/data/managers/router-connection-manager.js b/src/data/managers/router-connection-manager.js index c6d9ae84..0729dae8 100644 --- a/src/data/managers/router-connection-manager.js +++ b/src/data/managers/router-connection-manager.js @@ -22,8 +22,9 @@ class RouterConnectionManager extends BaseManager { required: true } ], - where - }, { transaction }) + where, + transaction + }) } } diff --git a/src/data/managers/secret-manager.js b/src/data/managers/secret-manager.js index b75ca337..0253a20f 100644 --- a/src/data/managers/secret-manager.js +++ b/src/data/managers/secret-manager.js @@ -1,5 +1,4 @@ const BaseManager = require('./base-manager') -const SecretHelper = require('../../helpers/secret-helper') const models = require('../models') const Secret = models.Secret @@ -18,12 +17,16 @@ class SecretManager extends BaseManager { } async updateSecret (name, type, data, transaction) { - const encryptedData = await SecretHelper.encryptSecret(data, name, type) - 
return this.update( - { name }, - { type, data: encryptedData }, - transaction - ) + const existing = await this.findOne({ name }, transaction) + if (!existing) { + throw new Error(`Secret ${name} not found`) + } + + existing.type = type + existing.data = data + await existing.save({ transaction }) + + return existing } async getSecret (name, transaction) { diff --git a/src/data/managers/service-platform-reconcile-task-manager.js b/src/data/managers/service-platform-reconcile-task-manager.js index 7bd91105..953a9d76 100644 --- a/src/data/managers/service-platform-reconcile-task-manager.js +++ b/src/data/managers/service-platform-reconcile-task-manager.js @@ -1,7 +1,6 @@ const BaseManager = require('./base-manager') const models = require('../models') const config = require('../../config') -const databaseProvider = require('../providers/database-factory') const { Op } = require('sequelize') const { SERVICE_PLATFORM_REASONS, @@ -9,9 +8,15 @@ const { parseSpecSnapshot } = require('../../schemas/fog-platform-spec') const { withDbBusyRetry } = require('../../helpers/db-busy-retry') +const { claimNextReconcileTask } = require('../../helpers/db-dialect') const ACTIVE_STATUSES = ['pending', 'in_progress'] +const SERVICE_TASK_SELECT_SQL = `SELECT id, service_name AS serviceName, reason, spec_snapshot AS specSnapshot, + status, leader_uuid AS leaderUuid, claimed_at AS claimedAt, next_attempt_at AS nextAttemptAt, + attempts, last_error AS lastError, created_at AS createdAt, updated_at AS updatedAt + FROM :table` + class ServicePlatformReconcileTaskManager extends BaseManager { getEntity () { return models.ServicePlatformReconcileTask @@ -29,12 +34,6 @@ class ServicePlatformReconcileTaskManager extends BaseManager { } async enqueueServicePlatformReconcileTask (options = {}, transaction) { - if (transaction.fakeTransaction) { - return databaseProvider.sequelize.transaction((t) => - this.enqueueServicePlatformReconcileTask(options, t) - ) - } - const serviceName = 
options.serviceName if (!serviceName) { throw new Error('serviceName is required to enqueue service platform reconcile task') @@ -76,50 +75,21 @@ class ServicePlatformReconcileTaskManager extends BaseManager { } async _claimNextServiceTaskInternal (controllerUuid, stalenessSeconds) { - const sequelize = databaseProvider.sequelize const T = stalenessSeconds != null ? stalenessSeconds : config.get('settings.fogPlatformReconcileTaskStalenessSeconds', 300) const staleThreshold = new Date(Date.now() - T * 1000) - const Entity = this.getEntity() const now = new Date() - return sequelize.transaction(async (transaction) => { - const task = await Entity.findOne({ - where: { - status: { [Op.in]: ACTIVE_STATUSES }, - [Op.or]: [ - { nextAttemptAt: null }, - { nextAttemptAt: { [Op.lte]: now } } - ], - [Op.and]: [{ - [Op.or]: [ - { leaderUuid: null }, - { claimedAt: { [Op.lt]: staleThreshold } } - ] - }] - }, - order: [['id', 'ASC']], - limit: 1, - transaction - }) - if (!task) return null - - const [affected] = await Entity.update( - { leaderUuid: controllerUuid, claimedAt: new Date(), status: 'in_progress' }, - { - where: { - id: task.id, - [Op.or]: [ - { leaderUuid: null }, - { claimedAt: { [Op.lt]: staleThreshold } } - ] - }, - transaction - } - ) - if (affected === 0) return null - return this.findOne({ id: task.id }, transaction) + return claimNextReconcileTask({ + Entity: this.getEntity(), + controllerUuid, + staleThreshold, + now, + activeStatuses: ACTIVE_STATUSES, + includeNextAttemptFilter: true, + selectSql: SERVICE_TASK_SELECT_SQL, + reloadTask: (id, transaction) => this.findOne({ id }, transaction) }) } diff --git a/src/data/managers/volume-mapping-manager.js b/src/data/managers/volume-mapping-manager.js index 7cc1d8bb..a4c40f27 100644 --- a/src/data/managers/volume-mapping-manager.js +++ b/src/data/managers/volume-mapping-manager.js @@ -10,8 +10,9 @@ class VolumeMappingManager extends BaseManager { findAll (where, transaction) { return VolumeMapping.findAll({ 
where, - attributes: ['hostDestination', 'containerDestination', 'accessMode', 'id', 'type'] - }, { transaction }) + attributes: ['hostDestination', 'containerDestination', 'accessMode', 'id', 'type'], + transaction + }) } } diff --git a/src/data/managers/volume-mounting-manager.js b/src/data/managers/volume-mounting-manager.js index 8c1b5487..63c71859 100644 --- a/src/data/managers/volume-mounting-manager.js +++ b/src/data/managers/volume-mounting-manager.js @@ -19,29 +19,33 @@ class VolumeMountingManager extends BaseManager { getAll (where, transaction) { return VolumeMount.findAll({ where, - attributes: ['uuid', 'name', 'configMapName', 'secretName'] - }, { transaction }) + attributes: ['uuid', 'name', 'configMapName', 'secretName'], + transaction + }) } getOne (where, transaction) { return VolumeMount.findOne({ where, - attributes: ['uuid', 'name', 'configMapName', 'secretName', 'version'] - }, { transaction }) + attributes: ['uuid', 'name', 'configMapName', 'secretName', 'version'], + transaction + }) } findOne (where, transaction) { return VolumeMount.findOne({ where, - attributes: ['uuid', 'name', 'configMapName', 'secretName', 'version'] - }, { transaction }) + attributes: ['uuid', 'name', 'configMapName', 'secretName', 'version'], + transaction + }) } findAll (where, transaction) { return VolumeMount.findAll({ where, - attributes: ['uuid', 'name', 'configMapName', 'secretName', 'version'] - }, { transaction }) + attributes: ['uuid', 'name', 'configMapName', 'secretName', 'version'], + transaction + }) } } diff --git a/src/data/migrations/mysql/db_migration_mysql_v3.8.0.sql b/src/data/migrations/mysql/db_migration_mysql_v3.8.0.sql index 23a5f2ea..bf90dff0 100644 --- a/src/data/migrations/mysql/db_migration_mysql_v3.8.0.sql +++ b/src/data/migrations/mysql/db_migration_mysql_v3.8.0.sql @@ -120,7 +120,7 @@ CREATE TABLE IF NOT EXISTS Fogs ( deployment_type VARCHAR(36), active_volume_mounts BIGINT DEFAULT 0, volume_mount_last_update BIGINT DEFAULT 0, - 
warning_message TEXT DEFAULT 'HEALTHY', + warning_message TEXT, gps_device VARCHAR(36), gps_scan_frequency INT DEFAULT 60, edge_guard_frequency INT DEFAULT 0, @@ -646,7 +646,7 @@ CREATE INDEX idx_microservice_exec_status_microservice_uuid ON MicroserviceExecS CREATE TABLE IF NOT EXISTS MicroserviceHealthChecks ( id INT AUTO_INCREMENT PRIMARY KEY NOT NULL, test TEXT, - interval BIGINT, + `interval` BIGINT, timeout BIGINT, start_period BIGINT, start_interval BIGINT, @@ -707,7 +707,7 @@ CREATE INDEX idx_microservice_log_status_session_id ON MicroserviceLogStatuses ( CREATE TABLE IF NOT EXISTS MicroserviceExecSessions ( id INT AUTO_INCREMENT PRIMARY KEY NOT NULL, microservice_uuid VARCHAR(36) NOT NULL, - session_id TEXT UNIQUE NOT NULL, + session_id VARCHAR(255) UNIQUE NOT NULL, status TEXT, user_connected BOOLEAN DEFAULT false, agent_connected BOOLEAN DEFAULT false, @@ -738,8 +738,8 @@ CREATE INDEX idx_fog_log_status_session_id ON FogLogStatuses (session_id); CREATE TABLE IF NOT EXISTS RbacRoles ( id INT AUTO_INCREMENT PRIMARY KEY, - name TEXT UNIQUE NOT NULL, - kind TEXT DEFAULT 'Role', + name TEXT NOT NULL, + kind TEXT, created_at DATETIME, updated_at DATETIME, UNIQUE KEY unique_name (name(255)) @@ -759,8 +759,8 @@ CREATE TABLE IF NOT EXISTS RbacRoleRules ( CREATE TABLE IF NOT EXISTS RbacRoleBindings ( id INT AUTO_INCREMENT PRIMARY KEY, - name TEXT UNIQUE NOT NULL, - kind TEXT DEFAULT 'RoleBinding', + name TEXT NOT NULL, + kind TEXT, role_ref TEXT NOT NULL, subjects TEXT NOT NULL, created_at DATETIME, @@ -800,7 +800,7 @@ CREATE INDEX idx_rbac_role_bindings_role_id ON RbacRoleBindings (role_id); CREATE INDEX idx_rbac_service_accounts_role_id ON RbacServiceAccounts (role_id); CREATE UNIQUE INDEX idx_rbac_service_accounts_microservice_uuid_unique ON RbacServiceAccounts (microservice_uuid); -CREATE UNIQUE INDEX idx_rbac_service_accounts_application_id_name_unique ON RbacServiceAccounts (application_id, name); +CREATE UNIQUE INDEX 
idx_rbac_service_accounts_application_id_name_unique ON RbacServiceAccounts (application_id, name(255)); CREATE TABLE IF NOT EXISTS ClusterControllers ( uuid VARCHAR(36) PRIMARY KEY NOT NULL, @@ -1214,6 +1214,19 @@ CREATE INDEX idx_service_platform_reconcile_tasks_name_status ON ServicePlatform CREATE INDEX idx_service_platform_reconcile_tasks_status_claimed ON ServicePlatformReconcileTasks (status, claimed_at); CREATE INDEX idx_service_platform_reconcile_tasks_next_attempt ON ServicePlatformReconcileTasks (next_attempt_at); +CREATE TABLE IF NOT EXISTS ReconcileOutbox ( + id INT AUTO_INCREMENT PRIMARY KEY, + kind VARCHAR(32) NOT NULL, + payload TEXT NOT NULL, + idempotency_key VARCHAR(255) NOT NULL, + created_at DATETIME, + processed_at DATETIME, + last_error TEXT, + UNIQUE KEY uk_reconcile_outbox_idempotency_key (idempotency_key) +); + +CREATE INDEX idx_reconcile_outbox_unprocessed ON ReconcileOutbox (processed_at, id); + CREATE TABLE IF NOT EXISTS HubRouterConfigLocks ( id INT PRIMARY KEY, leader_uuid VARCHAR(36), diff --git a/src/data/migrations/postgres/db_migration_pg_v3.8.0.sql b/src/data/migrations/postgres/db_migration_pg_v3.8.0.sql index 19714e68..fdc07499 100644 --- a/src/data/migrations/postgres/db_migration_pg_v3.8.0.sql +++ b/src/data/migrations/postgres/db_migration_pg_v3.8.0.sql @@ -4,8 +4,8 @@ CREATE TABLE IF NOT EXISTS "Applications" ( description VARCHAR(255) DEFAULT '', is_activated BOOLEAN DEFAULT false, is_system BOOLEAN DEFAULT false, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), nats_access BOOLEAN DEFAULT false, nats_rule_id INTEGER ); @@ -111,14 +111,14 @@ CREATE TABLE IF NOT EXISTS "Fogs" ( is_system BOOLEAN DEFAULT FALSE, router_id INT DEFAULT 0, time_zone VARCHAR(36) DEFAULT 'Etc/UTC', - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), arch_id INT DEFAULT 0, container_engine VARCHAR(36), deployment_type 
VARCHAR(36), active_volume_mounts BIGINT DEFAULT 0, volume_mount_last_update BIGINT DEFAULT 0, - warning_message TEXT DEFAULT 'HEALTHY', + warning_message TEXT, gps_device VARCHAR(36), gps_scan_frequency INT DEFAULT 60, edge_guard_frequency INT DEFAULT 0, @@ -176,8 +176,8 @@ CREATE INDEX idx_fog_version_commands_iofogUuid ON "FogVersionCommands" (iofog_u CREATE TABLE IF NOT EXISTS "HWInfos" ( id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY NOT NULL, info TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), iofog_uuid VARCHAR(36), FOREIGN KEY (iofog_uuid) REFERENCES "Fogs" (uuid) ON DELETE CASCADE ); @@ -187,8 +187,8 @@ CREATE INDEX idx_hw_infos_iofogUuid ON "HWInfos" (iofog_uuid); CREATE TABLE IF NOT EXISTS "USBInfos" ( id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY NOT NULL, info TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), iofog_uuid VARCHAR(36), FOREIGN KEY (iofog_uuid) REFERENCES "Fogs" (uuid) ON DELETE CASCADE ); @@ -219,8 +219,8 @@ CREATE TABLE IF NOT EXISTS "Microservices" ( log_size BIGINT DEFAULT 0, delete BOOLEAN DEFAULT false, delete_with_cleanup BOOLEAN DEFAULT false, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), catalog_item_id INT, registry_id INT DEFAULT 1, iofog_uuid VARCHAR(36), @@ -298,8 +298,8 @@ CREATE TABLE IF NOT EXISTS "MicroservicePorts" ( port_internal INT, port_external INT, is_udp BOOLEAN, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), microservice_uuid VARCHAR(36), FOREIGN KEY (microservice_uuid) REFERENCES "Microservices" (uuid) ON DELETE CASCADE ); @@ -317,8 +317,8 @@ CREATE TABLE IF NOT EXISTS "MicroserviceStatuses" ( percentage DOUBLE PRECISION DEFAULT 0.00, error_message TEXT, microservice_uuid VARCHAR(36), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + 
created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), ip_address TEXT, exec_session_ids TEXT, health_status TEXT, @@ -383,8 +383,8 @@ CREATE TABLE IF NOT EXISTS "Routers" ( host TEXT, is_default BOOLEAN DEFAULT false, iofog_uuid VARCHAR(36), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (iofog_uuid) REFERENCES "Fogs" (uuid) ON DELETE CASCADE ); @@ -394,8 +394,8 @@ CREATE TABLE "RouterConnections" ( id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, source_router INT, dest_router INT, - created_at TIMESTAMP(0) NOT NULL, - updated_at TIMESTAMP(0) NOT NULL, + created_at TIMESTAMPTZ(0) NOT NULL, + updated_at TIMESTAMPTZ(0) NOT NULL, FOREIGN KEY (source_router) REFERENCES "Routers"(id) ON DELETE CASCADE, FOREIGN KEY (dest_router) REFERENCES "Routers"(id) ON DELETE CASCADE ); @@ -407,8 +407,8 @@ CREATE TABLE IF NOT EXISTS "Config" ( id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY NOT NULL, key VARCHAR(255) NOT NULL UNIQUE, value VARCHAR(255) NOT NULL, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE INDEX idx_config_key ON "Config" (key); @@ -435,8 +435,8 @@ CREATE TABLE IF NOT EXISTS "ApplicationTemplates" ( description VARCHAR(255) DEFAULT '', schema_version VARCHAR(255) DEFAULT '', application_json TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE TABLE IF NOT EXISTS "ApplicationTemplateVariables" ( @@ -445,8 +445,8 @@ CREATE TABLE IF NOT EXISTS "ApplicationTemplateVariables" ( key TEXT, description VARCHAR(255) DEFAULT '', default_value VARCHAR(255), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (application_template_id) REFERENCES "ApplicationTemplates" (id) ON DELETE CASCADE ); @@ -504,8 +504,8 @@ CREATE TABLE IF NOT EXISTS "FogPublicKeys" ( id INT GENERATED ALWAYS AS 
IDENTITY PRIMARY KEY NOT NULL, public_key TEXT, iofog_uuid VARCHAR(36), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (iofog_uuid) REFERENCES "Fogs" (uuid) ON DELETE CASCADE ); @@ -516,8 +516,8 @@ CREATE TABLE IF NOT EXISTS "FogUsedTokens" ( jti VARCHAR(255) NOT NULL, iofog_uuid VARCHAR(36), expiry_time BIGINT NOT NULL, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (iofog_uuid) REFERENCES "Fogs" (uuid) ON DELETE CASCADE ); @@ -528,8 +528,8 @@ CREATE TABLE IF NOT EXISTS "Secrets" ( name VARCHAR(255) UNIQUE NOT NULL, type VARCHAR(50) NOT NULL CHECK (type IN ('Opaque', 'tls')), data TEXT NOT NULL, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE INDEX idx_secrets_name ON "Secrets" (name); @@ -541,12 +541,12 @@ CREATE TABLE IF NOT EXISTS "Certificates" ( is_ca BOOLEAN DEFAULT false, signed_by_id INT, hosts TEXT, - valid_from TIMESTAMP(0) NOT NULL, - valid_to TIMESTAMP(0) NOT NULL, + valid_from TIMESTAMPTZ(0) NOT NULL, + valid_to TIMESTAMPTZ(0) NOT NULL, serial_number TEXT NOT NULL, secret_id INT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (signed_by_id) REFERENCES "Certificates" (id) ON DELETE SET NULL, FOREIGN KEY (secret_id) REFERENCES "Secrets" (id) ON DELETE CASCADE ); @@ -568,8 +568,8 @@ CREATE TABLE IF NOT EXISTS "Services" ( bridge_port INTEGER, default_bridge TEXT, service_endpoint TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), provisioning_status VARCHAR(36) DEFAULT 'pending', provisioning_error TEXT ); @@ -581,8 +581,8 @@ CREATE TABLE IF NOT EXISTS "ServiceTags" ( id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY NOT NULL, service_id INTEGER NOT NULL, tag_id INTEGER NOT NULL, - created_at 
TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (service_id) REFERENCES "Services" (id) ON DELETE CASCADE, FOREIGN KEY (tag_id) REFERENCES "Tags" (id) ON DELETE CASCADE ); @@ -595,8 +595,8 @@ CREATE TABLE IF NOT EXISTS "ConfigMaps" ( name VARCHAR(255) UNIQUE NOT NULL, immutable BOOLEAN DEFAULT false, data TEXT NOT NULL, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), use_vault BOOLEAN DEFAULT true ); @@ -608,8 +608,8 @@ CREATE TABLE IF NOT EXISTS "VolumeMounts" ( config_map_name VARCHAR(255), secret_name VARCHAR(255), version INT DEFAULT 1, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (config_map_name) REFERENCES "ConfigMaps" (name) ON DELETE CASCADE, FOREIGN KEY (secret_name) REFERENCES "Secrets" (name) ON DELETE CASCADE ); @@ -634,8 +634,8 @@ CREATE TABLE IF NOT EXISTS "MicroserviceExecStatuses" ( status VARCHAR(255) DEFAULT 'INACTIVE', exec_session_id VARCHAR(255), microservice_uuid VARCHAR(36), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (microservice_uuid) REFERENCES "Microservices" (uuid) ON DELETE CASCADE ); @@ -650,8 +650,8 @@ CREATE TABLE IF NOT EXISTS "MicroserviceHealthChecks" ( start_interval DOUBLE PRECISION, retries INT, microservice_uuid VARCHAR(36), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (microservice_uuid) REFERENCES "Microservices" (uuid) ON DELETE CASCADE ); @@ -672,8 +672,8 @@ CREATE TABLE IF NOT EXISTS "Events" ( status_code INT, status_message TEXT, request_id VARCHAR(255), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE INDEX idx_events_timestamp ON "Events" (timestamp); @@ -694,8 +694,8 @@ CREATE TABLE IF 
NOT EXISTS "MicroserviceLogStatuses" ( tail_config TEXT, agent_connected BOOLEAN DEFAULT false, user_connected BOOLEAN DEFAULT false, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (microservice_uuid) REFERENCES "Microservices" (uuid) ON DELETE CASCADE ); @@ -709,8 +709,8 @@ CREATE TABLE IF NOT EXISTS "MicroserviceExecSessions" ( status TEXT, user_connected BOOLEAN DEFAULT false, agent_connected BOOLEAN DEFAULT false, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (microservice_uuid) REFERENCES "Microservices" (uuid) ON DELETE CASCADE ); @@ -726,8 +726,8 @@ CREATE TABLE IF NOT EXISTS "FogLogStatuses" ( tail_config TEXT, agent_connected BOOLEAN DEFAULT false, user_connected BOOLEAN DEFAULT false, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (iofog_uuid) REFERENCES "Fogs" (uuid) ON DELETE CASCADE ); @@ -737,9 +737,9 @@ CREATE INDEX idx_fog_log_status_session_id ON "FogLogStatuses" (session_id); CREATE TABLE IF NOT EXISTS "RbacRoles" ( id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, name TEXT UNIQUE NOT NULL, - kind TEXT DEFAULT 'Role', - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + kind TEXT, + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE TABLE IF NOT EXISTS "RbacRoleRules" ( @@ -749,19 +749,19 @@ CREATE TABLE IF NOT EXISTS "RbacRoleRules" ( resources TEXT NOT NULL, verbs TEXT NOT NULL, resource_names TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (role_id) REFERENCES "RbacRoles" (id) ON DELETE CASCADE ); CREATE TABLE IF NOT EXISTS "RbacRoleBindings" ( id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, name TEXT UNIQUE NOT NULL, - kind TEXT DEFAULT 'RoleBinding', + kind TEXT, role_ref TEXT NOT NULL, subjects TEXT NOT NULL, - 
created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), role_id INTEGER ); @@ -772,8 +772,8 @@ CREATE TABLE IF NOT EXISTS "RbacServiceAccounts" ( role_id INT REFERENCES "RbacRoles" (id), microservice_uuid VARCHAR(36) REFERENCES "Microservices" (uuid) ON DELETE CASCADE, application_id INT REFERENCES "Applications" (id) ON DELETE SET NULL, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE INDEX idx_rbac_role_rules_role_id ON "RbacRoleRules" (role_id); @@ -784,8 +784,8 @@ CREATE INDEX idx_rbac_service_accounts_name ON "RbacServiceAccounts" (name); CREATE TABLE IF NOT EXISTS "RbacCacheVersion" ( id INT PRIMARY KEY DEFAULT 1, version BIGINT NOT NULL DEFAULT 1, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), CONSTRAINT single_row CHECK (id = 1) ); @@ -799,10 +799,10 @@ CREATE TABLE IF NOT EXISTS "ClusterControllers" ( uuid VARCHAR(36) PRIMARY KEY NOT NULL, host VARCHAR(255), process_id INT, - last_heartbeat TIMESTAMP(0), + last_heartbeat TIMESTAMPTZ(0), is_active BOOLEAN DEFAULT true, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE INDEX idx_cluster_controllers_uuid ON "ClusterControllers" (uuid); @@ -815,8 +815,8 @@ CREATE TABLE IF NOT EXISTS "NatsOperators" ( public_key TEXT NOT NULL, jwt TEXT NOT NULL, seed_secret_name TEXT NOT NULL, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE TABLE IF NOT EXISTS "NatsAccounts" ( @@ -829,8 +829,8 @@ CREATE TABLE IF NOT EXISTS "NatsAccounts" ( is_leaf_system BOOLEAN DEFAULT false, operator_id INT NOT NULL, application_id INT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (operator_id) REFERENCES "NatsOperators" (id) ON DELETE 
CASCADE, FOREIGN KEY (application_id) REFERENCES "Applications" (id) ON DELETE CASCADE ); @@ -844,8 +844,8 @@ CREATE TABLE IF NOT EXISTS "NatsUsers" ( is_bearer BOOLEAN DEFAULT false, account_id INT NOT NULL, microservice_uuid VARCHAR(36), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), nats_user_rule_id INTEGER, FOREIGN KEY (account_id) REFERENCES "NatsAccounts" (id) ON DELETE CASCADE, FOREIGN KEY (microservice_uuid) REFERENCES "Microservices" (uuid) ON DELETE SET NULL @@ -867,8 +867,8 @@ CREATE TABLE IF NOT EXISTS "NatsInstances" ( cert_secret_name TEXT, js_storage_size TEXT, js_memory_store_size TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (iofog_uuid) REFERENCES "Fogs" (uuid) ON DELETE CASCADE ); @@ -876,8 +876,8 @@ CREATE TABLE IF NOT EXISTS "NatsConnections" ( id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY NOT NULL, source_nats INT NOT NULL, dest_nats INT NOT NULL, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (source_nats) REFERENCES "NatsInstances" (id) ON DELETE CASCADE, FOREIGN KEY (dest_nats) REFERENCES "NatsInstances" (id) ON DELETE CASCADE ); @@ -891,9 +891,9 @@ CREATE TABLE IF NOT EXISTS "NatsReconcileTasks" ( fog_uuids TEXT, status VARCHAR(32) NOT NULL DEFAULT 'pending', leader_uuid VARCHAR(36), - claimed_at TIMESTAMP(0), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + claimed_at TIMESTAMPTZ(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE TABLE IF NOT EXISTS "NatsAccountRules" ( @@ -928,8 +928,8 @@ CREATE TABLE IF NOT EXISTS "NatsAccountRules" ( pub_deny TEXT, sub_allow TEXT, sub_deny TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE TABLE IF NOT EXISTS "NatsUserRules" ( @@ -952,8 +952,8 @@ CREATE TABLE IF NOT EXISTS 
"NatsUserRules" ( sub_allow TEXT, sub_deny TEXT, tags TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE UNIQUE INDEX idx_nats_accounts_application_id_unique ON "NatsAccounts" (application_id) WHERE application_id IS NOT NULL; @@ -984,10 +984,10 @@ CREATE TABLE IF NOT EXISTS "AuthUsers" ( must_change_password BOOLEAN DEFAULT false, is_bootstrap BOOLEAN DEFAULT false, failed_attempts INT DEFAULT 0, - locked_until TIMESTAMP(0), - deleted_at TIMESTAMP(0), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + locked_until TIMESTAMPTZ(0), + deleted_at TIMESTAMPTZ(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE INDEX idx_auth_users_email ON "AuthUsers" (email); @@ -998,15 +998,15 @@ CREATE TABLE IF NOT EXISTS "AuthGroups" ( name VARCHAR(255) NOT NULL UNIQUE, is_system BOOLEAN DEFAULT false, mfa_required BOOLEAN NOT NULL DEFAULT false, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE TABLE IF NOT EXISTS "AuthUserGroups" ( id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY NOT NULL, user_id VARCHAR(36) NOT NULL, group_id INT NOT NULL, - created_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), FOREIGN KEY (user_id) REFERENCES "AuthUsers" (id) ON DELETE CASCADE, FOREIGN KEY (group_id) REFERENCES "AuthGroups" (id) ON DELETE CASCADE, UNIQUE (user_id, group_id) @@ -1021,8 +1021,8 @@ CREATE TABLE IF NOT EXISTS "AuthMfa" ( totp_secret_encrypted TEXT, enabled BOOLEAN DEFAULT false, recovery_codes_hash TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (user_id) REFERENCES "AuthUsers" (id) ON DELETE CASCADE ); @@ -1031,8 +1031,8 @@ CREATE INDEX idx_auth_mfa_user_id ON "AuthMfa" (user_id); CREATE TABLE IF NOT EXISTS "AuthPasswordResetSessions" ( id VARCHAR(36) PRIMARY KEY NOT NULL, user_id VARCHAR(36) NOT NULL, - expires_at TIMESTAMP(0) NOT 
NULL, - created_at TIMESTAMP(0), + expires_at TIMESTAMPTZ(0) NOT NULL, + created_at TIMESTAMPTZ(0), FOREIGN KEY (user_id) REFERENCES "AuthUsers" (id) ON DELETE CASCADE ); @@ -1044,10 +1044,10 @@ CREATE TABLE IF NOT EXISTS "AuthRefreshTokens" ( token_hash VARCHAR(255) NOT NULL, user_id VARCHAR(36) NOT NULL, family_id VARCHAR(36) NOT NULL, - expires_at TIMESTAMP(0) NOT NULL, + expires_at TIMESTAMPTZ(0) NOT NULL, revoked BOOLEAN DEFAULT false, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (user_id) REFERENCES "AuthUsers" (id) ON DELETE CASCADE ); @@ -1062,8 +1062,8 @@ CREATE TABLE IF NOT EXISTS "AuthOidcKeys" ( key_material_encrypted TEXT, vault_ref TEXT, active BOOLEAN DEFAULT true, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE INDEX idx_auth_oidc_keys_active ON "AuthOidcKeys" (active); @@ -1073,8 +1073,8 @@ CREATE TABLE IF NOT EXISTS "AuthOidcClients" ( client_id VARCHAR(255) NOT NULL UNIQUE, secret_ref TEXT, client_type VARCHAR(32) NOT NULL DEFAULT 'confidential', - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE TABLE IF NOT EXISTS "AuthOidcProviderStates" ( @@ -1082,14 +1082,14 @@ CREATE TABLE IF NOT EXISTS "AuthOidcProviderStates" ( model VARCHAR(64) NOT NULL, record_id VARCHAR(255) NOT NULL, payload TEXT NOT NULL, - expires_at TIMESTAMP(0), + expires_at TIMESTAMPTZ(0), grant_id VARCHAR(255), uid VARCHAR(255), user_code VARCHAR(255), consumed BOOLEAN DEFAULT false, - consumed_at TIMESTAMP(0), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + consumed_at TIMESTAMPTZ(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), UNIQUE (model, record_id) ); @@ -1101,9 +1101,9 @@ CREATE INDEX idx_auth_oidc_provider_states_expires_at ON "AuthOidcProviderStates CREATE TABLE IF NOT EXISTS "AuthBffSessions" ( sid VARCHAR(255) PRIMARY KEY NOT 
NULL, data TEXT NOT NULL, - expires_at TIMESTAMP NOT NULL, - created_at TIMESTAMP, - updated_at TIMESTAMP + expires_at TIMESTAMPTZ NOT NULL, + created_at TIMESTAMPTZ, + updated_at TIMESTAMPTZ ); CREATE INDEX idx_auth_bff_sessions_expires_at ON "AuthBffSessions" (expires_at); @@ -1111,20 +1111,20 @@ CREATE INDEX idx_auth_bff_sessions_expires_at ON "AuthBffSessions" (expires_at); CREATE TABLE IF NOT EXISTS "AuthInteractionStates" ( uid VARCHAR(255) PRIMARY KEY NOT NULL, payload TEXT NOT NULL, - expires_at TIMESTAMP NOT NULL, - created_at TIMESTAMP, - updated_at TIMESTAMP + expires_at TIMESTAMPTZ NOT NULL, + created_at TIMESTAMPTZ, + updated_at TIMESTAMPTZ ); CREATE INDEX idx_auth_interaction_states_expires_at ON "AuthInteractionStates" (expires_at); CREATE TABLE IF NOT EXISTS "AuthBootstrapMeta" ( id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY NOT NULL, - completed_at TIMESTAMP(0), + completed_at TIMESTAMPTZ(0), bootstrap_admin_user_id VARCHAR(36), session_secret_ref TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (bootstrap_admin_user_id) REFERENCES "AuthUsers" (id) ON DELETE SET NULL ); @@ -1142,16 +1142,16 @@ CREATE TABLE IF NOT EXISTS "AuthPolicy" ( refresh_token_ttl_seconds INT DEFAULT 3600, refresh_rotation BOOLEAN DEFAULT true, max_concurrent_sessions INT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE TABLE IF NOT EXISTS "FogPlatformSpecs" ( fog_uuid VARCHAR(36) PRIMARY KEY NOT NULL, spec_json TEXT NOT NULL, generation INT NOT NULL DEFAULT 1, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (fog_uuid) REFERENCES "Fogs" (uuid) ON DELETE CASCADE ); @@ -1160,10 +1160,10 @@ CREATE TABLE IF NOT EXISTS "FogPlatformStatuses" ( observed_generation INT NOT NULL DEFAULT 0, phase VARCHAR(32) NOT NULL DEFAULT 'Pending', last_error TEXT, - 
last_transition_at TIMESTAMP(0), + last_transition_at TIMESTAMPTZ(0), conditions_json TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (fog_uuid) REFERENCES "Fogs" (uuid) ON DELETE CASCADE ); @@ -1174,12 +1174,12 @@ CREATE TABLE IF NOT EXISTS "FogPlatformReconcileTasks" ( spec_generation INT, status VARCHAR(32) NOT NULL DEFAULT 'pending', leader_uuid VARCHAR(36), - claimed_at TIMESTAMP(0), - next_attempt_at TIMESTAMP(0), + claimed_at TIMESTAMPTZ(0), + next_attempt_at TIMESTAMPTZ(0), attempts INT NOT NULL DEFAULT 0, last_error TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0), FOREIGN KEY (fog_uuid) REFERENCES "Fogs" (uuid) ON DELETE CASCADE ); @@ -1194,22 +1194,34 @@ CREATE TABLE IF NOT EXISTS "ServicePlatformReconcileTasks" ( spec_snapshot TEXT, status VARCHAR(32) NOT NULL DEFAULT 'pending', leader_uuid VARCHAR(36), - claimed_at TIMESTAMP(0), - next_attempt_at TIMESTAMP(0), + claimed_at TIMESTAMPTZ(0), + next_attempt_at TIMESTAMPTZ(0), attempts INT NOT NULL DEFAULT 0, last_error TEXT, - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); CREATE UNIQUE INDEX idx_service_platform_reconcile_tasks_active_service_name ON "ServicePlatformReconcileTasks" (service_name) WHERE status IN ('pending', 'in_progress'); CREATE INDEX idx_service_platform_reconcile_tasks_status_claimed ON "ServicePlatformReconcileTasks" (status, claimed_at); CREATE INDEX idx_service_platform_reconcile_tasks_next_attempt ON "ServicePlatformReconcileTasks" (next_attempt_at); +CREATE TABLE IF NOT EXISTS "ReconcileOutbox" ( + id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY NOT NULL, + kind VARCHAR(32) NOT NULL, + payload TEXT NOT NULL, + idempotency_key VARCHAR(255) NOT NULL UNIQUE, + created_at TIMESTAMPTZ(0), + processed_at TIMESTAMPTZ(0), + last_error TEXT +); + +CREATE INDEX 
idx_reconcile_outbox_unprocessed ON "ReconcileOutbox" (processed_at, id); + CREATE TABLE IF NOT EXISTS "HubRouterConfigLocks" ( id INT PRIMARY KEY NOT NULL CHECK (id = 1), leader_uuid VARCHAR(36), - claimed_at TIMESTAMP(0), - created_at TIMESTAMP(0), - updated_at TIMESTAMP(0) + claimed_at TIMESTAMPTZ(0), + created_at TIMESTAMPTZ(0), + updated_at TIMESTAMPTZ(0) ); diff --git a/src/data/migrations/sqlite/db_migration_sqlite_v3.8.0.sql b/src/data/migrations/sqlite/db_migration_sqlite_v3.8.0.sql index 9792e096..bf946f3a 100644 --- a/src/data/migrations/sqlite/db_migration_sqlite_v3.8.0.sql +++ b/src/data/migrations/sqlite/db_migration_sqlite_v3.8.0.sql @@ -118,7 +118,7 @@ CREATE TABLE IF NOT EXISTS Fogs ( deployment_type VARCHAR(36), active_volume_mounts BIGINT DEFAULT 0, volume_mount_last_update BIGINT DEFAULT 0, - warning_message TEXT DEFAULT 'HEALTHY', + warning_message TEXT, gps_device VARCHAR(36), gps_scan_frequency INT DEFAULT 60, edge_guard_frequency INT DEFAULT 0, @@ -737,7 +737,7 @@ CREATE INDEX idx_fog_log_status_session_id ON FogLogStatuses (session_id); CREATE TABLE IF NOT EXISTS RbacRoles ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, name TEXT UNIQUE NOT NULL, - kind TEXT DEFAULT 'Role', + kind TEXT, created_at DATETIME, updated_at DATETIME ); @@ -757,7 +757,7 @@ CREATE TABLE IF NOT EXISTS RbacRoleRules ( CREATE TABLE IF NOT EXISTS RbacRoleBindings ( id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, name TEXT UNIQUE NOT NULL, - kind TEXT DEFAULT 'RoleBinding', + kind TEXT, role_ref TEXT NOT NULL, subjects TEXT NOT NULL, created_at DATETIME, @@ -1205,6 +1205,18 @@ CREATE UNIQUE INDEX idx_service_platform_reconcile_tasks_active_service_name ON CREATE INDEX idx_service_platform_reconcile_tasks_status_claimed ON ServicePlatformReconcileTasks (status, claimed_at); CREATE INDEX idx_service_platform_reconcile_tasks_next_attempt ON ServicePlatformReconcileTasks (next_attempt_at); +CREATE TABLE IF NOT EXISTS ReconcileOutbox ( + id INTEGER PRIMARY KEY AUTOINCREMENT 
NOT NULL, + kind VARCHAR(32) NOT NULL, + payload TEXT NOT NULL, + idempotency_key VARCHAR(255) NOT NULL UNIQUE, + created_at DATETIME, + processed_at DATETIME, + last_error TEXT +); + +CREATE INDEX idx_reconcile_outbox_unprocessed ON ReconcileOutbox (processed_at, id); + CREATE TABLE IF NOT EXISTS HubRouterConfigLocks ( id INTEGER PRIMARY KEY NOT NULL CHECK (id = 1), leader_uuid VARCHAR(36), diff --git a/src/data/models/configMap.js b/src/data/models/configMap.js index 63a2c088..db766352 100644 --- a/src/data/models/configMap.js +++ b/src/data/models/configMap.js @@ -1,6 +1,11 @@ 'use strict' const SecretHelper = require('../../helpers/secret-helper') +const { + scheduleVaultPromoteAfterCommit, + shouldDeferVaultStore +} = require('../../helpers/vault-transaction-helper') +const models = require('../models') module.exports = (sequelize, DataTypes) => { const ConfigMap = sequelize.define('ConfigMap', { @@ -59,21 +64,35 @@ module.exports = (sequelize, DataTypes) => { } ], hooks: { - beforeSave: async (configMap) => { + beforeSave: async (configMap, options) => { if (configMap.changed('data')) { - // Get useVault value - prioritize getDataValue (for updates), then property, default to true let useVault = configMap.getDataValue('useVault') - // If getDataValue returns undefined/null, try the property (for new instances) if (useVault === undefined || useVault === null) { useVault = configMap.useVault !== undefined && configMap.useVault !== null ? 
configMap.useVault : true } - // Ensure boolean type useVault = Boolean(useVault) + const plainData = configMap.data + const transaction = options.transaction + + if (transaction && shouldDeferVaultStore('configmap', useVault)) { + configMap.data = await SecretHelper.encryptSecretInternal(plainData, configMap.name) + scheduleVaultPromoteAfterCommit(transaction, { + secretData: plainData, + secretName: configMap.name, + secretType: 'configmap', + useVault, + model: () => models.ConfigMap, + where: { name: configMap.name }, + field: 'data' + }) + return + } + const encryptedData = await SecretHelper.encryptSecret( - configMap.data, + plainData, configMap.name, 'configmap', useVault diff --git a/src/data/models/index.js b/src/data/models/index.js index a6f323fa..ccfa2a9c 100644 --- a/src/data/models/index.js +++ b/src/data/models/index.js @@ -78,8 +78,11 @@ db.initDB = async (isStart) => { // Initialize RBAC cache version if it doesn't exist try { const RbacCacheVersionManager = require('../managers/rbac-cache-version-manager') - const fakeTransaction = { fakeTransaction: true } - await RbacCacheVersionManager.initializeVersion(fakeTransaction) + const { runInTransaction } = require('../../helpers/transaction-runner') + await runInTransaction( + (transaction) => RbacCacheVersionManager.initializeVersion(transaction), + { label: 'init-rbac-cache-version' } + ) logger.info('RBAC cache version initialized') } catch (error) { logger.warn(`Failed to initialize RBAC cache version: ${error.message}. 
Continuing...`) @@ -94,12 +97,26 @@ db.initDB = async (isStart) => { // Initialize controller UUID try { const ClusterControllerService = require('../../services/cluster-controller-service') - const fakeTransaction = { fakeTransaction: true } - await ClusterControllerService.initializeControllerUuid(fakeTransaction) + const { runInTransaction } = require('../../helpers/transaction-runner') + await runInTransaction( + (transaction) => ClusterControllerService.initializeControllerUuid(transaction), + { label: 'init-controller-uuid' } + ) logger.info('Controller UUID initialized') } catch (error) { logger.warn(`Failed to initialize controller UUID: ${error.message}. Continuing...`) } + + const { initDbMetrics } = require('../../helpers/db-metrics') + const { getProviderName, getWriteQueueDepth } = require('../../helpers/transaction-runner') + initDbMetrics(databaseProvider.sequelize, getProviderName(), { getWriteQueueDepth }) + + try { + const { checkSqliteFogCountWarning } = require('../../helpers/sqlite-fog-warning') + await checkSqliteFogCountWarning() + } catch (error) { + logger.warn(`Failed sqlite fog count warning check: ${error.message}. 
Continuing...`) + } } } diff --git a/src/data/models/microserviceExecSession.js b/src/data/models/microserviceExecSession.js index 5dfb01da..e9d9e59c 100644 --- a/src/data/models/microserviceExecSession.js +++ b/src/data/models/microserviceExecSession.js @@ -14,7 +14,7 @@ module.exports = (sequelize, DataTypes) => { allowNull: false }, sessionId: { - type: DataTypes.TEXT, + type: DataTypes.STRING(255), field: 'session_id', allowNull: false, unique: true diff --git a/src/data/models/reconcileOutbox.js b/src/data/models/reconcileOutbox.js new file mode 100644 index 00000000..0e1fd746 --- /dev/null +++ b/src/data/models/reconcileOutbox.js @@ -0,0 +1,54 @@ +'use strict' + +const RECONCILE_OUTBOX_KINDS = ['nats', 'fog_platform', 'service_platform'] + +module.exports = (sequelize, DataTypes) => { + const ReconcileOutbox = sequelize.define('ReconcileOutbox', { + id: { + type: DataTypes.INTEGER, + primaryKey: true, + autoIncrement: true, + allowNull: false, + field: 'id' + }, + kind: { + type: DataTypes.STRING(32), + allowNull: false, + field: 'kind', + validate: { + isIn: [RECONCILE_OUTBOX_KINDS] + } + }, + payload: { + type: DataTypes.TEXT, + allowNull: false, + field: 'payload' + }, + idempotencyKey: { + type: DataTypes.STRING(255), + allowNull: false, + unique: true, + field: 'idempotency_key' + }, + processedAt: { + type: DataTypes.DATE, + allowNull: true, + field: 'processed_at' + }, + lastError: { + type: DataTypes.TEXT, + allowNull: true, + field: 'last_error' + } + }, { + tableName: 'ReconcileOutbox', + timestamps: true, + createdAt: 'created_at', + updatedAt: false, + underscored: true + }) + + return ReconcileOutbox +} + +module.exports.RECONCILE_OUTBOX_KINDS = RECONCILE_OUTBOX_KINDS diff --git a/src/data/models/registry.js b/src/data/models/registry.js index 93f05c8d..5a721d2c 100644 --- a/src/data/models/registry.js +++ b/src/data/models/registry.js @@ -1,6 +1,11 @@ 'use strict' const SecretHelper = require('../../helpers/secret-helper') +const { + 
scheduleVaultPromoteAfterCommit, + shouldDeferVaultStore +} = require('../../helpers/vault-transaction-helper') +const models = require('../models') // Minimum length for internal encryption format: base64(salt(16) + iv(12) + tag(16) + encrypted) const INTERNAL_ENCRYPTED_MIN_LENGTH = 60 @@ -63,7 +68,7 @@ module.exports = (sequelize, DataTypes) => { timestamps: false, underscored: true, hooks: { - beforeSave: async (registry) => { + beforeSave: async (registry, options) => { if (!registry.changed('password')) return const password = registry.password if (isPasswordEmpty(password)) { @@ -76,9 +81,27 @@ module.exports = (sequelize, DataTypes) => { if (SecretHelper.isVaultReference(password) || looksLikeInternalEncrypted(password)) { return } + + const transaction = options.transaction + const secretName = 'registry-' + registry.id + const secretData = { value: password } + + if (transaction && shouldDeferVaultStore('registry')) { + registry.password = await SecretHelper.encryptSecretInternal(secretData, secretName) + scheduleVaultPromoteAfterCommit(transaction, { + secretData, + secretName, + secretType: 'registry', + model: () => models.Registry, + where: { id: registry.id }, + field: 'password' + }) + return + } + const encrypted = await SecretHelper.encryptSecret( - { value: password }, - 'registry-' + registry.id, + secretData, + secretName, 'registry' ) registry.password = encrypted diff --git a/src/data/models/secret.js b/src/data/models/secret.js index 8bfd3ad2..978c8b70 100644 --- a/src/data/models/secret.js +++ b/src/data/models/secret.js @@ -1,6 +1,11 @@ 'use strict' const SecretHelper = require('../../helpers/secret-helper') +const { + scheduleVaultPromoteAfterCommit, + shouldDeferVaultStore +} = require('../../helpers/vault-transaction-helper') +const models = require('../models') module.exports = (sequelize, DataTypes) => { const Secret = sequelize.define('Secret', { @@ -57,10 +62,26 @@ module.exports = (sequelize, DataTypes) => { } ], hooks: { - 
beforeSave: async (secret) => { + beforeSave: async (secret, options) => { if (secret.changed('data')) { + const plainData = secret.data + const transaction = options.transaction + + if (transaction && shouldDeferVaultStore(secret.type)) { + secret.data = await SecretHelper.encryptSecretInternal(plainData, secret.name) + scheduleVaultPromoteAfterCommit(transaction, { + secretData: plainData, + secretName: secret.name, + secretType: secret.type, + model: () => models.Secret, + where: { name: secret.name }, + field: 'data' + }) + return + } + const encryptedData = await SecretHelper.encryptSecret( - secret.data, + plainData, secret.name, secret.type ) diff --git a/src/data/providers/database-provider.js b/src/data/providers/database-provider.js index 95754346..331f60ea 100644 --- a/src/data/providers/database-provider.js +++ b/src/data/providers/database-provider.js @@ -206,8 +206,8 @@ class DatabaseProvider { id INT GENERATED ALWAYS AS IDENTITY PRIMARY KEY, migration_version VARCHAR(255) NOT NULL, seeder_version VARCHAR(255), - created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP, - updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP + created_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMPTZ DEFAULT CURRENT_TIMESTAMP ) ` break diff --git a/src/data/providers/mysql.js b/src/data/providers/mysql.js index fe4260b6..2c5fb5d4 100644 --- a/src/data/providers/mysql.js +++ b/src/data/providers/mysql.js @@ -36,6 +36,7 @@ class MySqlDatabaseProvider extends DatabaseProvider { } // Sequelize configuration + const poolConfig = mysqlConfig.pool || {} const sequelizeConfig = { dialect: 'mysql', host: connectionOptions.host, @@ -46,6 +47,12 @@ class MySqlDatabaseProvider extends DatabaseProvider { dialectOptions: { connectTimeout: connectionOptions.connectTimeout }, + pool: { + max: poolConfig.max != null ? poolConfig.max : 10, + min: poolConfig.min != null ? poolConfig.min : 0, + idle: poolConfig.idle != null ? 
poolConfig.idle : 20000 + }, + timezone: '+00:00', logging: false } diff --git a/src/data/providers/postgres.js b/src/data/providers/postgres.js index 1fdbeb0f..f351c312 100644 --- a/src/data/providers/postgres.js +++ b/src/data/providers/postgres.js @@ -36,6 +36,7 @@ class PostgresDatabaseProvider extends DatabaseProvider { } // Sequelize configuration + const poolConfig = postgresConfig.pool || {} const sequelizeConfig = { dialect: 'postgres', host: connectionOptions.host, @@ -46,6 +47,12 @@ class PostgresDatabaseProvider extends DatabaseProvider { dialectOptions: { connectTimeout: connectionOptions.connectTimeout }, + pool: { + max: poolConfig.max != null ? poolConfig.max : 10, + min: poolConfig.min != null ? poolConfig.min : 0, + idle: poolConfig.idle != null ? poolConfig.idle : 20000 + }, + timezone: '+00:00', logging: false } // Add SSL configuration to Sequelize if enabled diff --git a/src/data/stores/sequelize-session-store.js b/src/data/stores/sequelize-session-store.js index d54359ee..dc6f9b6e 100644 --- a/src/data/stores/sequelize-session-store.js +++ b/src/data/stores/sequelize-session-store.js @@ -37,7 +37,7 @@ class SequelizeSessionStore extends Store { sid, data, expiresAt - }) + }, { conflictFields: ['sid'] }) .then(() => callback(null)) .catch((error) => callback(error)) } diff --git a/src/decorators/authorization-decorator.js b/src/decorators/authorization-decorator.js index dc10a692..35e93e03 100644 --- a/src/decorators/authorization-decorator.js +++ b/src/decorators/authorization-decorator.js @@ -3,6 +3,7 @@ const FogManager = require('../data/managers/iofog-manager') const FogKeyService = require('../services/iofog-key-service') const Errors = require('../helpers/errors') const { isTest } = require('../helpers/app-helper') +const { runInTransaction } = require('../helpers/transaction-runner') function checkFogToken (f) { return async function (...fArgs) { @@ -18,7 +19,6 @@ function checkFogToken (f) { throw new 
Errors.AuthenticationError('authorization failed') } - // Extract token from Bearer scheme const [scheme, token] = authHeader.split(' ') if (scheme.toLowerCase() !== 'bearer' || !token) { logger.error('Invalid authorization scheme') @@ -26,10 +26,8 @@ function checkFogToken (f) { } try { - // Debug log for JWT logger.debug({ token }, 'Received JWT') - // First, decode the JWT without verification to get the fog UUID const tokenParts = token.split('.') if (tokenParts.length !== 3) { logger.error('Invalid JWT format') @@ -46,23 +44,25 @@ function checkFogToken (f) { throw new Errors.AuthenticationError('authorization failed') } - // Get the fog with transaction - const fog = await FogManager.findOne({ - uuid: fogUuid - }, { fakeTransaction: true }) + const fog = await runInTransaction(async (transaction) => { + const foundFog = await FogManager.findOne({ uuid: fogUuid }, transaction) + if (!foundFog) { + return null + } + + await FogKeyService.verifyJWT(token, fogUuid, transaction) + + const timestamp = Date.now() + await FogManager.updateLastActive(foundFog.uuid, timestamp, transaction) + + return foundFog + }, { label: 'checkFogToken' }) if (!fog) { logger.error(`Fog with UUID ${fogUuid} not found`) throw new Errors.AuthenticationError('authorization failed') } - // Verify the JWT with transaction - await FogKeyService.verifyJWT(token, fogUuid, { fakeTransaction: true }) - - // Update last active timestamp with transaction - const timestamp = Date.now() - await FogManager.updateLastActive(fog.uuid, timestamp, { fakeTransaction: true }) - fArgs.push(fog) return f.apply(this, fArgs) diff --git a/src/decorators/transaction-decorator.js b/src/decorators/transaction-decorator.js index 3c96a2f6..ee986d9f 100644 --- a/src/decorators/transaction-decorator.js +++ b/src/decorators/transaction-decorator.js @@ -1,85 +1,53 @@ -const cq = require('concurrent-queue') -const Transaction = require('sequelize/lib/transaction') - const { isTest } = require('../helpers/app-helper') 
-const { isSqliteBusyError } = require('../helpers/db-busy-retry') - -const transactionsQueue = cq() - .limit({ concurrency: 1 }) - .process((task, cb) => { - task.transaction - .apply(task.that, task.args) - .then((res) => cb(null, res)) - .catch((err) => cb(err, null)) - }) - -function transaction (f) { - const fakeTransactionObject = { fakeTransaction: true } - return function (...fArgs) { - if (isTest()) { - return f.apply(this, fArgs) - } - - if (fArgs.length > 0 && fArgs[fArgs.length - 1] instanceof Transaction) { - fArgs[fArgs.length - 1] = fakeTransactionObject - return f.apply(this, fArgs) - } else { - fArgs.push(fakeTransactionObject) - return f.apply(this, fArgs) - } - } +const { isSequelizeTransaction } = require('../helpers/sequelize-transaction') +const { + runInTransaction, + runWithTransactionContext, + PRIORITY_INTERACTIVE, + getActiveTransactionContext +} = require('../helpers/transaction-runner') + +function hasTransactionArg (args) { + return findTransactionArg(args) != null } -function queueTransaction (resolve, reject, transaction, that, retries, ...args) { - const task = { - transaction, - that, - retries, - args - } - - transactionsQueue(task, (error, success) => { - if (error === null) { - return resolve(success) - } - - if (retries < 1 || !isSqliteBusyError(error)) { - return reject(error) +function findTransactionArg (args) { + for (let i = args.length - 1; i >= 0; i--) { + if (isSequelizeTransaction(args[i])) { + return args[i] } - - queueTransaction(resolve, reject, transaction, that, retries - 1, ...args) - }) -} - -function applyTransaction (resolve, reject, transaction, that, ...args) { - transaction.apply(that, args) - .then(resolve) - .catch((error) => { - if (!isSqliteBusyError(error)) { - return reject(error) - } - - queueTransaction(resolve, reject, transaction, this, 5, ...args) - }) + } + return null } /** * @param {Function} f - Async function that accepts (..., transaction) as last argument - * @param {{ bypassQueue?: 
boolean }} [options] - If bypassQueue is true, run without enqueueing (so the call does not wait behind long-running queued transactions, e.g. NATS reconcile) + * @param {{ priority?: string, label?: string }} [options] */ function generateTransaction (f, options = {}) { - const { bypassQueue = false } = options - const t = transaction(f) + const priority = options.priority || PRIORITY_INTERACTIVE + const label = options.label || f.name || 'generateTransaction' + return function (...args) { if (isTest()) { - return t.apply(this, args) + return f.apply(this, args) } - if (bypassQueue) { - return Promise.resolve().then(() => t.apply(this, args)) + + if (hasTransactionArg(args)) { + const tx = findTransactionArg(args) + return runWithTransactionContext(tx, priority, () => f.apply(this, args)) } - return new Promise((resolve, reject) => { - applyTransaction(resolve, reject, t, this, ...args) - }) + + const parentCtx = getActiveTransactionContext() + if (parentCtx?.transaction) { + return runWithTransactionContext(parentCtx.transaction, parentCtx.priority, () => + f.apply(this, [...args, parentCtx.transaction])) + } + + return runInTransaction( + (transaction) => f.apply(this, [...args, transaction]), + { priority, label } + ) } } diff --git a/src/helpers/app-helper.js b/src/helpers/app-helper.js index 6df81381..b4b420ec 100644 --- a/src/helpers/app-helper.js +++ b/src/helpers/app-helper.js @@ -112,14 +112,13 @@ function checkTransaction (transaction) { if (isTest()) { return } - // TODO [when transactions concurrency issue fixed]: Remove '!transaction.fakeTransaction' - if (!transaction || (!(transaction instanceof Transaction) && !transaction.fakeTransaction)) { + if (!transaction || !(transaction instanceof Transaction)) { throw new Errors.TransactionError() } } function withTransaction (transaction, options = {}) { - if (transaction && !transaction.fakeTransaction) { + if (transaction) { options.transaction = transaction } return options diff --git 
a/src/helpers/db-busy-retry.js b/src/helpers/db-busy-retry.js index 7ce06dd4..71b3a2e9 100644 --- a/src/helpers/db-busy-retry.js +++ b/src/helpers/db-busy-retry.js @@ -1,4 +1,9 @@ +const config = require('../config') +const dbMetrics = require('./db-metrics') + const DEFAULT_MAX_RETRIES = 5 +const CONFIG_DEFAULT_MAX_RETRIES = 8 +const CONFIG_DEFAULT_BASE_MS = 25 function isSqliteBusyError (error) { if (!error) { @@ -12,12 +17,25 @@ function isSqliteBusyError (error) { return messages.some((message) => message && message.indexOf('SQLITE_BUSY') !== -1) } +function sleep (ms) { + return new Promise((resolve) => setTimeout(resolve, ms)) +} + +function getRetryDefaults () { + return { + maxRetries: config.get('settings.dbBusyRetryMaxAttempts', CONFIG_DEFAULT_MAX_RETRIES), + baseMs: config.get('settings.dbBusyRetryBaseMs', CONFIG_DEFAULT_BASE_MS) + } +} + /** * Retry an async DB operation when SQLite reports SQLITE_BUSY (same semantics as TransactionDecorator queue retries). * No-op for non-SQLITE_BUSY errors. Safe on mysql/postgres — busy errors never match. */ async function withDbBusyRetry (fn, options = {}) { - const maxRetries = options.maxRetries != null ? options.maxRetries : DEFAULT_MAX_RETRIES + const defaults = getRetryDefaults() + const maxRetries = options.maxRetries != null ? options.maxRetries : defaults.maxRetries + const baseMs = options.baseMs != null ? 
options.baseMs : defaults.baseMs let attempt = 0 while (true) { @@ -28,12 +46,18 @@ async function withDbBusyRetry (fn, options = {}) { throw error } attempt++ + dbMetrics.recordBusyRetry(options.label) + const delayMs = baseMs * Math.pow(2, attempt - 1) + Math.floor(Math.random() * baseMs) + await sleep(delayMs) } } } module.exports = { + CONFIG_DEFAULT_BASE_MS, + CONFIG_DEFAULT_MAX_RETRIES, DEFAULT_MAX_RETRIES, + getRetryDefaults, isSqliteBusyError, withDbBusyRetry } diff --git a/src/helpers/db-dialect.js b/src/helpers/db-dialect.js new file mode 100644 index 00000000..9ca6e1e7 --- /dev/null +++ b/src/helpers/db-dialect.js @@ -0,0 +1,150 @@ +const databaseProvider = require('../data/providers/database-factory') + +function getDbDialect () { + return databaseProvider.sequelize.getDialect() +} + +function supportsSkipLocked () { + const dialect = getDbDialect() + return dialect === 'mysql' || dialect === 'postgres' +} + +function quoteTableName (tableName, dialect = getDbDialect()) { + return dialect === 'postgres' ? `"${tableName}"` : `\`${tableName}\`` +} + +/** + * Claim the next eligible reconcile task row. + * sqlite: find + conditional update (serialized by global write queue). + * mysql/postgres: SELECT … FOR UPDATE SKIP LOCKED, then leader update in same tx. 
+ */ +function buildLeaderOrStaleCondition (staleThreshold, deleteStaleThreshold) { + const { Op } = require('sequelize') + const conditions = [ + { leaderUuid: null }, + { claimedAt: { [Op.lt]: staleThreshold } } + ] + if (deleteStaleThreshold) { + conditions.push({ + [Op.and]: [ + { reason: 'delete' }, + { claimedAt: { [Op.lt]: deleteStaleThreshold } } + ] + }) + } + return { [Op.or]: conditions } +} + +function buildLeaderOrStaleSql (deleteStaleThreshold) { + if (!deleteStaleThreshold) { + return '(leader_uuid IS NULL OR claimed_at < :staleThreshold)' + } + return '(leader_uuid IS NULL OR claimed_at < :staleThreshold OR (reason = \'delete\' AND claimed_at < :deleteStaleThreshold))' +} + +async function claimNextReconcileTask ({ + Entity, + controllerUuid, + staleThreshold, + deleteStaleThreshold = null, + now, + activeStatuses, + includeNextAttemptFilter = true, + selectSql, + reloadTask +}) { + const { runInTransaction, PRIORITY_BACKGROUND } = require('./transaction-runner') + const { Op } = require('sequelize') + const sequelize = databaseProvider.sequelize + + const leaderOrStale = buildLeaderOrStaleCondition(staleThreshold, deleteStaleThreshold) + + const baseWhere = { + status: { [Op.in]: activeStatuses }, + [Op.and]: [leaderOrStale] + } + + if (includeNextAttemptFilter) { + baseWhere[Op.or] = [ + { nextAttemptAt: null }, + { nextAttemptAt: { [Op.lte]: now } } + ] + } + + return runInTransaction(async (transaction) => { + let task + + if (supportsSkipLocked()) { + const dialect = getDbDialect() + const tableName = Entity.getTableName() + const quotedTable = quoteTableName(tableName, dialect) + + const nextAttemptClause = includeNextAttemptFilter + ? 
'AND (next_attempt_at IS NULL OR next_attempt_at <= :now)' + : '' + + const leaderOrStaleSql = buildLeaderOrStaleSql(deleteStaleThreshold) + const rows = await sequelize.query( + `${selectSql.replace(':table', quotedTable)} + WHERE status IN (:activeStatuses) + ${nextAttemptClause} + AND ${leaderOrStaleSql} + ORDER BY id ASC + LIMIT 1 + FOR UPDATE SKIP LOCKED`, + { + replacements: { + activeStatuses, + now, + staleThreshold, + deleteStaleThreshold + }, + type: sequelize.QueryTypes.SELECT, + transaction + } + ) + + if (!rows.length) { + return null + } + task = Entity.build(rows[0], { isNewRecord: false }) + } else { + task = await Entity.findOne({ + where: baseWhere, + order: [['id', 'ASC']], + limit: 1, + transaction + }) + if (!task) { + return null + } + } + + const [affected] = await Entity.update( + { leaderUuid: controllerUuid, claimedAt: new Date(), status: 'in_progress' }, + { + where: { + id: task.id, + ...buildLeaderOrStaleCondition(staleThreshold, deleteStaleThreshold) + }, + transaction + } + ) + if (affected === 0) { + return null + } + + if (reloadTask) { + return reloadTask(task.id, transaction) + } + + return Entity.findOne({ where: { id: task.id }, transaction }) + }, { priority: PRIORITY_BACKGROUND, label: 'reconcileTask.claim' }) +} + +module.exports = { + getDbDialect, + supportsSkipLocked, + quoteTableName, + claimNextReconcileTask +} diff --git a/src/helpers/db-metrics.js b/src/helpers/db-metrics.js new file mode 100644 index 00000000..847e2ad6 --- /dev/null +++ b/src/helpers/db-metrics.js @@ -0,0 +1,118 @@ +const { metrics } = require('@opentelemetry/api') + +const METER_NAME = 'iofog-controller-db' +const METER_VERSION = '1.0.0' + +let meter = null +let transactionDuration = null +let writeQueueWaitMs = null +let busyRetries = null +let connectionInvalidated = null +let sqliteFogCountWarning = null +let writeQueueDepthInteractive = null + +function getMeter () { + if (!meter) { + meter = metrics.getMeter(METER_NAME, METER_VERSION) + } + 
return meter +} + +function isConnectionInvalidatedError (error) { + if (!error) { + return false + } + const messages = [ + error.message, + error.parent && error.parent.message, + error.original && error.original.message + ] + return messages.some((message) => message && ( + message.indexOf('cannot rollback') !== -1 || + message.indexOf('ConnectionManager.getConnection was called after') !== -1 || + message.indexOf('Connection terminated') !== -1 || + message.indexOf('Connection lost') !== -1 || + message.indexOf('ECONNRESET') !== -1 || + message.indexOf('ECONNREFUSED') !== -1 + )) +} + +/** + * Register DB OTEL instruments and optional Sequelize connection hooks. + * @param {import('sequelize').Sequelize} [sequelize] + * @param {string} provider + * @param {{ getWriteQueueDepth?: () => { interactive: number, background: number } }} [queueReader] + */ +function initDbMetrics (_sequelize, _provider, queueReader) { + const m = getMeter() + + transactionDuration = m.createHistogram('db.transaction.duration', { + description: 'Sequelize transaction duration', + unit: 'ms' + }) + writeQueueWaitMs = m.createHistogram('db.write_queue.wait_ms', { + description: 'Time spent waiting in the SQLite write queue before execution', + unit: 'ms' + }) + busyRetries = m.createCounter('db.busy_retries', { + description: 'SQLite SQLITE_BUSY retries during transaction execution' + }) + connectionInvalidated = m.createCounter('db.connection.invalidated', { + description: 'Database connection errors indicating pool or transaction invalidation' + }) + sqliteFogCountWarning = m.createCounter('db.sqlite.fog_count_warning', { + description: 'SQLite fleet size exceeded enterprise recommended threshold' + }) + + if (queueReader && typeof queueReader.getWriteQueueDepth === 'function') { + writeQueueDepthInteractive = m.createObservableGauge('db.write_queue.depth', { + description: 'Pending SQLite write queue depth by priority lane' + }) + writeQueueDepthInteractive.addCallback((result) => { + 
const depth = queueReader.getWriteQueueDepth() + result.observe(depth.interactive, { priority: 'interactive' }) + result.observe(depth.background, { priority: 'background' }) + }) + } +} + +function recordTransactionDuration (attributes, durationMs) { + if (durationMs >= 0) { + transactionDuration?.record(durationMs, attributes) + } +} + +function recordWriteQueueWaitMs (priority, waitMs) { + if (waitMs >= 0) { + writeQueueWaitMs?.record(waitMs, { priority }) + } +} + +function recordBusyRetry (label) { + busyRetries?.add(1, { label: label || 'unknown' }) +} + +function recordConnectionInvalidated (provider) { + connectionInvalidated?.add(1, { provider: provider || 'unknown' }) +} + +function recordSqliteFogCountWarning () { + sqliteFogCountWarning?.add(1) +} + +function maybeRecordConnectionInvalidated (error, provider) { + if (isConnectionInvalidatedError(error)) { + recordConnectionInvalidated(provider) + } +} + +module.exports = { + initDbMetrics, + isConnectionInvalidatedError, + maybeRecordConnectionInvalidated, + recordBusyRetry, + recordConnectionInvalidated, + recordSqliteFogCountWarning, + recordTransactionDuration, + recordWriteQueueWaitMs +} diff --git a/src/helpers/job-startup.js b/src/helpers/job-startup.js index a764b869..97650a3f 100644 --- a/src/helpers/job-startup.js +++ b/src/helpers/job-startup.js @@ -5,7 +5,8 @@ const RECONCILE_HEAVY_JOBS = new Set([ 'platform-reconcile-worker-job.js', 'nats-reconcile-worker-job.js', 'fog-platform-sweep-job.js', - 'fog-status-job.js' + 'fog-status-job.js', + 'reconcile-outbox-drainer-job.js' ]) const JOB_STAGGER_MS = 500 diff --git a/src/helpers/reconcile-outbox-keys.js b/src/helpers/reconcile-outbox-keys.js new file mode 100644 index 00000000..e7a01ff5 --- /dev/null +++ b/src/helpers/reconcile-outbox-keys.js @@ -0,0 +1,66 @@ +const crypto = require('crypto') + +function stableHash (value) { + return crypto.createHash('sha256').update(JSON.stringify(value)).digest('hex').slice(0, 16) +} + +function 
/**
 * Build the idempotency key for a NATS reconcile outbox entry.
 *
 * Key layout: `nats:<reason>:<scope>` where `<scope>` is the colon-joined list
 * applicationId, accountRuleId, userRuleId, microserviceUuid, mutationKind,
 * authGeneration (each rendered as `null` when nullish). When `fogUuids` is a
 * non-empty array, a comma-joined, sorted list of the distinct UUIDs is
 * appended as a final segment.
 *
 * The fog list is treated as a set: order and repeated entries do not change
 * the key, so two requests targeting the same fogs collapse to one key.
 *
 * @param {object|null} [payload] - reconcile request fields; nullish is treated as empty
 * @returns {string} idempotency key
 */
function buildNatsIdempotencyKey (payload = {}) {
  const {
    reason,
    applicationId,
    accountRuleId,
    userRuleId,
    fogUuids,
    microserviceUuid,
    mutationKind,
    authGeneration
  } = payload ?? {} // default parameter only covers undefined, not an explicit null

  const scopeSuffix = [
    applicationId,
    accountRuleId,
    userRuleId,
    microserviceUuid,
    mutationKind,
    authGeneration
  ].map((part) => part ?? 'null').join(':')

  if (Array.isArray(fogUuids) && fogUuids.length > 0) {
    // Copy through a Set: removes duplicates and leaves the caller's array unsorted.
    const fogPart = [...new Set(fogUuids)].sort().join(',')
    return `nats:${reason}:${scopeSuffix}:${fogPart}`
  }

  return `nats:${reason}:${scopeSuffix}`
}
(secretData, secretName, secretType = null, useVault = null) { + _shouldUseVault (secretType, useVault) { const isConfigMap = secretType === 'configmap' - // Determine if vault should be used - let shouldUseVault = false - if (isConfigMap) { - // For ConfigMaps, check the useVault parameter if (useVault === false) { - // Explicitly disabled - use internal encryption - shouldUseVault = false - } else if (useVault === true || useVault === null) { - // Explicitly enabled or default (null) - use vault if enabled - shouldUseVault = vaultManager.isEnabled() + return false } - } else { - // For non-ConfigMaps (Secrets, Agent Auth Keys), always use vault if enabled - shouldUseVault = vaultManager.isEnabled() - } - - // If vault should be used, store in vault - if (shouldUseVault) { - try { - const vaultPath = await vaultManager.store(secretName, secretType, secretData) - // Return vault reference that will be stored in database - return `${this.VAULT_REF_PREFIX}${vaultPath}` - } catch (error) { - logger.error(`Failed to store secret in vault: ${error.message}`) - throw error + if (useVault === true || useVault === null) { + return vaultManager.isEnabled() } } - // Fallback to internal encryption + return vaultManager.isEnabled() + } + + async encryptSecretInternal (secretData, secretName) { const salt = crypto.randomBytes(this.SALT_LENGTH) const key = await this._deriveKey(secretName, salt) const iv = crypto.randomBytes(this.IV_LENGTH) @@ -66,6 +49,28 @@ class SecretHelper { return Buffer.concat([salt, iv, tag, encrypted]).toString('base64') } + async storeInVaultAndGetReference (secretData, secretName, secretType = null, useVault = null) { + if (!this._shouldUseVault(secretType, useVault)) { + throw new Error('Vault storage requested but vault is not configured for this resource') + } + + try { + const vaultPath = await vaultManager.store(secretName, secretType, secretData) + return `${this.VAULT_REF_PREFIX}${vaultPath}` + } catch (error) { + logger.error(`Failed to store 
secret in vault: ${error.message}`) + throw error + } + } + + async encryptSecret (secretData, secretName, secretType = null, useVault = null) { + if (this._shouldUseVault(secretType, useVault)) { + return this.storeInVaultAndGetReference(secretData, secretName, secretType, useVault) + } + + return this.encryptSecretInternal(secretData, secretName) + } + /** * Retrieve secret data - uses vault if reference detected, otherwise uses internal decryption * @param {string} encryptedData - Encrypted data or vault reference diff --git a/src/helpers/sequelize-transaction.js b/src/helpers/sequelize-transaction.js new file mode 100644 index 00000000..7cdbcaba --- /dev/null +++ b/src/helpers/sequelize-transaction.js @@ -0,0 +1,15 @@ +const Transaction = require('sequelize/lib/transaction') + +function isSequelizeTransaction (value) { + if (value == null || typeof value !== 'object') { + return false + } + if (value instanceof Transaction) { + return true + } + return typeof value.commit === 'function' && typeof value.rollback === 'function' +} + +module.exports = { + isSequelizeTransaction +} diff --git a/src/helpers/sqlite-fog-warning.js b/src/helpers/sqlite-fog-warning.js new file mode 100644 index 00000000..45c3714e --- /dev/null +++ b/src/helpers/sqlite-fog-warning.js @@ -0,0 +1,39 @@ +const config = require('../config') +const models = require('../data/models') +const logger = require('../logger') +const transactionRunner = require('./transaction-runner') +const { recordSqliteFogCountWarning } = require('./db-metrics') + +const DEFAULT_THRESHOLD = 50 + +function getThreshold () { + return config.get('settings.sqliteEnterpriseFogWarningThreshold', DEFAULT_THRESHOLD) +} + +/** + * Log a soft warning when sqlite fleet size exceeds the enterprise threshold (R124). + * Does not block API — observability and operator guidance only. 
+ */ +async function checkSqliteFogCountWarning () { + if (!transactionRunner.isSqliteProvider()) { + return + } + + const threshold = getThreshold() + const fogCount = await models.Fog.count() + if (fogCount <= threshold) { + return + } + + recordSqliteFogCountWarning() + logger.warn( + `SQLite deployment has ${fogCount} fogs (threshold ${threshold}). ` + + 'For enterprise scale and multi-user load, migrate to mysql or postgres. ' + + 'See docs/operations/database-transactions.md.' + ) +} + +module.exports = { + checkSqliteFogCountWarning, + getThreshold +} diff --git a/src/helpers/template-helper.js b/src/helpers/template-helper.js index 725381ba..b4d6354d 100755 --- a/src/helpers/template-helper.js +++ b/src/helpers/template-helper.js @@ -1,6 +1,7 @@ const ApplicationManager = require('../data/managers/application-manager.js') // Using manager instead of service to avoid dependency loop const FogService = require('../services/iofog-service') const MicroservicesService = require('../services/microservices-service') +const { runInTransaction } = require('../helpers/transaction-runner') // ninja2 like template engine const { Liquid } = require('../lib/liquidjs/liquid.node.cjs') @@ -29,7 +30,10 @@ async function findApplicationHandler (name) { return this.context.environments._applicationsByName[name] } - const result = await ApplicationManager.findOnePopulated({ exclude: ['created_at', 'updated_at'] }, { fakeTransaction: true }) // TODO: Get a proper DB transaction + const result = await runInTransaction( + (transaction) => ApplicationManager.findOnePopulated({ exclude: ['created_at', 'updated_at'] }, transaction), + { label: 'template-find-application' } + ) if (result) { result.microservices = (await MicroservicesService.listMicroservicesEndPoint({ applicationName: name }, false)).microservices if (this.context.environments._applicationsByName) { diff --git a/src/helpers/transaction-runner.js b/src/helpers/transaction-runner.js new file mode 100644 index 
00000000..a9717edf --- /dev/null +++ b/src/helpers/transaction-runner.js @@ -0,0 +1,254 @@ +const { AsyncLocalStorage } = require('async_hooks') +const databaseProvider = require('../data/providers/database-factory') +const config = require('../config') +const logger = require('../logger') +const { withDbBusyRetry } = require('./db-busy-retry') +const { + maybeRecordConnectionInvalidated, + recordTransactionDuration, + recordWriteQueueWaitMs +} = require('./db-metrics') + +const PRIORITY_INTERACTIVE = 'interactive' +const PRIORITY_BACKGROUND = 'background' + +const interactiveLane = [] +const backgroundLane = [] +let workerPromise = null +let queueDepthExceededLogged = false +const activeTransactionStore = new AsyncLocalStorage() + +const queueDepth = { + interactive: 0, + background: 0 +} + +function getProviderName () { + return process.env.DB_PROVIDER || config.get('database.provider', 'sqlite') || 'sqlite' +} + +function isSqliteProvider () { + return getProviderName() === 'sqlite' +} + +function getWriteQueueMaxDepth () { + return config.get('settings.dbWriteQueueMaxDepth', 256) +} + +function updateQueueDepth () { + queueDepth.interactive = interactiveLane.length + queueDepth.background = backgroundLane.length +} + +function getWriteQueueDepth () { + return { ...queueDepth } +} + +function checkQueueBackpressure () { + const depth = getWriteQueueDepth() + const totalDepth = depth.interactive + depth.background + const maxDepth = getWriteQueueMaxDepth() + if (totalDepth <= maxDepth) { + queueDepthExceededLogged = false + return + } + if (!queueDepthExceededLogged) { + queueDepthExceededLogged = true + logger.error( + `SQLite write queue depth ${totalDepth} exceeds configured maximum ${maxDepth} ` + + `(interactive=${depth.interactive}, background=${depth.background}). ` + + 'Investigate background job pressure or migrate to mysql/postgres. ' + + 'Interactive requests are not rejected; see docs/operations/database-transactions.md.' 
+ ) + } +} + +function dequeueNext () { + if (interactiveLane.length > 0) { + return interactiveLane.shift() + } + if (backgroundLane.length > 0) { + return backgroundLane.shift() + } + return null +} + +function getActiveTransactionContext () { + return activeTransactionStore.getStore() || null +} + +function getActiveTransaction () { + const ctx = getActiveTransactionContext() + return ctx ? ctx.transaction : null +} + +async function executeTransaction (fn, options) { + const sequelize = databaseProvider.sequelize + const provider = getProviderName() + const startedAt = Date.now() + const priority = options.priority || PRIORITY_INTERACTIVE + + try { + const result = await withDbBusyRetry( + () => sequelize.transaction((transaction) => { + return activeTransactionStore.run({ transaction, priority }, async () => fn(transaction)) + }), + options + ) + recordTransactionDuration( + { + label: options.label || 'unknown', + priority: options.priority || PRIORITY_INTERACTIVE, + provider + }, + Date.now() - startedAt + ) + return result + } catch (error) { + maybeRecordConnectionInvalidated(error, provider) + throw error + } +} + +async function runWorker () { + while (true) { + updateQueueDepth() + checkQueueBackpressure() + const item = dequeueNext() + if (!item) { + break + } + + recordWriteQueueWaitMs(item.priority, Date.now() - item.enqueuedAt) + + try { + const result = await executeTransaction(item.fn, item.retryOptions) + item.resolve(result) + } catch (error) { + item.reject(error) + } + } + workerPromise = null +} + +function ensureWorker () { + if (!workerPromise) { + workerPromise = runWorker() + } + return workerPromise +} + +function enqueueSqlite (fn, options) { + return new Promise((resolve, reject) => { + const item = { + fn, + resolve, + reject, + priority: options.priority, + enqueuedAt: Date.now(), + retryOptions: { + label: options.label, + priority: options.priority + } + } + + if (options.priority === PRIORITY_BACKGROUND) { + 
backgroundLane.push(item) + } else { + interactiveLane.push(item) + } + + updateQueueDepth() + checkQueueBackpressure() + ensureWorker() + }) +} + +async function runInTransactionPool (fn, options) { + return executeTransaction(fn, { + label: options.label, + priority: options.priority || PRIORITY_INTERACTIVE + }) +} + +/** + * Run a callback inside a real Sequelize transaction. + * SQLite: serialized through a global priority write queue (interactive before background). + * mysql/postgres: direct pool transaction, no global queue. + * + * On SQLite, interactive priority reuses an active ALS parent transaction when nested. + * Background priority always enqueues a fresh transaction so deferred work (e.g. audit + * events scheduled via setImmediate after a handler commit) cannot reuse a stale tx. + * + * @param {Function} fn - async (transaction) => result + * @param {{ priority?: string, label?: string }} [options] + */ +async function runInTransaction (fn, options = {}) { + const priority = options.priority || PRIORITY_INTERACTIVE + const label = options.label || 'unknown' + + if (isSqliteProvider()) { + const parentCtx = getActiveTransactionContext() + if (parentCtx && parentCtx.transaction && priority !== PRIORITY_BACKGROUND) { + return fn(parentCtx.transaction) + } + return enqueueSqlite(fn, { priority, label }) + } + + return runInTransactionPool(fn, { priority, label }) +} + +/** + * Run a callback with an existing Sequelize transaction registered in AsyncLocalStorage. + * Use whenever executing code that already holds a transaction outside executeTransaction's + * own ALS frame (e.g. generateTransaction explicit-tx and ALS-inject paths). 
/**
 * Defer work to a later event-loop turn (via setImmediate) and run it through
 * runInTransaction with PRIORITY_BACKGROUND, so the work does not execute in
 * the caller's current tick.
 *
 * This is fire-and-forget: nothing is returned and the caller cannot observe
 * the outcome. Because of that, a failure is caught and logged here; without
 * the catch, a rejected transaction would surface as an unhandled promise
 * rejection in the process.
 *
 * @param {string} label - transaction-runner label for metrics/logging
 * @param {Function} fn - async (transaction) => result
 * @returns {void}
 */
function schedulePostCommitBackground (label, fn) {
  setImmediate(async () => {
    try {
      await runInTransaction(fn, { priority: PRIORITY_BACKGROUND, label })
    } catch (error) {
      const reason = error != null && error.message !== undefined ? error.message : String(error)
      logger.error(`Post-commit background work (${label}) failed: ${reason}`)
    }
  })
}
/**
 * Run vault work once the given transaction commits.
 *
 * - If `transaction` exposes `afterCommit`, `fn` is registered as an
 *   after-commit hook and nothing is returned.
 * - Otherwise, when running under tests (`isTest()`), `fn` runs immediately and
 *   the resulting promise is returned.
 * - Otherwise the work cannot be scheduled; this is logged instead of being
 *   dropped silently.
 *
 * Failures of `fn` are logged and never propagate: synchronous throws,
 * rejections, and rejections with non-Error values are all converted into a
 * warning, so deferred vault work cannot fail the caller.
 *
 * @param {object|null|undefined} transaction - transaction to hook into
 * @param {Function} fn - vault work; may return a promise
 * @param {string} [label='vault'] - label used in log messages
 * @returns {Promise<void>|undefined} a promise only on the immediate (test) path
 */
function scheduleVaultAfterCommit (transaction, fn, label = 'vault') {
  // The Promise executor still invokes fn synchronously, but turns a synchronous
  // throw into a rejection so it reaches the catch below.
  const run = () => new Promise((resolve) => resolve(fn())).catch((err) => {
    const reason = err != null && err.message !== undefined ? err.message : String(err)
    logger.warn(`Deferred vault work (${label}) failed: ${reason}`)
  })

  if (transaction && typeof transaction.afterCommit === 'function') {
    transaction.afterCommit(run)
    return
  }

  if (isTest()) {
    return run()
  }

  logger.warn(`Deferred vault work (${label}) skipped: transaction does not support afterCommit`)
}
model() : model + if (!Model) { + throw new Error('Model not available for vault promote') + } + const vaultRef = await SecretHelper.storeInVaultAndGetReference( + secretData, + secretName, + secretType, + useVault + ) + await runInTransaction(async (tx) => { + await Model.update({ [field]: vaultRef }, { where, transaction: tx }) + }, { priority: PRIORITY_INTERACTIVE, label: promoteLabel }) + }, promoteLabel) +} + +module.exports = { + scheduleVaultAfterCommit, + scheduleVaultDeleteAfterCommit, + scheduleVaultPromoteAfterCommit, + shouldDeferVaultStore +} diff --git a/src/jobs/controller-cleanup-job.js b/src/jobs/controller-cleanup-job.js index 732693b3..0d365a76 100644 --- a/src/jobs/controller-cleanup-job.js +++ b/src/jobs/controller-cleanup-job.js @@ -3,6 +3,7 @@ const Config = require('../config') const logger = require('../logger') const Sequelize = require('sequelize') const Op = Sequelize.Op +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') async function run () { try { @@ -10,7 +11,6 @@ async function run () { } catch (error) { logger.error('Error during controller cleanup:', error) } finally { - // Schedule next run with current interval (may have changed via env var) const currentInterval = process.env.CONTROLLER_CLEANUP_INTERVAL || Config.get('settings.controllerCleanupInterval', 600) setTimeout(run, currentInterval * 1000) } @@ -23,22 +23,24 @@ async function cleanupInactiveControllers () { logger.debug(`Starting cleanup of controllers inactive for more than ${thresholdSeconds} seconds`) - const fakeTransaction = { fakeTransaction: true } - const inactive = await ClusterControllerManager.findAll({ - isActive: true, - lastHeartbeat: { [Op.lt]: threshold } - }, fakeTransaction) - - let cleanedCount = 0 - for (const controller of inactive) { - await ClusterControllerManager.update( - { uuid: controller.uuid }, - { isActive: false }, - fakeTransaction - ) - logger.info(`Marked controller ${controller.uuid} on host 
${controller.host} as inactive (last heartbeat: ${controller.lastHeartbeat})`) - cleanedCount++ - } + const cleanedCount = await runInTransaction(async (transaction) => { + const inactive = await ClusterControllerManager.findAll({ + isActive: true, + lastHeartbeat: { [Op.lt]: threshold } + }, transaction) + + let count = 0 + for (const controller of inactive) { + await ClusterControllerManager.update( + { uuid: controller.uuid }, + { isActive: false }, + transaction + ) + logger.info(`Marked controller ${controller.uuid} on host ${controller.host} as inactive (last heartbeat: ${controller.lastHeartbeat})`) + count++ + } + return count + }, { priority: PRIORITY_BACKGROUND, label: 'controller-cleanup' }) if (cleanedCount > 0) { logger.info(`Cleaned up ${cleanedCount} inactive controller(s)`) diff --git a/src/jobs/controller-heartbeat-job.js b/src/jobs/controller-heartbeat-job.js index 85517ad8..4d5f605c 100644 --- a/src/jobs/controller-heartbeat-job.js +++ b/src/jobs/controller-heartbeat-job.js @@ -1,6 +1,8 @@ const ClusterControllerService = require('../services/cluster-controller-service') const Config = require('../config') const logger = require('../logger') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') +const { checkSqliteFogCountWarning } = require('../helpers/sqlite-fog-warning') const scheduleTime = (Config.get('settings.controllerHeartbeatInterval', 30)) * 1000 @@ -22,8 +24,11 @@ async function updateControllerHeartbeat () { return } - const fakeTransaction = { fakeTransaction: true } - await ClusterControllerService.updateHeartbeat(uuid, fakeTransaction) + await runInTransaction( + (transaction) => ClusterControllerService.updateHeartbeat(uuid, transaction), + { priority: PRIORITY_BACKGROUND, label: 'controller-heartbeat' } + ) + await checkSqliteFogCountWarning() logger.debug(`Updated heartbeat for controller: ${uuid}`) } catch (error) { logger.error(`Failed to update controller heartbeat: ${error.message}`) 
diff --git a/src/jobs/event-cleanup-job.js b/src/jobs/event-cleanup-job.js index 64dc8d61..81878580 100644 --- a/src/jobs/event-cleanup-job.js +++ b/src/jobs/event-cleanup-job.js @@ -2,6 +2,7 @@ const EventManager = require('../data/managers/event-manager') const EventService = require('../services/event-service') const Config = require('../config') const logger = require('../logger') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') async function run () { try { @@ -9,7 +10,6 @@ async function run () { } catch (error) { logger.error('Error during event cleanup:', error) } finally { - // Schedule next run with current interval (may have changed via env var) const currentInterval = process.env.EVENT_CLEANUP_INTERVAL || Config.get('settings.eventCleanupInterval', 86400) setTimeout(run, currentInterval * 1000) } @@ -17,15 +17,15 @@ async function run () { async function cleanupOldEvents () { try { - // Read retention days from config const retentionDays = process.env.EVENT_RETENTION_DAYS || Config.get('settings.eventRetentionDays', 7) logger.debug(`Starting cleanup of events older than ${retentionDays} days`) - const count = await EventManager.deleteEventsOlderThanDays(retentionDays, { fakeTransaction: true }) + const count = await runInTransaction( + (transaction) => EventManager.deleteEventsOlderThanDays(retentionDays, transaction), + { priority: PRIORITY_BACKGROUND, label: 'event-cleanup' } + ) logger.info(`Cleaned up ${count} events older than ${retentionDays} days`) - // Create audit trail for automated cleanup (non-blocking) - // This allows admins to distinguish between manual deletions and automated cleanup if (count > 0) { setImmediate(async () => { try { @@ -43,7 +43,7 @@ async function cleanupOldEvents () { statusCode: 200, statusMessage: `Automated cleanup: Deleted ${count} events older than ${retentionDays} days`, requestId: null - }, { fakeTransaction: true }).catch(err => { + }).catch(err => { 
/**
 * Report whether the hub router config lacks the TCP listener for a service.
 *
 * The config is expected to be a list of `[entityType, attributes]` pairs. A
 * service is in sync when some pair is `['tcpListener', { name: '<service.name>-listener' }]`.
 *
 * A config that is missing or is not an array is reported as drift. Entries
 * that are null/undefined are skipped rather than dereferenced, so one
 * malformed entry cannot abort the whole check with a TypeError.
 *
 * @param {{ name: string }} service - service whose listener is looked up
 * @param {Array|null|undefined} k8sRouterConfig - parsed router config
 * @returns {boolean} true when the listener is absent (drift), false when present
 */
function hasK8sServiceHubDrift (service, k8sRouterConfig) {
  if (!Array.isArray(k8sRouterConfig)) {
    return true
  }
  const listenerName = `${service.name}-listener`
  const hasListener = k8sRouterConfig.some((entry) =>
    entry != null &&
    entry[0] === 'tcpListener' &&
    entry[1] != null &&
    entry[1].name === listenerName
  )
  return !hasListener
}
shouldEnqueueFogSweepInternal(specRow.fogUuid, t) - if (!shouldEnqueue) { - continue - } +function hasNonK8sServiceHubDrift (service, defaultRouterConfig) { + if (!defaultRouterConfig) { + return false + } + const listenerName = `${service.name}-listener` + const listeners = defaultRouterConfig.bridges?.tcpListeners || {} + return !listeners[listenerName] +} - const parsedSpec = await FogPlatformSpecManager.getParsedSpec(specRow.fogUuid, t) - await FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask({ - fogUuid: specRow.fogUuid, - reason: 'periodic-sweep', - specGeneration: parsedSpec ? parsedSpec.generation : specRow.generation - }, t) - fogEnqueued += 1 +async function evaluateSweepCandidates (transaction) { + const fogCandidates = [] + const failedServiceCandidates = [] + const readyServicesForDrift = [] + let needsK8sDriftCheck = false + let defaultRouterConfig = null + + const specs = await FogPlatformSpecManager.findAll({}, transaction) + for (const specRow of specs) { + if (!(await shouldEnqueueFogSweepInternal(specRow.fogUuid, transaction))) { + continue } + const parsedSpec = await FogPlatformSpecManager.getParsedSpec(specRow.fogUuid, transaction) + fogCandidates.push({ + fogUuid: specRow.fogUuid, + specGeneration: parsedSpec ? 
parsedSpec.generation : specRow.generation + }) + } - const services = await ServiceManager.findAllWithTags({}, t) - for (const service of services) { - const shouldEnqueue = await shouldEnqueueServiceSweepInternal(service, t) - if (!shouldEnqueue) { - continue + const services = await ServiceManager.findAllWithTags({}, transaction) + for (const service of services) { + if (service.provisioningStatus === 'failed') { + if (await shouldEnqueueFailedServiceSweep(service, transaction)) { + failedServiceCandidates.push(service) } + continue + } - const specSnapshot = { - name: service.name, - type: service.type, - resource: service.resource, - defaultBridge: service.defaultBridge, - bridgePort: service.bridgePort, - targetPort: service.targetPort, - servicePort: service.servicePort, - k8sType: service.k8sType, - serviceEndpoint: service.serviceEndpoint, - tags: (service.tags || []).map((tag) => (typeof tag === 'string' ? tag : tag.value)) + if (service.provisioningStatus === 'ready') { + readyServicesForDrift.push(service) + } + } + + if (readyServicesForDrift.length > 0) { + const isK8s = await ServicesService.checkKubernetesEnvironment() + if (isK8s) { + needsK8sDriftCheck = true + } else { + try { + const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) + if (defaultRouter) { + defaultRouterConfig = await IofogService._getRouterMicroserviceConfig(defaultRouter.iofogUuid, transaction) + } + } catch (error) { + defaultRouterConfig = null } - await ServicePlatformReconcileTaskManager.enqueueServicePlatformReconcileTask({ - serviceName: service.name, - reason: 'periodic-sweep', - specSnapshot - }, t) - serviceEnqueued += 1 } + } + + return { + fogCandidates, + failedServiceCandidates, + readyServicesForDrift, + needsK8sDriftCheck, + defaultRouterConfig + } +} + +async function persistSweepEnqueue (evaluation, k8sRouterConfig, isK8s, transaction) { + let fogEnqueued = 0 + let serviceEnqueued = 0 + + for (const candidate of 
evaluation.fogCandidates) { + await FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask({ + fogUuid: candidate.fogUuid, + reason: 'periodic-sweep', + specGeneration: candidate.specGeneration + }, transaction) + fogEnqueued += 1 + } + + for (const service of evaluation.failedServiceCandidates) { + await enqueueServiceSweepTask(service, transaction) + serviceEnqueued += 1 + } - if (fogEnqueued > 0 || serviceEnqueued > 0) { - logger.info('Fog platform sweep enqueued reconcile tasks', { fogEnqueued, serviceEnqueued }) + for (const service of evaluation.readyServicesForDrift) { + const hasDrift = isK8s + ? hasK8sServiceHubDrift(service, k8sRouterConfig) + : hasNonK8sServiceHubDrift(service, evaluation.defaultRouterConfig) + + if (!hasDrift || await hasActiveServiceTask(service.name, transaction)) { + continue } - return { fogEnqueued, serviceEnqueued } + await enqueueServiceSweepTask(service, transaction) + serviceEnqueued += 1 + } + + if (fogEnqueued > 0 || serviceEnqueued > 0) { + logger.info('Fog platform sweep enqueued reconcile tasks', { fogEnqueued, serviceEnqueued }) + } + + return { fogEnqueued, serviceEnqueued } +} + +function buildServiceSpecSnapshot (service) { + return { + name: service.name, + type: service.type, + resource: service.resource, + defaultBridge: service.defaultBridge, + bridgePort: service.bridgePort, + targetPort: service.targetPort, + servicePort: service.servicePort, + k8sType: service.k8sType, + serviceEndpoint: service.serviceEndpoint, + tags: (service.tags || []).map((tag) => (typeof tag === 'string' ? 
/**
 * Run one fog-platform sweep: evaluate candidates, optionally read the K8s hub
 * router config, then enqueue reconcile tasks.
 *
 * The work is split in three steps so the K8s read happens between the two
 * database steps rather than inside one of them:
 *   1. evaluateSweepCandidates (in the given transaction, or in its own
 *      background transaction when none is given),
 *   2. fetchK8sRouterConfig, only when the evaluation asks for a K8s drift check,
 *   3. persistSweepEnqueue (same transaction rule as step 1).
 *
 * If the K8s config fetch throws, hub state is unknown. In that case service
 * drift detection is skipped for this sweep (fog and failed-service candidates
 * are still enqueued). Passing a null config through instead would make every
 * ready service look drifted and enqueue a reconcile for each of them.
 * NOTE(review): this relies on fetchK8sRouterConfig returning null (not
 * throwing) when the ConfigMap simply does not exist — confirm against
 * K8sClient.getConfigMap.
 *
 * @param {object} [transaction] - existing transaction to reuse for both DB steps
 * @returns {Promise<{ fogEnqueued: number, serviceEnqueued: number }>}
 */
async function runSweepInternal (transaction) {
  const uuid = ClusterControllerService.getCurrentControllerUuid()
  if (!uuid) {
    return { fogEnqueued: 0, serviceEnqueued: 0 }
  }

  const evaluate = (t) => evaluateSweepCandidates(t)
  const evaluation = transaction
    ? await evaluate(transaction)
    : await runInTransaction(evaluate, { priority: PRIORITY_BACKGROUND, label: 'fogPlatformSweep.evaluate' })

  let k8sRouterConfig = null
  // Evaluation result handed to the persist step; replaced by a copy without
  // drift candidates when the hub config could not be read.
  let evaluationToPersist = evaluation
  const isK8s = evaluation.needsK8sDriftCheck
    ? await ServicesService.checkKubernetesEnvironment()
    : false

  if (evaluation.needsK8sDriftCheck && isK8s) {
    try {
      k8sRouterConfig = await fetchK8sRouterConfig()
    } catch (error) {
      logger.warn('Fog platform sweep K8s config fetch failed', { err: error })
      evaluationToPersist = { ...evaluation, readyServicesForDrift: [] }
    }
  }

  const persist = (t) => persistSweepEnqueue(evaluationToPersist, k8sRouterConfig, isK8s, t)
  if (transaction) {
    return persist(transaction)
  }

  return runInTransaction(persist, { priority: PRIORITY_BACKGROUND, label: 'fogPlatformSweep.persist' })
}
isBackoffElapsed(task.nextAttemptAt) : false + } + return true +} + +async function hasServiceHubDrift (service, transaction, options = {}) { if (service.provisioningStatus !== 'ready') { return false } - const listenerName = `${service.name}-listener` - const isK8s = await ServicesService.checkKubernetesEnvironment() + const isK8s = options.isK8s != null + ? options.isK8s + : await ServicesService.checkKubernetesEnvironment() if (isK8s) { - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap || !configMap.data || !configMap.data['skrouterd.json']) { - return true - } - const routerConfig = JSON.parse(configMap.data['skrouterd.json']) - return !routerConfig.some((entry) => - entry[0] === 'tcpListener' && entry[1] && entry[1].name === listenerName - ) + const k8sRouterConfig = options.k8sRouterConfig !== undefined + ? options.k8sRouterConfig + : await fetchK8sRouterConfig() + return hasK8sServiceHubDrift(service, k8sRouterConfig) } try { @@ -239,9 +361,10 @@ async function hasServiceHubDrift (service, transaction) { if (!defaultRouter) { return false } - const routerConfig = await IofogService._getRouterMicroserviceConfig(defaultRouter.iofogUuid, transaction) - const listeners = routerConfig?.bridges?.tcpListeners || {} - return !listeners[listenerName] + const routerConfig = options.defaultRouterConfig !== undefined + ? 
options.defaultRouterConfig + : await IofogService._getRouterMicroserviceConfig(defaultRouter.iofogUuid, transaction) + return hasNonK8sServiceHubDrift(service, routerConfig) } catch (error) { return false } @@ -281,22 +404,12 @@ async function shouldEnqueueFogSweepInternal (fogUuid, transaction) { return false } -async function shouldEnqueueServiceSweepInternal (service, transaction) { +async function shouldEnqueueServiceSweepInternal (service, transaction, options = {}) { if (service.provisioningStatus === 'failed') { - if (await hasActiveServiceTask(service.name, transaction)) { - const task = await ServicePlatformReconcileTaskManager.getEntity().findOne({ - where: { - serviceName: service.name, - status: { [Op.in]: ACTIVE_STATUSES } - }, - transaction - }) - return task ? isBackoffElapsed(task.nextAttemptAt) : false - } - return true + return shouldEnqueueFailedServiceSweep(service, transaction) } - if (await hasServiceHubDrift(service, transaction)) { + if (await hasServiceHubDrift(service, transaction, options)) { return !(await hasActiveServiceTask(service.name, transaction)) } @@ -305,7 +418,10 @@ async function shouldEnqueueServiceSweepInternal (service, transaction) { module.exports = { run, - runSweep: TransactionDecorator.generateTransaction(runSweepInternal), + runSweep: TransactionDecorator.generateTransaction(runSweepInternal, { priority: PRIORITY_BACKGROUND, label: 'fogPlatformSweep' }), shouldEnqueueFogSweep: TransactionDecorator.generateTransaction(shouldEnqueueFogSweepInternal), - shouldEnqueueServiceSweep: TransactionDecorator.generateTransaction(shouldEnqueueServiceSweepInternal) + shouldEnqueueServiceSweep: TransactionDecorator.generateTransaction(shouldEnqueueServiceSweepInternal), + fetchK8sRouterConfig, + hasK8sServiceHubDrift, + hasNonK8sServiceHubDrift } diff --git a/src/jobs/fog-status-job.js b/src/jobs/fog-status-job.js index 11fd62ac..118c0143 100644 --- a/src/jobs/fog-status-job.js +++ b/src/jobs/fog-status-job.js @@ -1,4 +1,5 @@ 
const TransactionDecorator = require('../decorators/transaction-decorator') +const { PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') const FogManager = require('../data/managers/iofog-manager') const MicroserviceManager = require('../data/managers/microservice-manager') @@ -14,7 +15,10 @@ const scheduleTime = Config.get('settings.fogStatusUpdateInterval') * 1000 async function run () { try { - const _updateFogsConnectionStatus = TransactionDecorator.generateTransaction(updateFogsConnectionStatus) + const _updateFogsConnectionStatus = TransactionDecorator.generateTransaction( + updateFogsConnectionStatus, + { priority: PRIORITY_BACKGROUND, label: 'fogStatus.updateConnection' } + ) await _updateFogsConnectionStatus() } catch (error) { logger.error('Error during fog status update:', error) diff --git a/src/jobs/fog-token-cleanup-job.js b/src/jobs/fog-token-cleanup-job.js index cb433b4a..4fcb448c 100644 --- a/src/jobs/fog-token-cleanup-job.js +++ b/src/jobs/fog-token-cleanup-job.js @@ -1,6 +1,7 @@ const FogUsedTokenManager = require('../data/managers/fog-used-token-manager') const Config = require('../config') const logger = require('../logger') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') const scheduleTime = Config.get('settings.fogExpiredTokenCleanupInterval') * 1000 @@ -8,7 +9,7 @@ async function run () { try { await cleanupExpiredTokens() } catch (error) { - logger.error('Error during JTI cleanup:', error) + logger.error({ err: error }, 'Error during JTI cleanup') } finally { setTimeout(run, scheduleTime) } @@ -17,10 +18,13 @@ async function run () { async function cleanupExpiredTokens () { try { logger.debug('Starting cleanup of expired JTIs') - const count = await FogUsedTokenManager.cleanupExpiredJtis() + const count = await runInTransaction( + (transaction) => FogUsedTokenManager.cleanupExpiredJtis(transaction), + { priority: PRIORITY_BACKGROUND, label: 'fogToken.cleanupExpiredJtis' } + ) 
logger.debug(`Cleaned up ${count} expired JTIs`) } catch (error) { - logger.error('Error during JTI cleanup:', error) + logger.error({ err: error }, 'Error during JTI cleanup') } } diff --git a/src/jobs/nats-reconcile-worker-job.js b/src/jobs/nats-reconcile-worker-job.js index f785a45f..b7fabf58 100644 --- a/src/jobs/nats-reconcile-worker-job.js +++ b/src/jobs/nats-reconcile-worker-job.js @@ -1,9 +1,9 @@ const ClusterControllerService = require('../services/cluster-controller-service') const NatsService = require('../services/nats-service') const NatsReconcileTaskManager = require('../data/managers/nats-reconcile-task-manager') -const databaseProvider = require('../data/providers/database-factory') const Config = require('../config') const logger = require('../logger') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') const scheduleTime = (Config.get('settings.natsReconcileWorkerIntervalSeconds', 3)) * 1000 @@ -50,12 +50,12 @@ async function processNextTask () { logger.info(`NATS reconcile task ${task.id} started`) await NatsService.reconcileResolverArtifacts(options) logger.info(`NATS reconcile task ${task.id} completed`) - await databaseProvider.sequelize.transaction(async (transaction) => { + await runInTransaction(async (transaction) => { await NatsReconcileTaskManager.getEntity().destroy({ where: { id: task.id }, transaction }) - }) + }, { priority: PRIORITY_BACKGROUND, label: 'natsReconcile.taskComplete' }) } catch (error) { logger.error({ err: error, diff --git a/src/jobs/platform-reconcile-worker-job.js b/src/jobs/platform-reconcile-worker-job.js index d211e7e8..a4e93366 100644 --- a/src/jobs/platform-reconcile-worker-job.js +++ b/src/jobs/platform-reconcile-worker-job.js @@ -2,11 +2,12 @@ const ClusterControllerService = require('../services/cluster-controller-service const FogPlatformService = require('../services/fog-platform-service') const ServicePlatformService = require('../services/service-platform-service') 
const FogPlatformReconcileTaskManager = require('../data/managers/fog-platform-reconcile-task-manager') +const FogPlatformStatusManager = require('../data/managers/fog-platform-status-manager') const ServicePlatformReconcileTaskManager = require('../data/managers/service-platform-reconcile-task-manager') const ServiceManager = require('../data/managers/service-manager') -const databaseProvider = require('../data/providers/database-factory') const Config = require('../config') const logger = require('../logger') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') const scheduleTime = (Config.get('settings.fogPlatformReconcileWorkerIntervalSeconds', 3)) * 1000 @@ -21,6 +22,34 @@ async function run () { } } +async function isFogDeleteReconcileTask (task, transaction) { + if (task.reason === 'delete') { + return true + } + const status = await FogPlatformStatusManager.getParsedStatus(task.fogUuid, transaction) + return !!(status && status.phase === 'Deleting') +} + +async function runFogReconcileForTask (task) { + if (task.reason === 'delete') { + return FogPlatformService.reconcileFogDelete(task.fogUuid) + } + + const status = await runInTransaction( + (transaction) => FogPlatformStatusManager.getParsedStatus(task.fogUuid, transaction), + { priority: PRIORITY_BACKGROUND, label: 'platformReconcile.fogDeleteCheck' } + ) + if (status && status.phase === 'Deleting') { + return FogPlatformService.reconcileFogDelete(task.fogUuid) + } + + const result = await FogPlatformService.reconcileFog(task.fogUuid) + if (result && result.skipped && result.reason === 'deleting') { + return FogPlatformService.reconcileFogDelete(task.fogUuid) + } + return result +} + async function processNextFogTask () { const uuid = ClusterControllerService.getCurrentControllerUuid() if (!uuid) { @@ -46,9 +75,7 @@ async function processNextFogTask () { reason: task.reason }) - const result = task.reason === 'delete' - ? 
await FogPlatformService.reconcileFogDelete(task.fogUuid) - : await FogPlatformService.reconcileFog(task.fogUuid) + const result = await runFogReconcileForTask(task) logger.info(`Fog platform reconcile task ${task.id} completed`, { fogUuid: task.fogUuid, @@ -56,12 +83,12 @@ async function processNextFogTask () { result }) - await databaseProvider.sequelize.transaction(async (transaction) => { + await runInTransaction(async (transaction) => { await FogPlatformReconcileTaskManager.getEntity().destroy({ where: { id: task.id }, transaction }) - }) + }, { priority: PRIORITY_BACKGROUND, label: 'platformReconcile.fogTaskComplete' }) } catch (error) { logger.error({ err: error, @@ -116,12 +143,12 @@ async function processNextServiceTask () { }) if (task.reason !== 'delete') { - await databaseProvider.sequelize.transaction(async (transaction) => { + await runInTransaction(async (transaction) => { await ServicePlatformReconcileTaskManager.getEntity().destroy({ where: { id: task.id }, transaction }) - }) + }, { priority: PRIORITY_BACKGROUND, label: 'platformReconcile.serviceTaskComplete' }) } } catch (error) { logger.error({ @@ -146,15 +173,21 @@ async function processNextServiceTask () { async function handleFogTaskFailure (task, error) { const errorMessage = error.message || String(error) - await databaseProvider.sequelize.transaction(async (transaction) => { + await runInTransaction(async (transaction) => { await FogPlatformReconcileTaskManager.recordFogTaskFailure( task.id, errorMessage, { attempts: task.attempts }, transaction ) - await FogPlatformService.markReconcileFailed(task.fogUuid, error, transaction) - }) + if (await isFogDeleteReconcileTask(task, transaction)) { + await FogPlatformStatusManager.setPhase(task.fogUuid, 'Deleting', { + lastError: errorMessage + }, transaction) + } else { + await FogPlatformService.markReconcileFailed(task.fogUuid, error, transaction) + } + }, { priority: PRIORITY_BACKGROUND, label: 'platformReconcile.fogTaskFailure' }) } async 
function handleServiceTaskFailure (task, error) { @@ -163,7 +196,7 @@ async function handleServiceTaskFailure (task, error) { const nextAttempts = (task.attempts != null ? task.attempts : 0) + 1 const isPermanent = nextAttempts >= maxAttempts - await databaseProvider.sequelize.transaction(async (transaction) => { + await runInTransaction(async (transaction) => { await ServicePlatformReconcileTaskManager.recordServiceTaskFailure( task.id, errorMessage, @@ -181,11 +214,13 @@ async function handleServiceTaskFailure (task, error) { transaction ) } - }) + }, { priority: PRIORITY_BACKGROUND, label: 'platformReconcile.serviceTaskFailure' }) } module.exports = { run, processNextFogTask, - processNextServiceTask + processNextServiceTask, + runFogReconcileForTask, + isFogDeleteReconcileTask } diff --git a/src/jobs/reconcile-outbox-drainer-job.js b/src/jobs/reconcile-outbox-drainer-job.js new file mode 100644 index 00000000..6d6aee59 --- /dev/null +++ b/src/jobs/reconcile-outbox-drainer-job.js @@ -0,0 +1,78 @@ +const config = require('../config') +const logger = require('../logger') +const ReconcileOutboxManager = require('../data/managers/reconcile-outbox-manager') +const FogPlatformReconcileTaskManager = require('../data/managers/fog-platform-reconcile-task-manager') +const ServicePlatformReconcileTaskManager = require('../data/managers/service-platform-reconcile-task-manager') +const NatsService = require('../services/nats-service') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') + +const DEFAULT_BATCH_SIZE = 32 + +async function run () { + try { + await drainOnce() + } catch (error) { + logger.error('Reconcile outbox drainer error:', error) + } finally { + const intervalSeconds = config.get('settings.reconcileOutboxDrainerIntervalSeconds', 1) + setTimeout(run, intervalSeconds * 1000) + } +} + +async function drainRow (row, transaction) { + const payload = ReconcileOutboxManager.parsePayload(row) + if (!payload) { + throw new 
Error(`Outbox row ${row.id} has empty payload`) + } + + switch (row.kind) { + case 'nats': + await NatsService.enqueueReconcileTask(payload, transaction) + break + case 'fog_platform': + await FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask(payload, transaction) + break + case 'service_platform': + await ServicePlatformReconcileTaskManager.enqueueServicePlatformReconcileTask(payload, transaction) + break + default: + throw new Error(`Unknown reconcile outbox kind: ${row.kind}`) + } +} + +async function drainOnce () { + const batchSize = config.get('settings.reconcileOutboxDrainerBatchSize', DEFAULT_BATCH_SIZE) + + return runInTransaction(async (transaction) => { + const rows = await ReconcileOutboxManager.claimUnprocessed(batchSize, transaction) + if (!rows.length) { + return { processed: 0, failed: 0 } + } + + let processed = 0 + let failed = 0 + + for (const row of rows) { + try { + await drainRow(row, transaction) + await ReconcileOutboxManager.markProcessed(row.id, transaction) + processed += 1 + } catch (error) { + failed += 1 + logger.error(`Reconcile outbox drain failed for row ${row.id}: ${error.message}`) + await ReconcileOutboxManager.markFailed(row.id, error.message, transaction) + } + } + + if (processed > 0 || failed > 0) { + logger.debug('Reconcile outbox drainer batch complete', { processed, failed }) + } + + return { processed, failed } + }, { priority: PRIORITY_BACKGROUND, label: 'reconcileOutboxDrainer' }) +} + +module.exports = { + run, + drainOnce +} diff --git a/src/jobs/stopped-app-status-job.js b/src/jobs/stopped-app-status-job.js index 36057e86..00d752c7 100644 --- a/src/jobs/stopped-app-status-job.js +++ b/src/jobs/stopped-app-status-job.js @@ -1,4 +1,5 @@ const TransactionDecorator = require('../decorators/transaction-decorator') +const { PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') const MicroserviceManager = require('../data/managers/microservice-manager') const MicroserviceStatusManager = 
require('../data/managers/microservice-status-manager') @@ -13,8 +14,14 @@ const scheduleTime = Config.get('settings.fogStatusUpdateInterval') * 1000 async function run () { try { - const _updateStoppedApplicationMicroserviceStatus = TransactionDecorator.generateTransaction(updateStoppedApplicationMicroserviceStatus) - const _updateStoppedMicroserviceStatus = TransactionDecorator.generateTransaction(updateStoppedMicroserviceStatus) + const _updateStoppedApplicationMicroserviceStatus = TransactionDecorator.generateTransaction( + updateStoppedApplicationMicroserviceStatus, + { priority: PRIORITY_BACKGROUND, label: 'stoppedAppStatus.application' } + ) + const _updateStoppedMicroserviceStatus = TransactionDecorator.generateTransaction( + updateStoppedMicroserviceStatus, + { priority: PRIORITY_BACKGROUND, label: 'stoppedAppStatus.microservice' } + ) // Handle microservices from deactivated applications await _updateStoppedApplicationMicroserviceStatus() diff --git a/src/jobs/ws-session-reconcile-job.js b/src/jobs/ws-session-reconcile-job.js index b019d6a7..7b9a7b2e 100644 --- a/src/jobs/ws-session-reconcile-job.js +++ b/src/jobs/ws-session-reconcile-job.js @@ -10,6 +10,7 @@ const MicroserviceManager = require('../data/managers/microservice-manager') const ChangeTrackingService = require('../services/change-tracking-service') const FogManager = require('../data/managers/iofog-manager') const TransactionDecorator = require('../decorators/transaction-decorator') +const { PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') function getIntervalMs () { const seconds = process.env.WS_SESSION_RECONCILE_INTERVAL_SECONDS || @@ -31,7 +32,7 @@ async function run () { } } -async function reconcileStaleSessions () { +async function reconcileStaleSessionsInTransaction (transaction) { const wsServer = WebSocketServer.getInstance() const execSessionManager = wsServer.execSessionManager const logSessionManager = wsServer.logSessionManager @@ -45,22 +46,18 @@ async function 
reconcileStaleSessions () { let execCleaned = 0 let logCleaned = 0 - await TransactionDecorator.generateTransaction(async (transaction) => { - const execRows = await MicroserviceExecSessionManager.findAll({ - status: { [Op.in]: ['PENDING', 'ACTIVE'] } - }, transaction) - - for (const row of execRows) { - const sessionId = row.sessionId - const microserviceUuid = row.microserviceUuid - if (!sessionId || !microserviceUuid) continue + const execRows = await MicroserviceExecSessionManager.findAll({ + status: { [Op.in]: ['PENDING', 'ACTIVE'] } + }, transaction) - if (execSessionManager.getExecSession(sessionId)) continue + for (const row of execRows) { + const sessionId = row.sessionId + const microserviceUuid = row.microserviceUuid + if (!sessionId || !microserviceUuid) continue - const age = now - new Date(row.updatedAt).getTime() - const threshold = row.status === 'PENDING' ? execPendingTimeout : execMaxDuration - if (age < threshold) continue + if (execSessionManager.getExecSession(sessionId)) continue + if (!row.userConnected && !row.agentConnected) { await MicroserviceExecSessionManager.deleteBySessionId(sessionId, transaction) const microservice = await MicroserviceManager.findOne({ uuid: microserviceUuid }, transaction) @@ -73,25 +70,46 @@ async function reconcileStaleSessions () { } execCleaned++ - logger.info('Reconciled stale exec session row:' + JSON.stringify({ + logger.info('Reconciled orphaned exec session row:' + JSON.stringify({ sessionId, microserviceUuid, - status: row.status, - ageMs: age + status: row.status })) + continue } - const msLogRows = await MicroserviceLogStatusManager.findAll({ - status: { [Op.in]: ['PENDING', 'ACTIVE'] } - }, transaction) + const age = now - new Date(row.updatedAt).getTime() + const threshold = row.status === 'PENDING' ? 
execPendingTimeout : execMaxDuration + if (age < threshold) continue + + await MicroserviceExecSessionManager.deleteBySessionId(sessionId, transaction) - for (const row of msLogRows) { - if (logSessionManager.getLogSession(row.sessionId)) continue + const microservice = await MicroserviceManager.findOne({ uuid: microserviceUuid }, transaction) + if (microservice) { + await ChangeTrackingService.update( + microservice.iofogUuid, + ChangeTrackingService.events.microserviceExecSessions, + transaction + ) + } + + execCleaned++ + logger.info('Reconciled stale exec session row:' + JSON.stringify({ + sessionId, + microserviceUuid, + status: row.status, + ageMs: age + })) + } - const age = now - new Date(row.updatedAt).getTime() - const threshold = row.status === 'PENDING' ? logPendingTimeout : logIdleTimeout - if (age < threshold) continue + const msLogRows = await MicroserviceLogStatusManager.findAll({ + status: { [Op.in]: ['PENDING', 'ACTIVE'] } + }, transaction) + for (const row of msLogRows) { + if (logSessionManager.getLogSession(row.sessionId)) continue + + if (!row.userConnected && !row.agentConnected) { await MicroserviceLogStatusManager.delete({ sessionId: row.sessionId }, transaction) logCleaned++ @@ -104,25 +122,46 @@ async function reconcileStaleSessions () { ) } - logger.info('Reconciled stale microservice log row:' + JSON.stringify({ + logger.info('Reconciled orphaned microservice log row:' + JSON.stringify({ sessionId: row.sessionId, microserviceUuid: row.microserviceUuid, - status: row.status, - ageMs: age + status: row.status })) + continue + } + + const age = now - new Date(row.updatedAt).getTime() + const threshold = row.status === 'PENDING' ? 
logPendingTimeout : logIdleTimeout + if (age < threshold) continue + + await MicroserviceLogStatusManager.delete({ sessionId: row.sessionId }, transaction) + logCleaned++ + + const microservice = await MicroserviceManager.findOne({ uuid: row.microserviceUuid }, transaction) + if (microservice) { + await ChangeTrackingService.update( + microservice.iofogUuid, + ChangeTrackingService.events.microserviceLogs, + transaction + ) } - const fogLogRows = await FogLogStatusManager.findAll({ - status: { [Op.in]: ['PENDING', 'ACTIVE'] } - }, transaction) + logger.info('Reconciled stale microservice log row:' + JSON.stringify({ + sessionId: row.sessionId, + microserviceUuid: row.microserviceUuid, + status: row.status, + ageMs: age + })) + } - for (const row of fogLogRows) { - if (logSessionManager.getLogSession(row.sessionId)) continue + const fogLogRows = await FogLogStatusManager.findAll({ + status: { [Op.in]: ['PENDING', 'ACTIVE'] } + }, transaction) - const age = now - new Date(row.updatedAt).getTime() - const threshold = row.status === 'PENDING' ? logPendingTimeout : logIdleTimeout - if (age < threshold) continue + for (const row of fogLogRows) { + if (logSessionManager.getLogSession(row.sessionId)) continue + if (!row.userConnected && !row.agentConnected) { await FogLogStatusManager.delete({ sessionId: row.sessionId }, transaction) logCleaned++ @@ -135,20 +174,53 @@ async function reconcileStaleSessions () { ) } - logger.info('Reconciled stale fog log row:' + JSON.stringify({ + logger.info('Reconciled orphaned fog log row:' + JSON.stringify({ sessionId: row.sessionId, iofogUuid: row.iofogUuid, - status: row.status, - ageMs: age + status: row.status })) + continue + } + + const age = now - new Date(row.updatedAt).getTime() + const threshold = row.status === 'PENDING' ? 
logPendingTimeout : logIdleTimeout + if (age < threshold) continue + + await FogLogStatusManager.delete({ sessionId: row.sessionId }, transaction) + logCleaned++ + + const fog = await FogManager.findOne({ uuid: row.iofogUuid }, transaction) + if (fog) { + await ChangeTrackingService.update( + fog.uuid, + ChangeTrackingService.events.fogLogs, + transaction + ) } - })() + + logger.info('Reconciled stale fog log row:' + JSON.stringify({ + sessionId: row.sessionId, + iofogUuid: row.iofogUuid, + status: row.status, + ageMs: age + })) + } if (execCleaned > 0 || logCleaned > 0) { logger.info(`WS session reconcile completed: ${execCleaned} exec, ${logCleaned} log rows cleaned`) } } +const _reconcileStaleSessions = TransactionDecorator.generateTransaction( + reconcileStaleSessionsInTransaction, + { priority: PRIORITY_BACKGROUND, label: 'ws.sessionReconcile' } +) + +async function reconcileStaleSessions () { + await _reconcileStaleSessions() +} + module.exports = { - run + run, + reconcileStaleSessionsInTransaction } diff --git a/src/lib/rbac/middleware.js b/src/lib/rbac/middleware.js index e1cbf344..505b52a0 100644 --- a/src/lib/rbac/middleware.js +++ b/src/lib/rbac/middleware.js @@ -7,6 +7,7 @@ const config = require('../../config') const db = require('../../data/models') const { getOidcSettings } = require('../../config/oidc') const { PASSWORD_CHANGE_REQUIRED_CLAIM } = require('../../services/auth-token-service') +const { runInTransaction } = require('../../helpers/transaction-runner') const PASSWORD_CHANGE_ALLOWLIST = [ { method: 'GET', path: '/api/v3/user/profile' }, @@ -318,17 +319,16 @@ function requirePermission (resource, verb) { const finalVerb = verb || routeDef.verb const resourceName = routeDef.resourceName - // Get database transaction (create a fake transaction for read-only operations) - const transaction = { fakeTransaction: true } - - // Authorize - const authResult = await authorizer.authorize( - subjects, - routeDef.apiGroup || '', - finalResource, - 
finalVerb, - resourceName, - transaction + const authResult = await runInTransaction( + (transaction) => authorizer.authorize( + subjects, + routeDef.apiGroup || '', + finalResource, + finalVerb, + resourceName, + transaction + ), + { label: 'rbac-authorize' } ) if (!authResult.allowed) { @@ -410,17 +410,16 @@ async function authorizeWebSocket (req, token) { subjects: subjects }) - // Get database transaction - const transaction = { fakeTransaction: true } - - // Authorize - const authResult = await authorizer.authorize( - subjects, - routeDef.apiGroup || '', - routeDef.resource, - routeDef.verb, - routeDef.resourceName, - transaction + const authResult = await runInTransaction( + (transaction) => authorizer.authorize( + subjects, + routeDef.apiGroup || '', + routeDef.resource, + routeDef.verb, + routeDef.resourceName, + transaction + ), + { label: 'rbac-authorize-ws' } ) logger.debug(`WebSocket authorization result:`, { @@ -572,17 +571,16 @@ function protect (_roles) { return callback() } - // Get database transaction - const transaction = { fakeTransaction: true } - - // Authorize - const authResult = await authorizer.authorize( - subjects, - routeDef.apiGroup || '', - routeDef.resource, - routeDef.verb, - routeDef.resourceName, - transaction + const authResult = await runInTransaction( + (transaction) => authorizer.authorize( + subjects, + routeDef.apiGroup || '', + routeDef.resource, + routeDef.verb, + routeDef.resourceName, + transaction + ), + { label: 'rbac-protect' } ) if (!authResult.allowed) { diff --git a/src/middlewares/event-audit-middleware.js b/src/middlewares/event-audit-middleware.js index 4aee3f6f..e5494545 100644 --- a/src/middlewares/event-audit-middleware.js +++ b/src/middlewares/event-audit-middleware.js @@ -44,11 +44,11 @@ function eventAuditMiddleware (req, res, next) { // Fire and forget - never await EventService.createHttpEvent(req, res, startTime).catch(err => { // Silent error handling - never throw - logger.error('Event logging failed 
(non-blocking):', err) + logger.error({ err }, 'Event logging failed (non-blocking)') }) } catch (error) { // Catch any synchronous errors - logger.error('Event logging setup failed (non-blocking):', error) + logger.error({ err: error }, 'Event logging setup failed (non-blocking)') // Don't throw - request already completed } }) diff --git a/src/routes/network-topology.js b/src/routes/network-topology.js new file mode 100644 index 00000000..e595fe5d --- /dev/null +++ b/src/routes/network-topology.js @@ -0,0 +1,84 @@ +const constants = require('../helpers/constants') +const NetworkTopologyController = require('../controllers/network-topology-controller') +const ResponseDecorator = require('../decorators/response-decorator') +const logger = require('../logger') +const Errors = require('../helpers/errors') +const rbacMiddleware = require('../lib/rbac/middleware') + +const defaultErrorCodes = [ + { + code: constants.HTTP_CODE_UNAUTHORIZED, + errors: [Errors.AuthenticationError] + }, + { + code: constants.HTTP_CODE_BAD_REQUEST, + errors: [Errors.ValidationError] + } +] + +const readWithNotFoundErrorCodes = [ + ...defaultErrorCodes, + { + code: constants.HTTP_CODE_NOT_FOUND, + errors: [Errors.NotFoundError] + } +] + +function createGetRoute (path, handler, errorCodes = defaultErrorCodes) { + return { + method: 'get', + path, + middleware: async (req, res) => { + logger.apiReq(req) + + await rbacMiddleware.protect()(req, res, async () => { + const endpoint = ResponseDecorator.handleErrors( + handler, + constants.HTTP_CODE_SUCCESS, + errorCodes + ) + const responseObject = await endpoint(req) + const user = req.kauth && req.kauth.grant && req.kauth.grant.access_token + ? 
req.kauth.grant.access_token.content.preferred_username + : 'system' + res + .status(responseObject.code) + .send(responseObject.body) + + logger.apiRes({ req, user, res, responseObject }) + }) + } + } +} + +module.exports = [ + createGetRoute('/api/v3/network-topology/summary', NetworkTopologyController.getSummaryEndPoint), + createGetRoute('/api/v3/network-topology/router/overview', NetworkTopologyController.getRouterOverviewEndPoint), + createGetRoute('/api/v3/network-topology/nats/overview', NetworkTopologyController.getNatsOverviewEndPoint), + createGetRoute('/api/v3/network-topology/router/nodes', NetworkTopologyController.listRouterNodesEndPoint), + createGetRoute('/api/v3/network-topology/nats/nodes', NetworkTopologyController.listNatsNodesEndPoint), + createGetRoute( + '/api/v3/network-topology/router/nodes/:id/connections', + NetworkTopologyController.getRouterNodeConnectionsEndPoint, + readWithNotFoundErrorCodes + ), + createGetRoute( + '/api/v3/network-topology/nats/nodes/:id/connections', + NetworkTopologyController.getNatsNodeConnectionsEndPoint, + readWithNotFoundErrorCodes + ), + createGetRoute( + '/api/v3/network-topology/router/nodes/:id', + NetworkTopologyController.getRouterNodeEndPoint, + readWithNotFoundErrorCodes + ), + createGetRoute( + '/api/v3/network-topology/nats/nodes/:id', + NetworkTopologyController.getNatsNodeEndPoint, + readWithNotFoundErrorCodes + ), + createGetRoute('/api/v3/network-topology/router/connections', NetworkTopologyController.listRouterConnectionsEndPoint), + createGetRoute('/api/v3/network-topology/nats/connections', NetworkTopologyController.listNatsConnectionsEndPoint), + createGetRoute('/api/v3/network-topology/router/subgraph', NetworkTopologyController.getRouterSubgraphEndPoint), + createGetRoute('/api/v3/network-topology/nats/subgraph', NetworkTopologyController.getNatsSubgraphEndPoint) +] diff --git a/src/schemas/network-topology.js b/src/schemas/network-topology.js new file mode 100644 index 00000000..346383ac 
--- /dev/null +++ b/src/schemas/network-topology.js @@ -0,0 +1,61 @@ +const networkTopologyListQuery = { + id: '/networkTopologyListQuery', + type: 'object', + properties: { + limit: { + anyOf: [ + { type: 'number', minimum: 1 }, + { type: 'string', pattern: '^\\d+$' } + ] + }, + offset: { + anyOf: [ + { type: 'number', minimum: 0 }, + { type: 'string', pattern: '^\\d+$' } + ] + }, + role: { + type: 'string', + enum: ['default', 'edge', 'interior', 'hub', 'leaf', 'server'] + }, + deploymentTarget: { + type: 'string', + enum: ['kubernetes', 'remote', 'edgelet'] + }, + search: { type: 'string', minLength: 1 } + }, + additionalProperties: false +} + +const networkTopologySubgraphQuery = { + id: '/networkTopologySubgraphQuery', + type: 'object', + properties: { + center: { type: 'string', minLength: 1 }, + depth: { + anyOf: [ + { type: 'number', minimum: 1, maximum: 2 }, + { type: 'string', pattern: '^[12]$' } + ] + }, + limit: { + anyOf: [ + { type: 'number', minimum: 1 }, + { type: 'string', pattern: '^\\d+$' } + ] + }, + offset: { + anyOf: [ + { type: 'number', minimum: 0 }, + { type: 'string', pattern: '^\\d+$' } + ] + } + }, + required: ['center'], + additionalProperties: false +} + +module.exports = { + mainSchemas: [networkTopologyListQuery, networkTopologySubgraphQuery], + innerSchemas: [] +} diff --git a/src/schemas/utils/utils.js b/src/schemas/utils/utils.js index 9fcd98e9..d4e6ec3c 100644 --- a/src/schemas/utils/utils.js +++ b/src/schemas/utils/utils.js @@ -6,5 +6,5 @@ module.exports = { colorRegex: '^(#([A-Fa-f0-9]{6}|[A-Fa-f0-9]{8}))|(rgb\(\s*(?:(\d{1,3})\s*,?){3}\))|(rgba\(\s*(?:(\d{1,3})\s*,?){4}\))|$', // https://semver.org/#is-there-a-suggested-regular-expression-regex-to-check-a-semver-string // https://regex101.com/r/vkijKf/380 - versionRegex: '^v?(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:[+]([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$' + versionRegex: 
'^v?(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$' } diff --git a/src/services/agent-service.js b/src/services/agent-service.js index 3a8ee3f0..ffd4f65e 100644 --- a/src/services/agent-service.js +++ b/src/services/agent-service.js @@ -645,7 +645,8 @@ const getAgentLogSessions = async function (fog, transaction) { const msSessions = await MicroserviceLogStatusManager.findAll( { microserviceUuid: { [Op.in]: microserviceUuids }, - status: { [Op.in]: ['PENDING', 'ACTIVE'] } + status: { [Op.in]: ['PENDING', 'ACTIVE'] }, + userConnected: true }, transaction ) @@ -664,7 +665,8 @@ const getAgentLogSessions = async function (fog, transaction) { const fogSessions = await FogLogStatusManager.findAll( { iofogUuid: fog.uuid, - status: { [Op.in]: ['PENDING', 'ACTIVE'] } + status: { [Op.in]: ['PENDING', 'ACTIVE'] }, + userConnected: true }, transaction ) @@ -697,7 +699,8 @@ const getAgentExecSessions = async function (fog, transaction) { const msSessions = await MicroserviceExecSessionManager.findAll( { microserviceUuid: { [Op.in]: microserviceUuids }, - status: { [Op.in]: ['PENDING', 'ACTIVE'] } + status: { [Op.in]: ['PENDING', 'ACTIVE'] }, + userConnected: true }, transaction ) @@ -790,7 +793,7 @@ module.exports = { updateHalUsbInfo: TransactionDecorator.generateTransaction(updateHalUsbInfo), deleteNode: TransactionDecorator.generateTransaction(deleteNode), getAgentLinkedVolumeMounts: TransactionDecorator.generateTransaction(getAgentLinkedVolumeMounts), - getControllerCA: TransactionDecorator.generateTransaction(getControllerCA), + getControllerCA, getAgentLogSessions: TransactionDecorator.generateTransaction(getAgentLogSessions), getAgentExecSessions: TransactionDecorator.generateTransaction(getAgentExecSessions) } diff --git a/src/services/amqp-relay-transport.js b/src/services/amqp-relay-transport.js index 0bfb95a3..56a1b6a3 100644 
--- a/src/services/amqp-relay-transport.js +++ b/src/services/amqp-relay-transport.js @@ -51,6 +51,24 @@ class AmqpRelayTransport extends WsRelayTransport { return this._queueService.shouldUseQueue(execId) } + setExecUserDeliveryHook (execId, hook) { + if (typeof this._queueService.setExecUserDeliveryHook === 'function') { + this._queueService.setExecUserDeliveryHook(execId, hook) + } + } + + setExecAgentDeliveryHook (execId, hook) { + if (typeof this._queueService.setExecAgentDeliveryHook === 'function') { + this._queueService.setExecAgentDeliveryHook(execId, hook) + } + } + + setLogUserDeliveryHook (sessionId, hook) { + if (typeof this._queueService.setLogUserDeliveryHook === 'function') { + this._queueService.setLogUserDeliveryHook(sessionId, hook) + } + } + shouldUseRelayForLogs (sessionId) { return this._queueService.shouldUseQueueForLogs(sessionId) } diff --git a/src/services/application-service.js b/src/services/application-service.js index 74f9af52..c55254e9 100644 --- a/src/services/application-service.js +++ b/src/services/application-service.js @@ -15,21 +15,34 @@ const NatsAccountRuleManager = require('../data/managers/nats-account-rule-manag const NatsRuleJwtValidation = require('../helpers/nats-rule-jwt-validation') const NatsAuthService = require('./nats-auth-service') const logger = require('../logger') +const { schedulePostCommitBackground } = require('../helpers/transaction-runner') const onlyUnique = (value, index, self) => self.indexOf(value) === index function _scheduleApplicationNatsOrchestration (applicationId, reason) { - setImmediate(async () => { + schedulePostCommitBackground(`app-nats-orchestration-${applicationId}`, async (transaction) => { try { logger.info(`Starting background app NATS orchestration for app ${applicationId}: ${reason}`) + const suppressOutbox = { triggerReconcile: false } if (reason === 'nats-access-disabled') { - await MicroserviceService.reconcileNatsForApplication(applicationId) - await 
NatsAuthService.deleteAccountForApplication(applicationId) + await MicroserviceService.reconcileNatsForApplication(applicationId, transaction) + await NatsAuthService.deleteAccountForApplication(applicationId, transaction, suppressOutbox) } else { - await NatsAuthService.ensureAccountForApplication(applicationId) - await NatsAuthService.reissueAccountForApplication(applicationId) - await MicroserviceService.reconcileNatsForApplication(applicationId) + await NatsAuthService.ensureAccountForApplication(applicationId, transaction, suppressOutbox) + await NatsAuthService.reissueAccountForApplication(applicationId, transaction, suppressOutbox) + await MicroserviceService.reconcileNatsForApplication(applicationId, transaction) } + const mutationKind = reason === 'nats-access-disabled' + ? 'access-disable' + : reason === 'nats-access-enabled' + ? 'access-enable' + : 'rule-change' + const outboxReason = reason === 'nats-access-disabled' ? 'account-deleted' : 'account-created' + await NatsAuthService.enqueueNatsReconcileOutbox({ + reason: outboxReason, + applicationId, + mutationKind + }, transaction) logger.info(`Completed background app NATS orchestration for app ${applicationId}: ${reason}`) } catch (error) { logger.error(`Background app NATS orchestration failed for app ${applicationId}: ${error.message}`) @@ -446,14 +459,12 @@ async function _updateChangeTrackingsAndDeleteMicroservicesByApplicationId (cond } } -const bypassOptions = { bypassQueue: true } - module.exports = { - createApplicationEndPoint: TransactionDecorator.generateTransaction(createApplicationEndPoint, bypassOptions), - deleteApplicationEndPoint: TransactionDecorator.generateTransaction(deleteApplicationEndPoint, bypassOptions), - deleteSystemApplicationEndPoint: TransactionDecorator.generateTransaction(deleteSystemApplicationEndPoint, bypassOptions), - updateApplicationEndPoint: TransactionDecorator.generateTransaction(updateApplicationEndPoint, bypassOptions), - patchApplicationEndPoint: 
TransactionDecorator.generateTransaction(patchApplicationEndPoint, bypassOptions), + createApplicationEndPoint: TransactionDecorator.generateTransaction(createApplicationEndPoint), + deleteApplicationEndPoint: TransactionDecorator.generateTransaction(deleteApplicationEndPoint), + deleteSystemApplicationEndPoint: TransactionDecorator.generateTransaction(deleteSystemApplicationEndPoint), + updateApplicationEndPoint: TransactionDecorator.generateTransaction(updateApplicationEndPoint), + patchApplicationEndPoint: TransactionDecorator.generateTransaction(patchApplicationEndPoint), getUserApplicationsEndPoint: TransactionDecorator.generateTransaction(getUserApplicationsEndPoint), getSystemApplicationsEndPoint: TransactionDecorator.generateTransaction(getSystemApplicationsEndPoint), getAllApplicationsEndPoint: TransactionDecorator.generateTransaction(getAllApplicationsEndPoint), diff --git a/src/services/auth-bootstrap-service.js b/src/services/auth-bootstrap-service.js index 65345eac..fcf1f68f 100644 --- a/src/services/auth-bootstrap-service.js +++ b/src/services/auth-bootstrap-service.js @@ -8,6 +8,7 @@ const secretHelper = require('../helpers/secret-helper') const AuthPasswordService = require('./auth-password-service') const AuthPolicyService = require('./auth-policy-service') const AuthTokenService = require('./auth-token-service') +const { runInTransaction } = require('../helpers/transaction-runner') const { ADMIN_GROUP } = require('./auth-mfa-service') const SYSTEM_GROUPS = ['admin', 'sre', 'developer', 'viewer'] @@ -111,102 +112,88 @@ async function createBootstrapUser (normalizedUsername, plainPassword, transacti return user } -async function runBootstrap (outerTransaction) { - const transaction = outerTransaction || await db.sequelize.transaction() - const ownTransaction = !outerTransaction +async function runBootstrapInternal (transaction) { + await ensureSystemGroups(transaction) - try { - await ensureSystemGroups(transaction) + let meta = await 
db.AuthBootstrapMeta.findByPk(1, { + transaction, + lock: transaction.LOCK.UPDATE + }) + if (!meta) { + meta = await db.AuthBootstrapMeta.create({ id: 1 }, { transaction }) + } - let meta = await db.AuthBootstrapMeta.findByPk(1, { - transaction, - lock: transaction.LOCK.UPDATE - }) - if (!meta) { - meta = await db.AuthBootstrapMeta.create({ id: 1 }, { transaction }) - } + const existingBootstrap = await findBootstrapUser(transaction) + const { username, passwordRef, allowBootstrapLog } = getBootstrapConfig() - const existingBootstrap = await findBootstrapUser(transaction) - const { username, passwordRef, allowBootstrapLog } = getBootstrapConfig() - - if (!username || !passwordRef) { - if (existingBootstrap) { - logger.warn('Embedded auth bootstrap env missing; keeping existing bootstrap admin') - } else { - logger.warn('Embedded auth bootstrap skipped: OIDC_BOOTSTRAP_ADMIN_USERNAME and OIDC_BOOTSTRAP_ADMIN_PASSWORD are required for first boot') - } - if (ownTransaction) { - await transaction.commit() - } - return { skipped: true, reason: existingBootstrap ? 'env_missing_keep_existing' : 'missing_credentials' } + if (!username || !passwordRef) { + if (existingBootstrap) { + logger.warn('Embedded auth bootstrap env missing; keeping existing bootstrap admin') + } else { + logger.warn('Embedded auth bootstrap skipped: OIDC_BOOTSTRAP_ADMIN_USERNAME and OIDC_BOOTSTRAP_ADMIN_PASSWORD are required for first boot') } + return { skipped: true, reason: existingBootstrap ? 'env_missing_keep_existing' : 'missing_credentials' } + } - const plainPassword = await resolveBootstrapPassword(passwordRef) - if (!plainPassword) { - if (existingBootstrap) { - logger.warn('Embedded auth bootstrap password could not be resolved; keeping existing bootstrap admin') - } else { - logger.warn('Embedded auth bootstrap skipped: bootstrap admin password could not be resolved') - } - if (ownTransaction) { - await transaction.commit() - } - return { skipped: true, reason: existingBootstrap ? 
'env_missing_keep_existing' : 'missing_credentials' } + const plainPassword = await resolveBootstrapPassword(passwordRef) + if (!plainPassword) { + if (existingBootstrap) { + logger.warn('Embedded auth bootstrap password could not be resolved; keeping existing bootstrap admin') + } else { + logger.warn('Embedded auth bootstrap skipped: bootstrap admin password could not be resolved') } + return { skipped: true, reason: existingBootstrap ? 'env_missing_keep_existing' : 'missing_credentials' } + } - const policy = await AuthPolicyService.getPolicy(transaction) - AuthPasswordService.validatePasswordComplexity(plainPassword, policy) - const normalizedUsername = normalizeBootstrapUsername(username) + const policy = await AuthPolicyService.getPolicy(transaction) + AuthPasswordService.validatePasswordComplexity(plainPassword, policy) + const normalizedUsername = normalizeBootstrapUsername(username) + + if (existingBootstrap) { + if (await bootstrapMatchesEnv(existingBootstrap, normalizedUsername, plainPassword)) { + await meta.update({ + completedAt: new Date(), + bootstrapAdminUserId: existingBootstrap.id + }, { transaction }) + return { skipped: true, reason: 'unchanged', userId: existingBootstrap.id, username: normalizedUsername } + } - if (existingBootstrap) { - if (await bootstrapMatchesEnv(existingBootstrap, normalizedUsername, plainPassword)) { - await meta.update({ - completedAt: new Date(), - bootstrapAdminUserId: existingBootstrap.id - }, { transaction }) - if (ownTransaction) { - await transaction.commit() - } - return { skipped: true, reason: 'unchanged', userId: existingBootstrap.id, username: normalizedUsername } - } - - logger.info(`Embedded auth bootstrap admin rotation: replacing ${existingBootstrap.email}`) - await hardDeleteBootstrapUser(existingBootstrap, transaction) - } else { - const conflictingUser = await db.AuthUser.findOne({ - where: { email: normalizedUsername, deletedAt: null }, - transaction - }) - if (conflictingUser) { - 
logger.warn(`Embedded auth bootstrap skipped: user ${normalizedUsername} already exists and is not bootstrap`) - await meta.update({ - completedAt: new Date(), - bootstrapAdminUserId: conflictingUser.id - }, { transaction }) - if (ownTransaction) { - await transaction.commit() - } - return { skipped: true, reason: 'user_exists', userId: conflictingUser.id } - } + logger.info(`Embedded auth bootstrap admin rotation: replacing ${existingBootstrap.email}`) + await hardDeleteBootstrapUser(existingBootstrap, transaction) + } else { + const conflictingUser = await db.AuthUser.findOne({ + where: { email: normalizedUsername, deletedAt: null }, + transaction + }) + if (conflictingUser) { + logger.warn(`Embedded auth bootstrap skipped: user ${normalizedUsername} already exists and is not bootstrap`) + await meta.update({ + completedAt: new Date(), + bootstrapAdminUserId: conflictingUser.id + }, { transaction }) + return { skipped: true, reason: 'user_exists', userId: conflictingUser.id } } + } - const user = await createBootstrapUser(normalizedUsername, plainPassword, transaction, allowBootstrapLog) + const user = await createBootstrapUser(normalizedUsername, plainPassword, transaction, allowBootstrapLog) - await meta.update({ - completedAt: new Date(), - bootstrapAdminUserId: user.id - }, { transaction }) + await meta.update({ + completedAt: new Date(), + bootstrapAdminUserId: user.id + }, { transaction }) - if (ownTransaction) { - await transaction.commit() - } - return { skipped: false, userId: user.id, username: normalizedUsername } - } catch (error) { - if (ownTransaction) { - await transaction.rollback() - } - throw error + return { skipped: false, userId: user.id, username: normalizedUsername } +} + +async function runBootstrap (outerTransaction) { + if (outerTransaction) { + return runBootstrapInternal(outerTransaction) } + + return runInTransaction( + (transaction) => runBootstrapInternal(transaction), + { label: 'auth.bootstrap' } + ) } module.exports = { diff 
--git a/src/services/auth-interaction-service.js b/src/services/auth-interaction-service.js index 82ff8263..2f6d9401 100644 --- a/src/services/auth-interaction-service.js +++ b/src/services/auth-interaction-service.js @@ -10,6 +10,7 @@ const AuthPasswordService = require('./auth-password-service') const AuthMfaService = require('./auth-mfa-service') const AuthUserService = require('./auth-user-service') const InteractionStateStore = require('./auth-interaction-state-store') +const { runInTransaction, PRIORITY_INTERACTIVE } = require('../helpers/transaction-runner') function ensureEmbeddedMode () { if (getAuthMode() !== 'embedded') { @@ -129,7 +130,7 @@ async function verifyLoginCredentials (credentials, transaction) { return authContext } -async function getStatus (uid, transaction) { +async function getStatus (uid) { ensureEmbeddedMode() await findInteraction(uid) @@ -138,32 +139,36 @@ async function getStatus (uid, transaction) { return { step: 'login' } } - const authContext = await loadAuthContextByUserId(state.userId, transaction) - if (!authContext) { - await clearInteractionState(uid) - throw new Errors.AuthenticationError('Interaction session not found or expired') - } + return runInTransaction(async (transaction) => { + const authContext = await loadAuthContextByUserId(state.userId, transaction) + if (!authContext) { + await clearInteractionState(uid) + throw new Errors.AuthenticationError('Interaction session not found or expired') + } - return { step: resolveNextStep(authContext, state) } + return { step: resolveNextStep(authContext, state) } + }, { priority: PRIORITY_INTERACTIVE, label: 'auth.interaction.status' }) } -async function submitLogin (uid, credentials, transaction) { +async function submitLogin (uid, credentials) { ensureEmbeddedMode() await findInteraction(uid) - const authContext = await verifyLoginCredentials(credentials, transaction) - const state = await setInteractionState(uid, { - userId: authContext.user.id, - mfaVerified: false, - 
enrollmentStarted: false, - enrollmentConfirmed: false, - passwordChanged: false - }) - - return { step: resolveNextStep(authContext, state) } + return runInTransaction(async (transaction) => { + const authContext = await verifyLoginCredentials(credentials, transaction) + const state = await setInteractionState(uid, { + userId: authContext.user.id, + mfaVerified: false, + enrollmentStarted: false, + enrollmentConfirmed: false, + passwordChanged: false + }) + + return { step: resolveNextStep(authContext, state) } + }, { priority: PRIORITY_INTERACTIVE, label: 'auth.interaction.login' }) } -async function submitMfa (uid, code, transaction) { +async function submitMfa (uid, code) { ensureEmbeddedMode() await findInteraction(uid) @@ -172,14 +177,16 @@ async function submitMfa (uid, code, transaction) { throw new Errors.InvalidCredentialsError() } - await AuthMfaService.verifyMfaCode(state.userId, code, transaction) - const nextState = await setInteractionState(uid, { mfaVerified: true }) - const authContext = await loadAuthContextByUserId(state.userId, transaction) + return runInTransaction(async (transaction) => { + await AuthMfaService.verifyMfaCode(state.userId, code, transaction) + const nextState = await setInteractionState(uid, { mfaVerified: true }) + const authContext = await loadAuthContextByUserId(state.userId, transaction) - return { step: resolveNextStep(authContext, nextState) } + return { step: resolveNextStep(authContext, nextState) } + }, { priority: PRIORITY_INTERACTIVE, label: 'auth.interaction.mfa' }) } -async function submitEnroll (uid, transaction) { +async function submitEnroll (uid) { ensureEmbeddedMode() await findInteraction(uid) @@ -188,17 +195,19 @@ async function submitEnroll (uid, transaction) { throw new Errors.InvalidCredentialsError() } - const enrollment = await AuthMfaService.enrollMfa(state.userId, transaction) - const nextState = await setInteractionState(uid, { enrollmentStarted: true }) + return runInTransaction(async (transaction) 
=> { + const enrollment = await AuthMfaService.enrollMfa(state.userId, transaction) + const nextState = await setInteractionState(uid, { enrollmentStarted: true }) - return { - step: resolveNextStep(await loadAuthContextByUserId(state.userId, transaction), nextState), - secret: enrollment.secret, - otpauthUrl: enrollment.otpauthUrl - } + return { + step: resolveNextStep(await loadAuthContextByUserId(state.userId, transaction), nextState), + secret: enrollment.secret, + otpauthUrl: enrollment.otpauthUrl + } + }, { priority: PRIORITY_INTERACTIVE, label: 'auth.interaction.enroll' }) } -async function submitConfirmEnroll (uid, code, transaction) { +async function submitConfirmEnroll (uid, code) { ensureEmbeddedMode() await findInteraction(uid) @@ -207,20 +216,22 @@ async function submitConfirmEnroll (uid, code, transaction) { throw new Errors.InvalidCredentialsError() } - const result = await AuthMfaService.confirmMfa(state.userId, code, transaction) - const nextState = await setInteractionState(uid, { - enrollmentConfirmed: true, - mfaVerified: true - }) - const authContext = await loadAuthContextByUserId(state.userId, transaction) - - return { - step: resolveNextStep(authContext, nextState), - recoveryCodes: result.recoveryCodes - } + return runInTransaction(async (transaction) => { + const result = await AuthMfaService.confirmMfa(state.userId, code, transaction) + const nextState = await setInteractionState(uid, { + enrollmentConfirmed: true, + mfaVerified: true + }) + const authContext = await loadAuthContextByUserId(state.userId, transaction) + + return { + step: resolveNextStep(authContext, nextState), + recoveryCodes: result.recoveryCodes + } + }, { priority: PRIORITY_INTERACTIVE, label: 'auth.interaction.confirmEnroll' }) } -async function submitChangePassword (uid, credentials, transaction) { +async function submitChangePassword (uid, credentials) { ensureEmbeddedMode() await findInteraction(uid) @@ -229,28 +240,30 @@ async function submitChangePassword (uid, 
credentials, transaction) { throw new Errors.InvalidCredentialsError() } - const authContext = await loadAuthContextByUserId(state.userId, transaction) - if (!authContext) { - await clearInteractionState(uid) - throw new Errors.AuthenticationError('Interaction session not found or expired') - } + return runInTransaction(async (transaction) => { + const authContext = await loadAuthContextByUserId(state.userId, transaction) + if (!authContext) { + await clearInteractionState(uid) + throw new Errors.AuthenticationError('Interaction session not found or expired') + } - const step = resolveNextStep(authContext, state) - if (step !== 'change-password') { - throw new Errors.ValidationError(`Interaction step "${step}" is required before password change`) - } + const step = resolveNextStep(authContext, state) + if (step !== 'change-password') { + throw new Errors.ValidationError(`Interaction step "${step}" is required before password change`) + } - await AuthUserService.changePasswordWithCurrent( - state.userId, - credentials.currentPassword, - credentials.newPassword, - transaction - ) + await AuthUserService.changePasswordWithCurrent( + state.userId, + credentials.currentPassword, + credentials.newPassword, + transaction + ) - const nextState = await setInteractionState(uid, { passwordChanged: true }) - const updatedContext = await loadAuthContextByUserId(state.userId, transaction) + const nextState = await setInteractionState(uid, { passwordChanged: true }) + const updatedContext = await loadAuthContextByUserId(state.userId, transaction) - return { step: resolveNextStep(updatedContext, nextState) } + return { step: resolveNextStep(updatedContext, nextState) } + }, { priority: PRIORITY_INTERACTIVE, label: 'auth.interaction.changePassword' }) } async function buildConsentGrant (provider, interaction, accountId) { @@ -264,7 +277,7 @@ async function buildConsentGrant (provider, interaction, accountId) { return grantId } -async function complete (uid, req, res, transaction) { 
+async function complete (uid, req, res) { ensureEmbeddedMode() const interaction = await findInteraction(uid) @@ -273,16 +286,20 @@ async function complete (uid, req, res, transaction) { throw new Errors.InvalidCredentialsError() } - const authContext = await loadAuthContextByUserId(state.userId, transaction) - if (!authContext) { - await clearInteractionState(uid) - throw new Errors.AuthenticationError('Interaction session not found or expired') - } + const authContext = await runInTransaction(async (transaction) => { + const context = await loadAuthContextByUserId(state.userId, transaction) + if (!context) { + await clearInteractionState(uid) + throw new Errors.AuthenticationError('Interaction session not found or expired') + } - const step = resolveNextStep(authContext, state) - if (step !== 'complete') { - throw new Errors.ValidationError(`Interaction step "${step}" is required before completion`) - } + const step = resolveNextStep(context, state) + if (step !== 'complete') { + throw new Errors.ValidationError(`Interaction step "${step}" is required before completion`) + } + + return context + }, { priority: PRIORITY_INTERACTIVE, label: 'auth.interaction.complete.validate' }) const provider = getProvider() const grantId = await buildConsentGrant(provider, interaction, state.userId) @@ -291,7 +308,10 @@ async function complete (uid, req, res, transaction) { consent: { grantId } }) - await AuthPolicyService.resetFailedLogin(authContext.user, transaction) + await runInTransaction(async (transaction) => { + await AuthPolicyService.resetFailedLogin(authContext.user, transaction) + }, { priority: PRIORITY_INTERACTIVE, label: 'auth.interaction.complete.reset-login' }) + await clearInteractionState(uid) return { redirectTo, step: 'complete' } diff --git a/src/services/auth-oauth-service.js b/src/services/auth-oauth-service.js index 85570347..fbf72fec 100644 --- a/src/services/auth-oauth-service.js +++ b/src/services/auth-oauth-service.js @@ -18,6 +18,8 @@ const { } = 
require('../config/oidc') const { getPublicUrl, getConsoleUrl } = require('../config/auth-urls') const { getSessionStoreTtlMs } = require('../config/auth-session-store') +const { withTransaction } = require('../helpers/app-helper') +const { runInTransaction, PRIORITY_INTERACTIVE } = require('../helpers/transaction-runner') const AuthTokenService = require('./auth-token-service') const OAUTH_SESSION_KEY = 'controllerOauth' @@ -93,7 +95,7 @@ function linkExternalUserByEmail (tokenResponse) { return email } -async function resolveEmbeddedUserFromTokenResponse (tokenResponse) { +async function resolveEmbeddedUserFromTokenResponse (tokenResponse, transaction) { if (!tokenResponse.id_token) { throw new Errors.AuthenticationError('OAuth response missing id_token') } @@ -104,13 +106,13 @@ async function resolveEmbeddedUserFromTokenResponse (tokenResponse) { throw new Errors.AuthenticationError('OAuth response missing subject') } - const user = await db.AuthUser.findByPk(userId, { + const user = await db.AuthUser.findByPk(userId, withTransaction(transaction, { include: [{ model: db.AuthGroup, as: 'groups', through: { attributes: [] } }] - }) + })) if (!user || user.deletedAt) { throw new Errors.AuthenticationError('OAuth user not found') @@ -173,13 +175,15 @@ async function callback (req) { const consoleUrl = getConsoleUrl() if (getAuthMode() === 'embedded') { - const user = await resolveEmbeddedUserFromTokenResponse(tokenResponse) - const groupNames = (user.groups || []).map((group) => group.name) - const tokens = await AuthTokenService.issueTokenPair(user, groupNames) - return { - tokens, - consoleUrl - } + return runInTransaction(async (transaction) => { + const user = await resolveEmbeddedUserFromTokenResponse(tokenResponse, transaction) + const groupNames = (user.groups || []).map((group) => group.name) + const tokens = await AuthTokenService.issueTokenPair(user, groupNames, transaction) + return { + tokens, + consoleUrl + } + }, { priority: PRIORITY_INTERACTIVE, 
label: 'auth.oauth.callback.embedded' }) } linkExternalUserByEmail(tokenResponse) diff --git a/src/services/certificate-service.js b/src/services/certificate-service.js index ea4983f6..da7694f0 100644 --- a/src/services/certificate-service.js +++ b/src/services/certificate-service.js @@ -80,7 +80,7 @@ async function createCAEndpoint (caData, transaction) { try { const secretName = caData.type === 'self-signed' ? caData.name : caData.secretName - const existingSecret = await SecretService.getSecretEndpoint(secretName) + const existingSecret = await SecretService.getSecretEndpoint(secretName, transaction) if (caData.type === 'self-signed') { if (existingSecret) { throw new Errors.ConflictError(`CA with name ${secretName} already exists`) @@ -109,20 +109,20 @@ async function createCAEndpoint (caData, transaction) { if (caData.type === 'self-signed') { ca = await generateSelfSignedCA(caData.subject, caData.expiration) - await storeCA(ca, caData.name) + await storeCA(ca, caData.name, transaction) certDetails = parseCertificate(ca.cert) } else if (caData.type === 'k8s-secret') { // Import CA from Kubernetes secret - ca = await require('../utils/cert').getCAFromK8sSecret(caData.secretName) + ca = await require('../utils/cert').getCAFromK8sSecret(caData.secretName, transaction) certDetails = parseCertificate(ca.certificate) // Store the CA locally with the same name as the secret const checkedSecret = await SecretManager.findOne({ name: caData.secretName || caData.name }, transaction) if (!checkedSecret) { - await storeCA({ cert: ca.certificate, key: ca.key }, caData.secretName) + await storeCA({ cert: ca.certificate, key: ca.key }, caData.secretName, transaction) } } else if (caData.type === 'direct') { // Load from internal secret - const caObj = await require('../utils/cert').loadCA(caData.secretName) + const caObj = await require('../utils/cert').loadCA(caData.secretName, transaction) ca = await require('../utils/cert').getCAFromDirect(caObj) certDetails = 
parseCertificate(ca.certificate) } else { @@ -192,7 +192,7 @@ async function getCAEndpoint (name, transaction) { } // Get the actual cert data from the secret - const secret = await SecretService.getSecretEndpoint(name) + const secret = await SecretService.getSecretEndpoint(name, transaction) if (!secret || secret.type !== 'tls') { throw new Errors.NotFoundError(`CA with name ${name} not found`) @@ -280,7 +280,7 @@ async function _createCertificateEndpointInner (certData, transaction) { // Check if certificate already exists try { - const existingSecret = await SecretService.getSecretEndpoint(certData.name) + const existingSecret = await SecretService.getSecretEndpoint(certData.name, transaction) if (existingSecret) { throw new Errors.ConflictError(`Certificate with name ${certData.name} already exists`) } @@ -310,11 +310,12 @@ async function _createCertificateEndpointInner (certData, transaction) { subject: certData.subject, hosts: certData.hosts, expiration: certData.expiration, - ca: certData.ca + ca: certData.ca, + transaction }) // Get certificate details from newly created secret - const certSecret = await SecretService.getSecretEndpoint(certData.name) + const certSecret = await SecretService.getSecretEndpoint(certData.name, transaction) const certPem = Buffer.from(certSecret.data['tls.crt'], 'base64').toString() const certDetails = parseCertificate(certPem) @@ -367,7 +368,8 @@ async function _createCertificateEndpointInner (certData, transaction) { subject: certData.subject, hosts: certData.hosts, expiration: certData.expiration, - ca: certData.ca + ca: certData.ca, + transaction }) } catch (error) { logger.error(`Failed to generate certificate ${certData.name}:`, error.message) @@ -375,7 +377,7 @@ async function _createCertificateEndpointInner (certData, transaction) { } // Get certificate from secret to parse details - const certSecret = await SecretService.getSecretEndpoint(certData.name) + const certSecret = await 
SecretService.getSecretEndpoint(certData.name, transaction) const certPem = Buffer.from(certSecret.data['tls.crt'], 'base64').toString() const certDetails = parseCertificate(certPem) @@ -409,7 +411,7 @@ async function getCertificateEndpoint (name, transaction) { } // Get the actual cert data from the secret - const secret = await SecretService.getSecretEndpoint(name) + const secret = await SecretService.getSecretEndpoint(name, transaction) if (!secret || secret.type !== 'tls') { throw new Errors.NotFoundError(AppHelper.formatMessage(ErrorMessages.CERTIFICATE_NOT_FOUND, name)) @@ -508,7 +510,7 @@ async function renewCertificateEndpoint (name, transaction) { // Delete existing secret (if any) - we'll create a new one try { - await SecretService.deleteSecretEndpoint(name) + await SecretService.deleteSecretEndpoint(name, transaction) } catch (error) { // Ignore NotFoundError if (!(error instanceof Errors.NotFoundError)) { @@ -548,7 +550,7 @@ async function renewCertificateEndpoint (name, transaction) { } // Generate new certificate - await generateCertificate(renewalData) + await generateCertificate({ ...renewalData, transaction }) // Get the newly created secret const secretModel = await SecretManager.findOne({ name }, transaction) diff --git a/src/services/cluster-controller-service.js b/src/services/cluster-controller-service.js index 39b4b7e6..0ee792f3 100644 --- a/src/services/cluster-controller-service.js +++ b/src/services/cluster-controller-service.js @@ -85,8 +85,9 @@ async function updateHeartbeat (uuid, transaction) { ) } -async function listClusterControllers (transaction) { - const controllers = await ClusterControllerManager.findAll({}, transaction) +async function listClusterControllers (includeInactive, transaction) { + const where = includeInactive ? 
{} : { isActive: true } + const controllers = await ClusterControllerManager.findAll(where, transaction) return controllers.map(controller => ({ uuid: controller.uuid, host: controller.host, diff --git a/src/services/config-map-service.js b/src/services/config-map-service.js index 0c6ddd9f..767be600 100644 --- a/src/services/config-map-service.js +++ b/src/services/config-map-service.js @@ -120,7 +120,6 @@ async function deleteConfigMapEndpoint (configMapName, transaction) { await ConfigMapManager.deleteConfigMap(configMapName, transaction) await _deleteVolumeMountsUsingConfigMap(configMapName, transaction) - // Vault deletion is handled by ConfigMapManager.deleteConfigMap() return {} } diff --git a/src/services/controller-ms-service.js b/src/services/controller-ms-service.js index 9d2e92d5..af414d84 100644 --- a/src/services/controller-ms-service.js +++ b/src/services/controller-ms-service.js @@ -227,8 +227,11 @@ async function _updateImages (images, microserviceUuid, transaction) { await _createMicroserviceImages(microserviceUuid, images, transaction) } -async function _updatePorts (ports, microservice, transaction) { +async function _updatePorts (ports, microservice, fog, transaction) { await MicroservicePortService.deletePortMappings(microservice, transaction) + if (ports && ports.length) { + await MicroservicePortService.validatePortMappings({ ports, iofogUuid: fog.uuid }, transaction) + } for (const mapping of ports) { await MicroservicePortService.createPortMapping(microservice, mapping, transaction) } @@ -321,8 +324,7 @@ async function _updateControllerMicroservice (existing, registerData, fog, valid ) if (registerData.ports) { - await MicroservicePortService.validatePortMappings({ ports: registerData.ports, iofogUuid: fog.uuid }, transaction) - await _updatePorts(registerData.ports, updatedMicroservice, transaction) + await _updatePorts(registerData.ports, updatedMicroservice, fog, transaction) } if (registerData.volumeMappings) { @@ -382,8 +384,6 @@ async 
function registerControllerMicroservice (registerData, fog, transaction) { return { uuid: registerData.uuid } } -const bypassOptions = { bypassQueue: true } - module.exports = { - registerControllerMicroservice: TransactionDecorator.generateTransaction(registerControllerMicroservice, bypassOptions) + registerControllerMicroservice: TransactionDecorator.generateTransaction(registerControllerMicroservice) } diff --git a/src/services/event-service.js b/src/services/event-service.js index a6c31a1e..cdf26950 100644 --- a/src/services/event-service.js +++ b/src/services/event-service.js @@ -4,6 +4,7 @@ const logger = require('../logger') const Errors = require('../helpers/errors') const Validator = require('../schemas') const TransactionDecorator = require('../decorators/transaction-decorator') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') /** * Extract resource type from URL path @@ -342,6 +343,13 @@ async function createEvent (eventData, transaction) { return EventManager.create(eventRecord, transaction) } +async function persistAuditEvent (eventData) { + return runInTransaction( + (transaction) => createEvent(eventData, transaction), + { priority: PRIORITY_BACKGROUND, label: 'event.audit' } + ) +} + /** * Create event from HTTP request/response * @param {object} req - Express request object @@ -390,9 +398,8 @@ async function createHttpEvent (req, res, startTime) { requestId: req.id || null } - // Use fake transaction for non-blocking event creation - await createEvent(eventData, { fakeTransaction: true }).catch(err => { - logger.error('Event logging failed (non-blocking):', err) + await persistAuditEvent(eventData).catch(err => { + logger.error({ err }, 'Event logging failed (non-blocking)') }) } @@ -431,9 +438,8 @@ async function createWsConnectEvent (connectionData) { requestId: null } - // Use fake transaction for non-blocking event creation - await createEvent(eventData, { fakeTransaction: true }).catch(err => { - 
logger.error('WebSocket connect event logging failed (non-blocking):', err) + await persistAuditEvent(eventData).catch(err => { + logger.error({ err }, 'WebSocket connect event logging failed (non-blocking)') }) } @@ -473,9 +479,8 @@ async function createWsDisconnectEvent (connectionData) { requestId: null } - // Use fake transaction for non-blocking event creation - await createEvent(eventData, { fakeTransaction: true }).catch(err => { - logger.error('WebSocket disconnect event logging failed (non-blocking):', err) + await persistAuditEvent(eventData).catch(err => { + logger.error({ err }, 'WebSocket disconnect event logging failed (non-blocking)') }) } @@ -635,7 +640,7 @@ async function deleteEvents (params = {}, context = {}, transaction) { const endpointType = request.path && request.path.startsWith('/api/v3/agent/') ? 'agent' : 'user' const actorId = extractActorId(request) - await createEvent({ + await persistAuditEvent({ timestamp: Date.now(), eventType: 'HTTP', endpointType, @@ -649,11 +654,11 @@ async function deleteEvents (params = {}, context = {}, transaction) { statusCode: 200, statusMessage: days === 0 ? 
`Deleted all ${deletedCount} events` : `Deleted ${deletedCount} events older than ${days} days`, requestId: request.id || null - }, { fakeTransaction: true }).catch(err => { - logger.error('Failed to create DELETE events audit record (non-blocking):', err) + }).catch(err => { + logger.error({ err }, 'Failed to create DELETE events audit record (non-blocking)') }) } catch (error) { - logger.error('Error creating DELETE events audit record (non-blocking):', error) + logger.error({ err: error }, 'Error creating DELETE events audit record (non-blocking)') } }) diff --git a/src/services/fog-platform-service.js b/src/services/fog-platform-service.js index 337c647f..9b9c2049 100644 --- a/src/services/fog-platform-service.js +++ b/src/services/fog-platform-service.js @@ -13,8 +13,11 @@ const NatsConnectionManager = require('../data/managers/nats-connection-manager' const ChangeTrackingService = require('./change-tracking-service') const IofogService = require('./iofog-service') const NatsService = require('./nats-service') +const ReconcileOutboxManager = require('../data/managers/reconcile-outbox-manager') const RouterService = require('./router-service') const ServiceBridgeConfig = require('./service-bridge-config') +const transactionRunner = require('../helpers/transaction-runner') +const { PRIORITY_BACKGROUND } = transactionRunner const logger = require('../logger') function buildFogDataFromSpecAndFog (fog, spec) { @@ -121,6 +124,88 @@ function topologyChanged (before, after) { before.upstreamNatsServers !== after.upstreamNatsServers } +function serializeEndpointSnapshot (snapshot) { + return JSON.stringify(snapshot || {}) +} + +function endpointsChanged (before, after) { + return serializeEndpointSnapshot(before) !== serializeEndpointSnapshot(after) +} + +async function captureEndpointSnapshot (fogUuid, fog, spec, transaction) { + const router = await RouterManager.findOne({ iofogUuid: fogUuid }, transaction) + const nats = await NatsInstanceManager.findByFog(fogUuid, 
transaction) + const host = spec.host != null ? spec.host : (fog ? fog.host : null) + + return { + host: host || '', + routerHost: router ? (router.host || '') : '', + natsHost: nats ? (nats.host || '') : '', + messagingPort: String(spec.messagingPort ?? (router ? router.messagingPort : '')), + interRouterPort: String(spec.interRouterPort ?? (router ? router.interRouterPort : '')), + edgeRouterPort: String(spec.edgeRouterPort ?? (router ? router.edgeRouterPort : '')), + natsServerPort: String(spec.natsServerPort ?? (nats ? nats.serverPort : '')), + natsLeafPort: String(spec.natsLeafPort ?? (nats ? nats.leafPort : '')), + natsClusterPort: String(spec.natsClusterPort ?? (nats ? nats.clusterPort : '')), + natsMqttPort: String(spec.natsMqttPort ?? (nats ? nats.mqttPort : '')), + natsHttpPort: String(spec.natsHttpPort ?? (nats ? nats.httpPort : '')) + } +} + +async function getDownstreamFogUuidsForUpstream (fogUuid, transaction) { + const downstreamUuids = new Set() + + const upstreamRouter = await RouterManager.findOne({ iofogUuid: fogUuid }, transaction) + if (upstreamRouter) { + const downstreamConnections = await RouterConnectionManager.findAllWithRouters( + { destRouter: upstreamRouter.id }, + transaction + ) + for (const connection of downstreamConnections || []) { + if (connection.source && connection.source.iofogUuid) { + downstreamUuids.add(connection.source.iofogUuid) + } + } + } + + const upstreamNats = await NatsInstanceManager.findByFog(fogUuid, transaction) + if (upstreamNats) { + const downstreamConnections = await NatsConnectionManager.findAllWithNats( + { destNats: upstreamNats.id }, + transaction + ) + for (const connection of downstreamConnections || []) { + if (connection.source && connection.source.iofogUuid) { + downstreamUuids.add(connection.source.iofogUuid) + } + } + } + + return [...downstreamUuids] +} + +async function resolveNatsConfigFromSpec (fogUuid, spec, transaction) { + const natsConfig = buildNatsConfig(spec) + if 
(spec.upstreamNatsServers !== undefined) { + return natsConfig + } + + const defaultHub = await NatsInstanceManager.findOne({ isHub: true }, transaction) + const nats = await NatsInstanceManager.findByFog(fogUuid, transaction) + if (!nats) { + return natsConfig + } + + const connections = await NatsConnectionManager.findAllWithNats({ sourceNats: nats.id }, transaction) + if (connections && connections.length > 0) { + natsConfig.upstreamNatsServers = connections.map( + (connection) => _getNatsUuid(connection.dest, defaultHub) + ) + } + + return natsConfig +} + function truncateErrorMessage (errorMessage, maxLength = 200) { return errorMessage.length > maxLength ? errorMessage.slice(0, maxLength) : errorMessage } @@ -146,155 +231,263 @@ function buildReadyConditions (spec, router, nats) { ] } -async function reconcileFog (fogUuid, transaction) { - const startedAt = Date.now() - let generation = null - let phase = 'Progressing' +async function reconcileFogPrepare (fogUuid, transaction) { + const fog = await FogManager.findOneWithTags({ uuid: fogUuid }, transaction) + if (!fog) { + throw new Errors.NotFoundError(AppHelper.formatMessage(ErrorMessages.INVALID_IOFOG_UUID, fogUuid)) + } - try { - const fog = await FogManager.findOneWithTags({ uuid: fogUuid }, transaction) - if (!fog) { - throw new Errors.NotFoundError(AppHelper.formatMessage(ErrorMessages.INVALID_IOFOG_UUID, fogUuid)) - } + const parsedSpec = await FogPlatformSpecManager.getParsedSpec(fogUuid, transaction) + if (!parsedSpec) { + throw new Errors.NotFoundError(`Fog platform spec not found for fog ${fogUuid}`) + } - const parsedSpec = await FogPlatformSpecManager.getParsedSpec(fogUuid, transaction) - if (!parsedSpec) { - throw new Errors.NotFoundError(`Fog platform spec not found for fog ${fogUuid}`) + const status = await FogPlatformStatusManager.getParsedStatus(fogUuid, transaction) + if (status && status.phase === 'Deleting') { + return { + skipped: true, + reason: 'deleting', + generation: 
parsedSpec.generation, + phase: status.phase } + } - const status = await FogPlatformStatusManager.getParsedStatus(fogUuid, transaction) - if (status && status.phase === 'Deleting') { - logger.info('fogPlatformReconcile skipped delete-owned fog', { - fogUuid, - generation: parsedSpec.generation, - phase: status.phase, - durationMs: Date.now() - startedAt - }) - return { skipped: true, reason: 'deleting' } - } + const generation = parsedSpec.generation + const spec = parsedSpec.spec + const fogData = buildFogDataFromSpecAndFog(fog, spec) + const topologyBefore = await captureTopologySnapshot(fogUuid, transaction) + const endpointsBefore = await captureEndpointSnapshot(fogUuid, fog, spec, transaction) - generation = parsedSpec.generation - const spec = parsedSpec.spec - const fogData = buildFogDataFromSpecAndFog(fog, spec) - const topologyBefore = await captureTopologySnapshot(fogUuid, transaction) + await FogPlatformStatusManager.setPhase(fogUuid, 'Progressing', { lastError: null }, transaction) + validateSystemFogInvariants(fog, spec) - await FogPlatformStatusManager.setPhase(fogUuid, 'Progressing', {}, transaction) - validateSystemFogInvariants(fog, spec) + const router = await RouterManager.findOne({ iofogUuid: fogUuid }, transaction) + const oldRouterMode = router ? (router.isEdge ? 'edge' : 'interior') : 'none' + const isRouterModeChanged = spec.routerMode !== oldRouterMode && + (spec.routerMode === 'none' || oldRouterMode === 'none') + const isHostChanged = spec.host != null && spec.host !== fog.host + const shouldRecreateCerts = isRouterModeChanged || isHostChanged - const router = await RouterManager.findOne({ iofogUuid: fogUuid }, transaction) - const oldRouterMode = router ? (router.isEdge ? 
'edge' : 'interior') : 'none' - const isRouterModeChanged = spec.routerMode !== oldRouterMode && - (spec.routerMode === 'none' || oldRouterMode === 'none') - const isHostChanged = spec.host != null && spec.host !== fog.host - const shouldRecreateCerts = isRouterModeChanged || isHostChanged + return { + fog, + spec, + fogData, + generation, + topologyBefore, + endpointsBefore, + shouldRecreateCerts, + isHostChanged, + natsConfig: await resolveNatsConfigFromSpec(fogUuid, spec, transaction), + isFirstReconcile: !status || status.observedGeneration === 0, + router + } +} - await IofogService._handleRouterCertificates(fogData, fogUuid, shouldRecreateCerts, transaction) - if (shouldRecreateCerts) { - await ChangeTrackingService.update(fogUuid, ChangeTrackingService.events.volumeMounts, transaction) - } +async function reconcileFogCertPrep (fogUuid, prep) { + await transactionRunner.runInTransaction( + (transaction) => IofogService._handleRouterCertificates( + prep.fogData, + fogUuid, + prep.shouldRecreateCerts, + transaction + ), + { priority: PRIORITY_BACKGROUND, label: 'fogPlatform.certPrep' } + ) + + if (prep.shouldRecreateCerts) { + await transactionRunner.runInTransaction( + (transaction) => ChangeTrackingService.update( + fogUuid, + ChangeTrackingService.events.volumeMounts, + transaction + ), + { priority: PRIORITY_BACKGROUND, label: 'fogPlatform.certPrepVolumeMounts' } + ) + } - const natsConfig = buildNatsConfig(spec) - if (spec.natsMode === 'none') { - await NatsService.cleanupNatsForFog(fog, transaction) - await IofogService._deleteNatsMicroserviceByFog(fogData, transaction) + if (prep.isHostChanged && prep.spec.natsMode !== 'none') { + await transactionRunner.runInTransaction( + (transaction) => IofogService._reconcileNatsCertificatesOnHostChange(prep.fog, transaction), + { priority: PRIORITY_BACKGROUND, label: 'fogPlatform.certPrepNatsHost' } + ) + } +} + +async function reconcileFogNats (fogUuid, prep) { + if (prep.spec.natsMode === 'none') { + await 
NatsService.cleanupNatsForFogPhased(prep.fog) + await transactionRunner.runInTransaction(async (transaction) => { + await IofogService._deleteNatsMicroserviceByFog(prep.fogData, transaction) await ChangeTrackingService.update(fogUuid, ChangeTrackingService.events.microserviceList, transaction) + }, { priority: PRIORITY_BACKGROUND, label: 'fogPlatform.natsCleanup' }) + } else { + await NatsService.ensureNatsForFogPhased(prep.fog, prep.natsConfig) + } +} + +async function reconcileFogPlatform (fogUuid, prep, transaction) { + const { fog, spec, fogData, router } = prep + let networkRouter = null + + if (spec.routerMode === 'none') { + networkRouter = await RouterService.getNetworkRouter(spec.networkRouter, transaction) + if (!networkRouter) { + throw new Errors.NotFoundError(AppHelper.formatMessage( + ErrorMessages.INVALID_ROUTER, + spec.networkRouter || Constants.DEFAULT_ROUTER_NAME + )) + } + if (router) { + await IofogService._deleteFogRouter(fogData, transaction) + } + await FogManager.update({ uuid: fogUuid }, { routerId: networkRouter.id }, transaction) + } else { + const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) + const upstreamConnections = router + ? 
await RouterConnectionManager.findAllWithRouters({ sourceRouter: router.id }, transaction) + : [] + let upstreamRoutersIofogUuid + if (spec.upstreamRouters !== undefined) { + upstreamRoutersIofogUuid = spec.upstreamRouters + } else if (upstreamConnections && upstreamConnections.length > 0) { + upstreamRoutersIofogUuid = upstreamConnections.map( + (connection) => _getRouterUuid(connection.dest, defaultRouter) + ) } else { - if (isHostChanged) { - await IofogService._reconcileNatsCertificatesOnHostChange(fog, transaction) - } - await NatsService.ensureNatsForFog(fog, natsConfig, transaction) + upstreamRoutersIofogUuid = undefined } - - let networkRouter = null - if (spec.routerMode === 'none') { - networkRouter = await RouterService.getNetworkRouter(spec.networkRouter, transaction) - if (!networkRouter) { - throw new Errors.NotFoundError(AppHelper.formatMessage( - ErrorMessages.INVALID_ROUTER, - spec.networkRouter || Constants.DEFAULT_ROUTER_NAME - )) - } - if (router) { - await IofogService._deleteFogRouter(fogData, transaction) - } - await FogManager.update({ uuid: fogUuid }, { routerId: networkRouter.id }, transaction) + const upstreamRouters = await RouterService.validateAndReturnUpstreamRouters( + upstreamRoutersIofogUuid, + fog.isSystem, + defaultRouter, + transaction + ) + + const host = spec.host || (router ? router.host : null) + if (!router) { + networkRouter = await RouterService.createRouterForFog(fogData, fogUuid, upstreamRouters, transaction) } else { - const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) - const upstreamConnections = router - ? 
await RouterConnectionManager.findAllWithRouters({ sourceRouter: router.id }, transaction) - : [] - const upstreamRoutersIofogUuid = spec.upstreamRouters || (upstreamConnections || []) - .map((connection) => connection.dest.iofogUuid) - const upstreamRouters = await RouterService.validateAndReturnUpstreamRouters( - upstreamRoutersIofogUuid, - fog.isSystem, - defaultRouter, - transaction - ) - - const host = spec.host || (router ? router.host : null) - if (!router) { - networkRouter = await RouterService.createRouterForFog(fogData, fogUuid, upstreamRouters, transaction) - } else { - networkRouter = await RouterService.updateRouter(router, { - messagingPort: spec.messagingPort || router.messagingPort, - interRouterPort: spec.interRouterPort || router.interRouterPort, - edgeRouterPort: spec.edgeRouterPort || router.edgeRouterPort, - isEdge: spec.routerMode === 'edge', - host - }, upstreamRouters, spec.containerEngine || fog.containerEngine, transaction) - } - - const baseRouterConfig = await IofogService._getRouterMicroserviceConfig(fogUuid, transaction) - await ServiceBridgeConfig.recomputeServiceBridgeConfig(fogUuid, baseRouterConfig, transaction) + networkRouter = await RouterService.updateRouter(router, { + messagingPort: spec.messagingPort || router.messagingPort, + interRouterPort: spec.interRouterPort || router.interRouterPort, + edgeRouterPort: spec.edgeRouterPort || router.edgeRouterPort, + isEdge: spec.routerMode === 'edge', + host + }, upstreamRouters, spec.containerEngine || fog.containerEngine, transaction) } - if (spec.host && spec.host !== fog.host) { - await IofogService._updateMicroserviceExtraHosts(fogUuid, spec.host, transaction) - } + const activeRouterId = networkRouter.id ?? 
router.id + const baseRouterConfig = await RouterService.buildFreshRouterMicroserviceConfig( + activeRouterId, + spec.containerEngine || fog.containerEngine, + transaction + ) + await ServiceBridgeConfig.recomputeServiceBridgeConfig(fogUuid, baseRouterConfig, transaction) + } - if (fog.abstractedHardwareEnabled === true && spec.abstractedHardwareEnabled === false) { - await IofogService._deleteHalMicroserviceByFog(fogData, transaction) - await ChangeTrackingService.update(fogUuid, ChangeTrackingService.events.microserviceCommon, transaction) - } else if (fog.abstractedHardwareEnabled === false && spec.abstractedHardwareEnabled === true) { - await IofogService._createHalMicroserviceForFog(fogData, fog, transaction) - await ChangeTrackingService.update(fogUuid, ChangeTrackingService.events.microserviceCommon, transaction) - } + if (spec.host && spec.host !== fog.host) { + await IofogService._updateMicroserviceExtraHosts(fogUuid, spec.host, transaction) + } - if (fog.bluetoothEnabled === true && spec.bluetoothEnabled === false) { - await IofogService._deleteBluetoothMicroserviceByFog(fogData, transaction) - await ChangeTrackingService.update(fogUuid, ChangeTrackingService.events.microserviceCommon, transaction) - } else if (fog.bluetoothEnabled === false && spec.bluetoothEnabled === true) { - await IofogService._createBluetoothMicroserviceForFog(fogData, fog, transaction) - await ChangeTrackingService.update(fogUuid, ChangeTrackingService.events.microserviceCommon, transaction) - } + if (fog.abstractedHardwareEnabled === true && spec.abstractedHardwareEnabled === false) { + await IofogService._deleteHalMicroserviceByFog(fogData, transaction) + await ChangeTrackingService.update(fogUuid, ChangeTrackingService.events.microserviceCommon, transaction) + } else if (fog.abstractedHardwareEnabled === false && spec.abstractedHardwareEnabled === true) { + await IofogService._createHalMicroserviceForFog(fogData, fog, transaction) + await ChangeTrackingService.update(fogUuid, 
ChangeTrackingService.events.microserviceCommon, transaction) + } - const isFirstReconcile = !status || status.observedGeneration === 0 - if (isFirstReconcile) { - await ChangeTrackingService.create(fogUuid, transaction) - } + if (fog.bluetoothEnabled === true && spec.bluetoothEnabled === false) { + await IofogService._deleteBluetoothMicroserviceByFog(fogData, transaction) await ChangeTrackingService.update(fogUuid, ChangeTrackingService.events.microserviceCommon, transaction) + } else if (fog.bluetoothEnabled === false && spec.bluetoothEnabled === true) { + await IofogService._createBluetoothMicroserviceForFog(fogData, fog, transaction) + await ChangeTrackingService.update(fogUuid, ChangeTrackingService.events.microserviceCommon, transaction) + } + + if (prep.isFirstReconcile) { + await ChangeTrackingService.create(fogUuid, transaction) + } + await ChangeTrackingService.update(fogUuid, ChangeTrackingService.events.microserviceCommon, transaction) + + return { networkRouter } +} + +async function reconcileFogFinalize (fogUuid, prep, platformResult, transaction) { + const { spec, generation, topologyBefore } = prep - const routerAfter = await RouterManager.findOne({ iofogUuid: fogUuid }, transaction) - const natsAfter = await NatsInstanceManager.findByFog(fogUuid, transaction) - const topologyAfter = await captureTopologySnapshot(fogUuid, transaction) + const routerAfter = await RouterManager.findOne({ iofogUuid: fogUuid }, transaction) + const natsAfter = await NatsInstanceManager.findByFog(fogUuid, transaction) + const topologyAfter = await captureTopologySnapshot(fogUuid, transaction) - if (topologyChanged(topologyBefore, topologyAfter)) { - await NatsService.enqueueReconcileTask({ - reason: 'cluster-routes-changed', - fogUuids: [fogUuid] + if (topologyChanged(topologyBefore, topologyAfter)) { + await ReconcileOutboxManager.enqueueNats({ + reason: 'cluster-routes-changed', + fogUuids: [fogUuid] + }, transaction) + } + + const endpointsAfter = await 
captureEndpointSnapshot(fogUuid, prep.fog, prep.spec, transaction) + if (endpointsChanged(prep.endpointsBefore, endpointsAfter)) { + const downstreamUuids = await getDownstreamFogUuidsForUpstream(fogUuid, transaction) + for (const downstreamUuid of downstreamUuids) { + await ReconcileOutboxManager.enqueueFogPlatform({ + fogUuid: downstreamUuid, + reason: 'spec-changed' }, transaction) } + } - phase = 'Ready' - await FogPlatformStatusManager.setPhase(fogUuid, 'Ready', { - observedGeneration: generation, - lastError: null, - conditions: buildReadyConditions(spec, routerAfter, natsAfter) - }, transaction) + await FogPlatformStatusManager.setPhase(fogUuid, 'Ready', { + observedGeneration: generation, + lastError: null, + conditions: buildReadyConditions(spec, routerAfter, natsAfter) + }, transaction) + + await FogManager.update({ uuid: fogUuid }, { warningMessage: 'HEALTHY' }, transaction) + + return { + networkRouterId: (platformResult.networkRouter && platformResult.networkRouter.id) || (routerAfter && routerAfter.id) || null + } +} + +async function reconcileFog (fogUuid) { + const startedAt = Date.now() + let generation = null + let phase = 'Progressing' + + try { + const prep = await transactionRunner.runInTransaction( + (transaction) => reconcileFogPrepare(fogUuid, transaction), + { priority: PRIORITY_BACKGROUND, label: 'fogPlatform.prepare' } + ) - await FogManager.update({ uuid: fogUuid }, { warningMessage: 'HEALTHY' }, transaction) + if (prep.skipped) { + logger.info('fogPlatformReconcile skipped delete-owned fog', { + fogUuid, + generation: prep.generation, + phase: prep.phase, + durationMs: Date.now() - startedAt + }) + return { skipped: true, reason: prep.reason } + } + generation = prep.generation + + await reconcileFogCertPrep(fogUuid, prep) + await reconcileFogNats(fogUuid, prep) + + const platformResult = await transactionRunner.runInTransaction( + (transaction) => reconcileFogPlatform(fogUuid, prep, transaction), + { priority: PRIORITY_BACKGROUND, 
label: 'fogPlatform.platform' } + ) + + const finalizeResult = await transactionRunner.runInTransaction( + (transaction) => reconcileFogFinalize(fogUuid, prep, platformResult, transaction), + { priority: PRIORITY_BACKGROUND, label: 'fogPlatform.finalize' } + ) + + phase = 'Ready' logger.info('fogPlatformReconcile completed', { fogUuid, generation, @@ -306,7 +499,7 @@ async function reconcileFog (fogUuid, transaction) { fogUuid, generation, phase, - networkRouterId: networkRouter ? networkRouter.id : null + networkRouterId: finalizeResult.networkRouterId } } catch (error) { logger.error('fogPlatformReconcile failed', { @@ -332,12 +525,15 @@ async function reconcileFogDelete (fogUuid, transaction) { return { skipped: true, reason: 'not-found' } } + logger.info('fogPlatformReconcile delete starting', { fogUuid }) + const parsedSpec = await FogPlatformSpecManager.getParsedSpec(fogUuid, transaction) const fogData = parsedSpec ? buildFogDataFromSpecAndFog(fog, parsedSpec.spec) : { uuid: fogUuid, name: fog.name, containerEngine: fog.containerEngine } await IofogService._deleteFogRouter(fogData, transaction) + logger.info('fogPlatformReconcile delete router removed', { fogUuid }) await IofogService._processDeleteCommand(fog, transaction) logger.info('fogPlatformReconcile delete completed', { @@ -349,14 +545,21 @@ async function reconcileFogDelete (fogUuid, transaction) { return { fogUuid, deleted: true } } -const bypassOptions = { bypassQueue: true } - module.exports = { buildFogDataFromSpecAndFog, validateSystemFogInvariants, captureTopologySnapshot, + captureEndpointSnapshot, + endpointsChanged, + getDownstreamFogUuidsForUpstream, + resolveNatsConfigFromSpec, topologyChanged, markReconcileFailed, - reconcileFog: TransactionDecorator.generateTransaction(reconcileFog, bypassOptions), - reconcileFogDelete: TransactionDecorator.generateTransaction(reconcileFogDelete, bypassOptions) + reconcileFogPrepare, + reconcileFogCertPrep, + reconcileFogNats, + reconcileFogPlatform, + 
reconcileFogFinalize, + reconcileFog, + reconcileFogDelete: TransactionDecorator.generateTransaction(reconcileFogDelete) } diff --git a/src/services/iofog-service.js b/src/services/iofog-service.js index 875d5c22..affde2fa 100644 --- a/src/services/iofog-service.js +++ b/src/services/iofog-service.js @@ -50,7 +50,7 @@ const FogPublicKeyManager = require('../data/managers/iofog-public-key-manager') const { getServiceAnnotationTag } = require('../config/flavor') const FogPlatformSpecManager = require('../data/managers/fog-platform-spec-manager') const FogPlatformStatusManager = require('../data/managers/fog-platform-status-manager') -const FogPlatformReconcileTaskManager = require('../data/managers/fog-platform-reconcile-task-manager') +const ReconcileOutboxManager = require('../data/managers/reconcile-outbox-manager') const { buildPlatformSpecFromFogData, mergePlatformSpecPatch @@ -413,14 +413,19 @@ async function createFogEndPoint (fogData, isCLI, transaction) { let defaultRouter if (fogData.routerMode === 'none') { - const networkRouter = await RouterService.getNetworkRouter(fogData.networkRouter) + const networkRouter = await RouterService.getNetworkRouter(fogData.networkRouter, transaction) if (!networkRouter) { throw new Errors.NotFoundError(AppHelper.formatMessage(ErrorMessages.INVALID_ROUTER, !fogData.networkRouter ? 
Constants.DEFAULT_ROUTER_NAME : fogData.networkRouter)) } createFogData.routerId = networkRouter.id } else { defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) - await RouterService.validateAndReturnUpstreamRouters(fogData.upstreamRouters, fogData.isSystem, defaultRouter) + await RouterService.validateAndReturnUpstreamRouters( + fogData.upstreamRouters, + fogData.isSystem, + defaultRouter, + transaction + ) } const fog = await FogManager.create(createFogData, transaction) @@ -431,7 +436,7 @@ async function createFogEndPoint (fogData, isCLI, transaction) { const platformSpec = buildPlatformSpecFromFogData(fogData, { applyCreateDefaults: true }) const { generation } = await FogPlatformSpecManager.upsertSpec(fog.uuid, platformSpec, transaction) await FogPlatformStatusManager.ensurePending(fog.uuid, transaction) - await FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask({ + await ReconcileOutboxManager.enqueueFogPlatform({ fogUuid: fog.uuid, reason: 'spec-changed', specGeneration: generation @@ -450,7 +455,7 @@ async function _setTags (fogModel, tagsArray, transaction) { } tags.push(tagModel) } - await fogModel.setTags(tags) + await fogModel.setTags(tags, { transaction }) } } @@ -562,10 +567,12 @@ async function updateFogEndPoint (fogData, isCLI, transaction) { await FogManager.update(queryFogData, updateFogData, transaction) await ChangeTrackingService.update(fogData.uuid, ChangeTrackingService.events.config, transaction) - const mergedSpec = mergePlatformSpecPatch(parsedSpec ? parsedSpec.spec : {}, fogData) + const existingSpec = parsedSpec ? 
parsedSpec.spec : {} + const mergedSpec = mergePlatformSpecPatch(existingSpec, fogData) + const { generation } = await FogPlatformSpecManager.upsertSpec(fogData.uuid, mergedSpec, transaction) await FogPlatformStatusManager.ensurePending(fogData.uuid, transaction) - await FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask({ + await ReconcileOutboxManager.enqueueFogPlatform({ fogUuid: fogData.uuid, reason: 'spec-changed', specGeneration: generation @@ -659,7 +666,7 @@ async function deleteFogEndPoint (fogData, isCLI, transaction) { } await FogPlatformStatusManager.setPhase(fogData.uuid, 'Deleting', {}, transaction) - await FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask({ + await ReconcileOutboxManager.enqueueFogPlatform({ fogUuid: fogData.uuid, reason: 'delete' }, transaction) @@ -679,7 +686,7 @@ async function reconcileFogEndpoint (fogData, transaction) { } const parsedSpec = await FogPlatformSpecManager.getParsedSpec(fogData.uuid, transaction) - await FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask({ + await ReconcileOutboxManager.enqueueFogPlatform({ fogUuid: fogData.uuid, reason: 'manual-retry', specGeneration: parsedSpec ? 
parsedSpec.generation : null @@ -1063,6 +1070,8 @@ function _filterFogs (fogs, filters) { } async function _processDeleteCommand (fog, transaction) { + await NatsService.cleanupNatsForFog(fog, transaction) + const microservices = await MicroserviceManager.findAll({ iofogUuid: fog.uuid }, transaction) for (const microservice of microservices) { await MicroserviceService.deleteMicroserviceWithRoutesAndPortMappings(microservice, transaction) @@ -1091,7 +1100,6 @@ async function _processDeleteCommand (fog, transaction) { await SecretManager.delete({ name: secretName }, transaction) } } - await NatsService.cleanupNatsForFog(fog, transaction) const fogPublicKey = await FogPublicKeyManager.findByFogUuid(fog.uuid, transaction) if (fogPublicKey) { await FogKeyService.deletePublicKey(fog.uuid, transaction) @@ -1527,13 +1535,11 @@ async function _updateImages (images, microserviceUuid, transaction) { return _createMicroserviceImages({ uuid: microserviceUuid }, images, transaction) } -const bypassOptions = { bypassQueue: true } - module.exports = { - createFogEndPoint: TransactionDecorator.generateTransaction(createFogEndPoint, bypassOptions), - updateFogEndPoint: TransactionDecorator.generateTransaction(updateFogEndPoint, bypassOptions), - deleteFogEndPoint: TransactionDecorator.generateTransaction(deleteFogEndPoint, bypassOptions), - reconcileFogEndpoint: TransactionDecorator.generateTransaction(reconcileFogEndpoint, bypassOptions), + createFogEndPoint: TransactionDecorator.generateTransaction(createFogEndPoint), + updateFogEndPoint: TransactionDecorator.generateTransaction(updateFogEndPoint), + deleteFogEndPoint: TransactionDecorator.generateTransaction(deleteFogEndPoint), + reconcileFogEndpoint: TransactionDecorator.generateTransaction(reconcileFogEndpoint), getFogEndPoint: TransactionDecorator.generateTransaction(getFogEndPoint), getFogListEndPoint: TransactionDecorator.generateTransaction(getFogListEndPoint), generateProvisioningKeyEndPoint: 
TransactionDecorator.generateTransaction(generateProvisioningKeyEndPoint), @@ -1547,21 +1553,21 @@ module.exports = { enableNodeExecEndPoint: TransactionDecorator.generateTransaction(enableNodeExecEndPoint), disableNodeExecEndPoint: TransactionDecorator.generateTransaction(disableNodeExecEndPoint), _extractServiceTags, - _findMatchingServices: TransactionDecorator.generateTransaction(_findMatchingServices), + _findMatchingServices, _buildTcpListenerForFog, - _getRouterMicroserviceConfig: TransactionDecorator.generateTransaction(_getRouterMicroserviceConfig), - _extractExistingTcpConnectors: TransactionDecorator.generateTransaction(_extractExistingTcpConnectors), + _getRouterMicroserviceConfig, + _extractExistingTcpConnectors, _mergeTcpConnector, _mergeTcpListener, checkKubernetesEnvironment, - _handleRouterCertificates: TransactionDecorator.generateTransaction(_handleRouterCertificates), - _deleteFogRouter: TransactionDecorator.generateTransaction(_deleteFogRouter), - _processDeleteCommand: TransactionDecorator.generateTransaction(_processDeleteCommand), - _reconcileNatsCertificatesOnHostChange: TransactionDecorator.generateTransaction(_reconcileNatsCertificatesOnHostChange), - _deleteNatsMicroserviceByFog: TransactionDecorator.generateTransaction(_deleteNatsMicroserviceByFog), - _createHalMicroserviceForFog: TransactionDecorator.generateTransaction(_createHalMicroserviceForFog), - _deleteHalMicroserviceByFog: TransactionDecorator.generateTransaction(_deleteHalMicroserviceByFog), - _createBluetoothMicroserviceForFog: TransactionDecorator.generateTransaction(_createBluetoothMicroserviceForFog), - _deleteBluetoothMicroserviceByFog: TransactionDecorator.generateTransaction(_deleteBluetoothMicroserviceByFog), - _updateMicroserviceExtraHosts: TransactionDecorator.generateTransaction(_updateMicroserviceExtraHosts) + _handleRouterCertificates, + _deleteFogRouter, + _processDeleteCommand, + _reconcileNatsCertificatesOnHostChange, + _deleteNatsMicroserviceByFog, + 
_createHalMicroserviceForFog, + _deleteHalMicroserviceByFog, + _createBluetoothMicroserviceForFog, + _deleteBluetoothMicroserviceByFog, + _updateMicroserviceExtraHosts } diff --git a/src/services/microservice-ports/microservice-port.js b/src/services/microservice-ports/microservice-port.js index 9e541633..27ecedc6 100644 --- a/src/services/microservice-ports/microservice-port.js +++ b/src/services/microservice-ports/microservice-port.js @@ -14,9 +14,11 @@ async function _checkForDuplicatePorts (agent, localPort, transaction) { throw new Errors.ValidationError(AppHelper.formatMessage(ErrorMessages.PORT_RESERVED, localPort)) } - const microservices = await agent.getMicroservice() + // Read within the caller transaction so uncommitted deletes are visible (Plan 19). + const assocOptions = transaction != null ? { transaction } : undefined + const microservices = await agent.getMicroservice(assocOptions) for (const microservice of microservices) { - const ports = await microservice.getPorts() + const ports = await microservice.getPorts(assocOptions) if (ports.find(port => port.portExternal === localPort)) { throw new Errors.ValidationError(AppHelper.formatMessage(ErrorMessages.PORT_NOT_AVAILABLE, localPort)) } diff --git a/src/services/microservices-service.js b/src/services/microservices-service.js index db10325f..7245327e 100644 --- a/src/services/microservices-service.js +++ b/src/services/microservices-service.js @@ -35,7 +35,6 @@ const SecretManager = require('../data/managers/secret-manager') const VolumeMountService = require('./volume-mount-service') const RbacServiceAccountManager = require('../data/managers/rbac-service-account-manager') const RbacRoleManager = require('../data/managers/rbac-role-manager') -const RbacCacheVersionManager = require('../data/managers/rbac-cache-version-manager') const NatsAuthService = require('./nats-auth-service') const NatsUserRuleManager = require('../data/managers/nats-user-rule-manager') const NatsRuleJwtValidation = 
require('../helpers/nats-rule-jwt-validation') @@ -67,28 +66,19 @@ async function _createOrUpdateServiceAccountForMicroservice (microserviceUuid, m throw new Errors.ValidationError(`Referenced role '${roleName}' does not exist`) } - const roleRef = { - kind: 'Role', - name: roleName - } - - const existingServiceAccount = await RbacServiceAccountManager.findOneByMicroserviceUuid(microserviceUuid, transaction) - - if (existingServiceAccount) { - await RbacServiceAccountManager.update({ id: existingServiceAccount.id }, { roleRef, name: microserviceName }, transaction) - await RbacCacheVersionManager.incrementVersion(transaction) - return RbacServiceAccountManager.findOne({ id: existingServiceAccount.id }, transaction) - } - const microservice = await MicroserviceManager.findOne({ uuid: microserviceUuid }, transaction) if (!microservice || microservice.applicationId == null) { throw new Errors.ValidationError('Microservice or application not found for service account creation') } + return RbacServiceAccountManager.createServiceAccount({ microserviceUuid, applicationId: microservice.applicationId, name: microserviceName, - roleRef + roleRef: { + kind: 'Role', + name: roleName + } }, transaction) } @@ -1434,19 +1424,21 @@ async function updateMicroserviceEndPoint (microserviceUuid, microserviceData, i throw error } - const shouldEnableNats = microserviceData.natsAccess === true - const shouldDisableNats = microserviceData.natsAccess === false && microservice.natsAccess + const natsAccessInPatch = Object.prototype.hasOwnProperty.call(microserviceData, 'natsAccess') + const shouldEnableNats = natsAccessInPatch && microserviceData.natsAccess === true && !microservice.natsAccess + const shouldDisableNats = natsAccessInPatch && microserviceData.natsAccess === false && microservice.natsAccess const natsRuleChanged = Object.prototype.hasOwnProperty.call(microserviceData, 'natsRuleId') && microserviceData.natsRuleId !== microservice.natsRuleId - if (shouldEnableNats) { - if 
(natsRuleChanged) { - await NatsAuthService.reissueUserForMicroservice(updatedMicroservice.uuid, transaction) - } - await _ensureNatsCredsForMicroservice(updatedMicroservice, transaction) - } else if (shouldDisableNats) { + if (shouldDisableNats) { await _detachNatsCredsForMicroservice(microservice, transaction) await NatsAuthService.revokeMicroserviceUser(microservice.uuid, transaction) + } else if (microservice.natsAccess || shouldEnableNats) { + if (natsRuleChanged || shouldEnableNats) { + const mutationKind = shouldEnableNats ? 'access-enable' : 'rule-change' + await NatsAuthService.reissueUserForMicroservice(updatedMicroservice.uuid, transaction, { mutationKind }) + } + await _ensureNatsCredsForMicroservice(updatedMicroservice, transaction) } if (changeTrackingEnabled) { @@ -1717,7 +1709,7 @@ async function createPortMappingEndPoint (microserviceUuid, portMappingData, isC if (!agent) { throw new Errors.ValidationError(AppHelper.formatMessage(ErrorMessages.INVALID_IOFOG_UUID, microservice.iofogUuid)) } - await MicroservicePortService.validatePortMapping(agent, portMappingData, {}, transaction) + await MicroservicePortService.validatePortMapping(agent, portMappingData, transaction) return MicroservicePortService.createPortMapping(microservice, portMappingData, transaction) } @@ -1738,7 +1730,7 @@ async function createSystemPortMappingEndPoint (microserviceUuid, portMappingDat if (!agent) { throw new Errors.ValidationError(AppHelper.formatMessage(ErrorMessages.INVALID_IOFOG_UUID, microservice.iofogUuid)) } - await MicroservicePortService.validatePortMapping(agent, portMappingData, {}, transaction) + await MicroservicePortService.validatePortMapping(agent, portMappingData, transaction) return MicroservicePortService.createPortMapping(microservice, portMappingData, transaction) } @@ -2713,30 +2705,28 @@ async function reconcileNatsForApplication (applicationId, transaction) { return } const microservices = await MicroserviceManager.findAll({ applicationId }, 
transaction) + const reconcileTriggerOptions = { triggerReconcile: false } for (const microservice of microservices) { if (!application.natsAccess || !microservice.natsAccess) { if (microservice.natsUserId || microservice.natsCredsSecretName || microservice.natsAccess) { - await NatsAuthService.revokeMicroserviceUser(microservice.uuid, transaction) + await NatsAuthService.revokeMicroserviceUser(microservice.uuid, transaction, reconcileTriggerOptions) await _detachNatsCredsForMicroservice(microservice, transaction) } continue } - const reconcileTriggerOptions = { triggerReconcile: false } await NatsAuthService.reissueUserForMicroservice(microservice.uuid, transaction, reconcileTriggerOptions) const refreshed = await MicroserviceManager.findOne({ uuid: microservice.uuid }, transaction) await _ensureNatsCredsForMicroservice(refreshed || microservice, transaction) } } -const bypassOptions = { bypassQueue: true } - module.exports = { - createMicroserviceEndPoint: TransactionDecorator.generateTransaction(createMicroserviceEndPoint, bypassOptions), + createMicroserviceEndPoint: TransactionDecorator.generateTransaction(createMicroserviceEndPoint), createPortMappingEndPoint: TransactionDecorator.generateTransaction(createPortMappingEndPoint), createSystemPortMappingEndPoint: TransactionDecorator.generateTransaction(createSystemPortMappingEndPoint), createVolumeMappingEndPoint: TransactionDecorator.generateTransaction(createVolumeMappingEndPoint), createSystemVolumeMappingEndPoint: TransactionDecorator.generateTransaction(createSystemVolumeMappingEndPoint), - deleteMicroserviceEndPoint: TransactionDecorator.generateTransaction(deleteMicroserviceEndPoint, bypassOptions), + deleteMicroserviceEndPoint: TransactionDecorator.generateTransaction(deleteMicroserviceEndPoint), deleteMicroserviceWithRoutesAndPortMappings, deleteNotRunningMicroservices, deletePortMappingEndPoint: TransactionDecorator.generateTransaction(deletePortMappingEndPoint), @@ -2751,8 +2741,8 @@ module.exports = 
{ listMicroservicesEndPoint: TransactionDecorator.generateTransaction(listMicroservicesEndPoint), listSystemMicroservicesEndPoint: TransactionDecorator.generateTransaction(listSystemMicroservicesEndPoint), listVolumeMappingsEndPoint: TransactionDecorator.generateTransaction(listVolumeMappingsEndPoint), - updateMicroserviceEndPoint: TransactionDecorator.generateTransaction(updateMicroserviceEndPoint, bypassOptions), - updateSystemMicroserviceEndPoint: TransactionDecorator.generateTransaction(updateSystemMicroserviceEndPoint, bypassOptions), + updateMicroserviceEndPoint: TransactionDecorator.generateTransaction(updateMicroserviceEndPoint), + updateSystemMicroserviceEndPoint: TransactionDecorator.generateTransaction(updateSystemMicroserviceEndPoint), updateMicroserviceConfigEndPoint: TransactionDecorator.generateTransaction(updateMicroserviceConfigEndPoint), getMicroserviceConfigEndPoint: TransactionDecorator.generateTransaction(getMicroserviceConfigEndPoint), getSystemMicroserviceConfigEndPoint: TransactionDecorator.generateTransaction(getSystemMicroserviceConfigEndPoint), @@ -2765,7 +2755,7 @@ module.exports = { updateChangeTracking: _updateChangeTracking, startMicroserviceEndPoint: TransactionDecorator.generateTransaction(startMicroserviceEndPoint), stopMicroserviceEndPoint: TransactionDecorator.generateTransaction(stopMicroserviceEndPoint), - reconcileNatsForApplication: TransactionDecorator.generateTransaction(reconcileNatsForApplication, bypassOptions), + reconcileNatsForApplication: TransactionDecorator.generateTransaction(reconcileNatsForApplication), injectServiceAccountVolume: _injectServiceAccountVolume, stripUserServiceAccountVolumeMappings: _stripUserServiceAccountVolumeMappings, createOrUpdateServiceAccountForMicroservice: _createOrUpdateServiceAccountForMicroservice diff --git a/src/services/nats-api-service.js b/src/services/nats-api-service.js index 8d07cb98..ab7c7b50 100644 --- a/src/services/nats-api-service.js +++ b/src/services/nats-api-service.js 
@@ -578,30 +578,28 @@ async function deleteUserRule (ruleName, transaction) { await NatsUserRuleManager.delete({ id: rule.id }, transaction) } -const bypassOptions = { bypassQueue: true } - module.exports = { - getOperator: TransactionDecorator.generateTransaction(getOperator, bypassOptions), - rotateOperator: TransactionDecorator.generateTransaction(rotateOperator, bypassOptions), - getBootstrap: TransactionDecorator.generateTransaction(getBootstrap, bypassOptions), - getHub: TransactionDecorator.generateTransaction(getHub, bypassOptions), - upsertHub: TransactionDecorator.generateTransaction(upsertHub, bypassOptions), + getOperator: TransactionDecorator.generateTransaction(getOperator), + rotateOperator: TransactionDecorator.generateTransaction(rotateOperator), + getBootstrap: TransactionDecorator.generateTransaction(getBootstrap), + getHub: TransactionDecorator.generateTransaction(getHub), + upsertHub: TransactionDecorator.generateTransaction(upsertHub), listAccounts: TransactionDecorator.generateTransaction(listAccounts), getAccount: TransactionDecorator.generateTransaction(getAccount), - ensureAccount: TransactionDecorator.generateTransaction(ensureAccount, bypassOptions), + ensureAccount: TransactionDecorator.generateTransaction(ensureAccount), listAllUsers: TransactionDecorator.generateTransaction(listAllUsers), listUsers: TransactionDecorator.generateTransaction(listUsers), - createUser: TransactionDecorator.generateTransaction(createUser, bypassOptions), + createUser: TransactionDecorator.generateTransaction(createUser), getUserCreds: TransactionDecorator.generateTransaction(getUserCreds), - deleteUser: TransactionDecorator.generateTransaction(deleteUser, bypassOptions), - createMqttBearer: TransactionDecorator.generateTransaction(createMqttBearer, bypassOptions), - deleteMqttBearer: TransactionDecorator.generateTransaction(deleteMqttBearer, bypassOptions), + deleteUser: TransactionDecorator.generateTransaction(deleteUser), + createMqttBearer: 
TransactionDecorator.generateTransaction(createMqttBearer), + deleteMqttBearer: TransactionDecorator.generateTransaction(deleteMqttBearer), listAccountRules: TransactionDecorator.generateTransaction(listAccountRules), - createAccountRule: TransactionDecorator.generateTransaction(createAccountRule, bypassOptions), - updateAccountRule: TransactionDecorator.generateTransaction(updateAccountRule, bypassOptions), - deleteAccountRule: TransactionDecorator.generateTransaction(deleteAccountRule, bypassOptions), + createAccountRule: TransactionDecorator.generateTransaction(createAccountRule), + updateAccountRule: TransactionDecorator.generateTransaction(updateAccountRule), + deleteAccountRule: TransactionDecorator.generateTransaction(deleteAccountRule), listUserRules: TransactionDecorator.generateTransaction(listUserRules), - createUserRule: TransactionDecorator.generateTransaction(createUserRule, bypassOptions), - updateUserRule: TransactionDecorator.generateTransaction(updateUserRule, bypassOptions), - deleteUserRule: TransactionDecorator.generateTransaction(deleteUserRule, bypassOptions) + createUserRule: TransactionDecorator.generateTransaction(createUserRule), + updateUserRule: TransactionDecorator.generateTransaction(updateUserRule), + deleteUserRule: TransactionDecorator.generateTransaction(deleteUserRule) } diff --git a/src/services/nats-auth-service.js b/src/services/nats-auth-service.js index 30892991..4b4e5543 100644 --- a/src/services/nats-auth-service.js +++ b/src/services/nats-auth-service.js @@ -12,6 +12,8 @@ const NatsAccountRuleManager = require('../data/managers/nats-account-rule-manag const NatsUserRuleManager = require('../data/managers/nats-user-rule-manager') const MicroserviceManager = require('../data/managers/microservice-manager') const TransactionDecorator = require('../decorators/transaction-decorator') +const ReconcileOutboxManager = require('../data/managers/reconcile-outbox-manager') +const { runInTransaction, PRIORITY_BACKGROUND, 
schedulePostCommitBackground } = require('../helpers/transaction-runner') const logger = require('../logger') const NatsSystemRules = require('../config/nats-system-rules') const { slugifyName } = require('../helpers/system-naming') @@ -225,35 +227,29 @@ function _normalizeSystemUserRuleForPersistence (rule) { } /** - * NATS reconciliation is triggered in two ways: - * (A) From this module: _triggerResolverArtifactsReconcile calls NatsService.enqueueReconcileTask (fire-and-forget). - * Call sites: ensureOperator, rotateOperator, ensureSystemAccount, createUserForAccount, ensureAccountForApplication, - * createAccountForApplication, ensureUserForMicroservice, createMqttBearerUser, ensureLeafSystemAccount, - * reissueAccountForApplication, reissueUserForMicroservice, deleteAccountForApplication, revokeMicroserviceUser, - * reissueForAccountRule, reissueForUserRule, revokeUserByAccountAndName, deleteLeafSystemArtifactsForFog, etc. - * (B) From nats-service: enqueueReconcileTask(..., transaction) inside ensureNatsForFog (cluster-routes-changed) and - * cleanupNatsForFog (server-deleted). - * All API endpoints that trigger reconciliation use the transaction-queue bypass (bypassQueue: true) so requests - * do not wait behind long-running reconcile jobs. + * NATS reconciliation is scheduled via ReconcileOutbox in the same transaction as auth mutations. + * The outbox drainer upserts NatsReconcileTask rows. Cluster-route changes in nats-service also + * enqueue outbox rows in the same transaction. 
*/ -function _triggerResolverArtifactsReconcile (triggerOptions = {}) { +async function _enqueueNatsReconcileOutbox (triggerOptions = {}, transaction) { if (triggerOptions.triggerReconcile === false) { - return + return null } - const NatsService = require('./nats-service') - if (NatsService && typeof NatsService.enqueueReconcileTask === 'function') { - const options = { reason: 'auth-mutation', ...triggerOptions } - NatsService.enqueueReconcileTask(options).catch((err) => { - logger.error(`NATS reconcile enqueue failed: ${err.message}`) - }) + const payload = { reason: 'auth-mutation', ...triggerOptions } + if (transaction) { + return ReconcileOutboxManager.enqueueNats(payload, transaction) } + return runInTransaction( + (tx) => ReconcileOutboxManager.enqueueNats(payload, tx), + { priority: PRIORITY_BACKGROUND, label: 'natsAuth.outboxEnqueue' } + ) } function _runBackgroundTask (label, task) { - setImmediate(async () => { + schedulePostCommitBackground(label, async (transaction) => { try { logger.info(`Starting background NATS task: ${label}`) - await task() + await task(transaction) logger.info(`Completed background NATS task: ${label}`) } catch (error) { logger.error(`Background NATS task failed (${label}): ${error.message}`) @@ -289,9 +285,14 @@ async function _upsertOpaqueSecret (name, data, transaction) { } } +const Transaction = require('sequelize/lib/transaction') + function _triggerOptionsFromArgs (args) { const second = args[0] - return (second && typeof second === 'object' && !second.fakeTransaction) ? second : {} + if (second instanceof Transaction) { + return {} + } + return (second && typeof second === 'object') ? 
second : {} } async function ensureOperator (transaction, ...rest) { @@ -314,7 +315,7 @@ async function ensureOperator (transaction, ...rest) { jwt: operatorJwt, seedSecretName: OPERATOR_SEED_SECRET }, transaction) - _triggerResolverArtifactsReconcile(options) + await _enqueueNatsReconcileOutbox(options, transaction) return created } @@ -357,7 +358,7 @@ async function rotateOperator (transaction) { await NatsAccountManager.update({ id: account.id }, { jwt: newAccountJwt }, transaction) } - _triggerResolverArtifactsReconcile() + await _enqueueNatsReconcileOutbox({}, transaction) return NatsOperatorManager.findOne({ id: existing.id }, transaction) } @@ -391,7 +392,7 @@ async function ensureSystemAccount (transaction, ...rest) { isLeafSystem: false, applicationId: null }, transaction) - _triggerResolverArtifactsReconcile({ ...options, reason: 'system-account-created' }) + await _enqueueNatsReconcileOutbox({ ...options, reason: 'system-account-created' }, transaction) return created } @@ -425,7 +426,7 @@ async function ensureLeafSystemAccount (fog, transaction) { isLeafSystem: true, applicationId: null }, transaction) - _triggerResolverArtifactsReconcile({ fogUuids: [fog.uuid] }) + await _enqueueNatsReconcileOutbox({ fogUuids: [fog.uuid] }, transaction) return created } @@ -439,7 +440,7 @@ async function ensureSysUserForServer (options = {}, transaction) { ? 
{ user: existingUser } : await createUserForAccount(account.id, sysUserName, null, null, null, transaction) if (created && fog && fog.uuid) { - _triggerResolverArtifactsReconcile({ fogUuids: [fog.uuid] }) + await _enqueueNatsReconcileOutbox({ fogUuids: [fog.uuid] }, transaction) } return { account, user } } @@ -452,7 +453,7 @@ async function ensureLeafSystemAccountUser (fog, transaction) { return { account, user: existing } } const result = await createUserForAccount(account.id, userName, null, null, null, transaction) - _triggerResolverArtifactsReconcile({ fogUuids: [fog.uuid] }) + await _enqueueNatsReconcileOutbox({ fogUuids: [fog.uuid] }, transaction) return result } @@ -506,7 +507,7 @@ async function deleteLeafSystemArtifactsForFog (fog, transaction) { } } await NatsAccountManager.delete({ id: account.id }, transaction) - _triggerResolverArtifactsReconcile({ fogUuids: [fog.uuid] }) + await _enqueueNatsReconcileOutbox({ fogUuids: [fog.uuid] }, transaction) } /** @@ -530,7 +531,7 @@ async function deleteServerSysUserForFog (fog, isHub, transaction) { } } await NatsUserManager.delete({ id: user.id }, transaction) - _triggerResolverArtifactsReconcile({ fogUuids: [fog.uuid] }) + await _enqueueNatsReconcileOutbox({ fogUuids: [fog.uuid] }, transaction) } async function ensureControllerNatsAccount (transaction, ...rest) { @@ -559,7 +560,7 @@ async function ensureControllerNatsAccount (transaction, ...rest) { null, transaction ) - _triggerResolverArtifactsReconcile(options) + await _enqueueNatsReconcileOutbox(options, transaction) return result } @@ -601,11 +602,12 @@ async function ensureControllerNatsAccount (transaction, ...rest) { null, transaction ) - _triggerResolverArtifactsReconcile(options) + await _enqueueNatsReconcileOutbox(options, transaction) return result } -async function ensureAccountForApplication (applicationId, transaction) { +async function ensureAccountForApplication (applicationId, transaction, ...rest) { + const options = 
_triggerOptionsFromArgs(rest) await ensureDefaultRules(transaction) const existing = await NatsAccountManager.findOne({ applicationId }, transaction) if (existing) { @@ -642,7 +644,14 @@ async function ensureAccountForApplication (applicationId, transaction) { isSystem: false, isLeafSystem: false }, transaction) - _triggerResolverArtifactsReconcile({ reason: 'account-created', applicationId: application.id }) + if (options.triggerReconcile !== false) { + await _enqueueNatsReconcileOutbox({ + reason: 'account-created', + applicationId: application.id, + mutationKind: options.mutationKind || 'access-enable', + ...options + }, transaction) + } return created } @@ -692,7 +701,7 @@ async function ensureUserForMicroservice (microservice, transaction) { natsUserRuleId: userRule ? userRule.id : null }, transaction) - _triggerResolverArtifactsReconcile({ fogUuids: [microservice.iofogUuid] }) + await _enqueueNatsReconcileOutbox({ fogUuids: [microservice.iofogUuid] }, transaction) return { account, user: natsUser } } @@ -840,7 +849,8 @@ async function createUserForAccount (accountId, userName, expiresIn, natsRuleNam return { account, user: natsUser } } -async function reissueAccountForApplication (applicationId, transaction) { +async function reissueAccountForApplication (applicationId, transaction, ...rest) { + const options = _triggerOptionsFromArgs(rest) await ensureDefaultRules(transaction) const application = await ApplicationManager.findOne({ id: applicationId }, transaction) if (!application) { @@ -869,7 +879,14 @@ async function reissueAccountForApplication (applicationId, transaction) { account.jwt ) await NatsAccountManager.update({ id: account.id }, { jwt: accountJwt }, transaction) - _triggerResolverArtifactsReconcile({ reason: 'account-created', applicationId }) + if (options.triggerReconcile !== false) { + await _enqueueNatsReconcileOutbox({ + reason: 'account-created', + applicationId, + mutationKind: options.mutationKind || 'rule-change', + ...options + }, 
transaction) + } return NatsAccountManager.findOne({ id: account.id }, transaction) } @@ -913,7 +930,13 @@ async function reissueUserForMicroservice (microserviceUuid, transaction, ...res microserviceUuid: microservice.uuid, natsUserRuleId: currentRuleId }, transaction) - _triggerResolverArtifactsReconcile({ reason: 'account-created', applicationId: microservice.applicationId, ...options }) + await _enqueueNatsReconcileOutbox({ + reason: 'account-created', + applicationId: microservice.applicationId, + microserviceUuid: microservice.uuid, + mutationKind: options.mutationKind || 'access-enable', + ...options + }, transaction) return NatsUserManager.findOne({ microserviceUuid: microservice.uuid }, transaction) } @@ -922,7 +945,13 @@ async function reissueUserForMicroservice (microserviceUuid, transaction, ...res if (oldAccount) { await _addRevocationToAccount(oldAccount, existingUser.publicKey, transaction) if (options.triggerReconcile !== false && oldAccount.applicationId != null) { - _triggerResolverArtifactsReconcile({ reason: 'account-created', applicationId: oldAccount.applicationId, ...options }) + await _enqueueNatsReconcileOutbox({ + reason: 'account-created', + applicationId: oldAccount.applicationId, + microserviceUuid: microservice.uuid, + mutationKind: options.mutationKind || 'rule-change', + ...options + }, transaction) } } const accountSeed = await _loadSeedFromSecret(account.seedSecretName, transaction) @@ -944,7 +973,13 @@ async function reissueUserForMicroservice (microserviceUuid, transaction, ...res accountId: account.id, natsUserRuleId: currentRuleId }, transaction) - _triggerResolverArtifactsReconcile({ reason: 'account-created', applicationId: microservice.applicationId, ...options }) + await _enqueueNatsReconcileOutbox({ + reason: 'account-created', + applicationId: microservice.applicationId, + microserviceUuid: microservice.uuid, + mutationKind: options.mutationKind || 'rule-change', + ...options + }, transaction) return 
NatsUserManager.findOne({ microserviceUuid: microservice.uuid }, transaction) } @@ -954,11 +989,25 @@ async function reissueUserForMicroservice (microserviceUuid, transaction, ...res const operatorSeed = await _loadSeedFromSecret(operator.seedSecretName, transaction) const operatorKp = fromSeed(new TextEncoder().encode(operatorSeed)) await _reissueOneUserForRule(existingUser, userRule.id, operatorKp, transaction) - _triggerResolverArtifactsReconcile({ reason: 'account-created', applicationId: microservice.applicationId, ...options }) + await _enqueueNatsReconcileOutbox({ + reason: 'account-created', + applicationId: microservice.applicationId, + microserviceUuid: microservice.uuid, + mutationKind: options.mutationKind || 'rule-change', + ...options + }, transaction) return NatsUserManager.findOne({ microserviceUuid: microservice.uuid }, transaction) } - _triggerResolverArtifactsReconcile({ reason: 'account-created', applicationId: microservice.applicationId, ...options }) + if (options.triggerReconcile !== false) { + await _enqueueNatsReconcileOutbox({ + reason: 'account-created', + applicationId: microservice.applicationId, + microserviceUuid: microservice.uuid, + mutationKind: options.mutationKind, + ...options + }, transaction) + } return NatsUserManager.findOne({ microserviceUuid: microservice.uuid }, transaction) } @@ -991,6 +1040,16 @@ async function ensureLeafUserForAccount (accountId, fogName, transaction, natsIn async function reissueForAccountRule (accountRuleId, transaction) { const rule = await NatsAccountRuleManager.findOne({ id: accountRuleId }, transaction) const applications = await ApplicationManager.findAll({ natsRuleId: accountRuleId }, transaction) + if (rule && rule.name === NatsSystemRules.APPLICATION_ACCOUNT_RULE_NAME) { + const defaultRuleApps = await ApplicationManager.findAll({ natsAccess: true, natsRuleId: null }, transaction) + const seenAppIds = new Set((applications || []).map((app) => app.id)) + for (const app of defaultRuleApps || 
[]) { + if (!seenAppIds.has(app.id)) { + applications.push(app) + seenAppIds.add(app.id) + } + } + } logger.info(`Reissuing account JWTs for rule ${accountRuleId}`) for (const app of applications) { const account = await NatsAccountManager.findOne({ applicationId: app.id }, transaction) @@ -1034,7 +1093,11 @@ async function reissueForAccountRule (accountRuleId, transaction) { await NatsAccountManager.update({ id: relayAccount.id }, { jwt: accountJwt }, transaction) } } - _triggerResolverArtifactsReconcile({ reason: 'account-rule-updated', accountRuleId }) + await _enqueueNatsReconcileOutbox({ + reason: 'account-rule-updated', + accountRuleId, + mutationKind: 'rule-content-update' + }, transaction) } /** @@ -1090,7 +1153,18 @@ async function _reissueOneUserForRule (user, userRuleId, operatorKp, transaction } async function reissueForUserRule (userRuleId, transaction) { + const userRule = await NatsUserRuleManager.findOne({ id: userRuleId }, transaction) const microservices = await MicroserviceManager.findAll({ natsRuleId: userRuleId }, transaction) + if (userRule && userRule.name === NatsSystemRules.MICROSERVICE_USER_RULE_NAME) { + const defaultRuleMicroservices = await MicroserviceManager.findAll({ natsAccess: true, natsRuleId: null }, transaction) + const seenMsUuids = new Set((microservices || []).map((ms) => ms.uuid)) + for (const ms of defaultRuleMicroservices || []) { + if (!seenMsUuids.has(ms.uuid)) { + microservices.push(ms) + seenMsUuids.add(ms.uuid) + } + } + } logger.info(`Reissuing user JWTs for rule ${userRuleId}`) const operator = await ensureOperator(transaction) const operatorSeed = await _loadSeedFromSecret(operator.seedSecretName, transaction) @@ -1110,10 +1184,15 @@ async function reissueForUserRule (userRuleId, transaction) { await _reissueOneUserForRule(user, userRuleId, operatorKp, transaction) processedUserIds.add(user.id) } - _triggerResolverArtifactsReconcile({ reason: 'user-rule-updated', userRuleId }) + await _enqueueNatsReconcileOutbox({ 
+ reason: 'user-rule-updated', + userRuleId, + mutationKind: 'rule-content-update' + }, transaction) } -async function revokeMicroserviceUser (microserviceUuid, transaction) { +async function revokeMicroserviceUser (microserviceUuid, transaction, ...rest) { + const options = _triggerOptionsFromArgs(rest) const user = await NatsUserManager.findOne({ microserviceUuid }, transaction) if (!user) { return @@ -1151,10 +1230,18 @@ async function revokeMicroserviceUser (microserviceUuid, transaction) { // best-effort secret cleanup } await NatsUserManager.delete({ id: user.id }, transaction) - _triggerResolverArtifactsReconcile({ reason: 'account-created', applicationId: account.applicationId }) + if (options.triggerReconcile !== false) { + await _enqueueNatsReconcileOutbox({ + reason: 'account-created', + applicationId: account.applicationId, + microserviceUuid, + mutationKind: 'access-disable' + }, transaction) + } } -async function deleteAccountForApplication (applicationId, transaction) { +async function deleteAccountForApplication (applicationId, transaction, ...rest) { + const options = _triggerOptionsFromArgs(rest) const account = await NatsAccountManager.findOne({ applicationId }, transaction) if (!account) { return @@ -1176,7 +1263,14 @@ async function deleteAccountForApplication (applicationId, transaction) { // best-effort cleanup } await NatsAccountManager.delete({ id: account.id }, transaction) - _triggerResolverArtifactsReconcile({ reason: 'account-deleted', applicationId }) + if (options.triggerReconcile !== false) { + await _enqueueNatsReconcileOutbox({ + reason: 'account-deleted', + applicationId, + mutationKind: options.mutationKind || 'access-disable', + ...options + }, transaction) + } } async function revokeUserByAccountAndName (accountId, userName, transaction) { @@ -1233,48 +1327,47 @@ async function revokeUserByAccountAndName (accountId, userName, transaction) { // best-effort secret cleanup } await NatsUserManager.delete({ id: user.id }, 
transaction) - _triggerResolverArtifactsReconcile( - account.applicationId != null ? { reason: 'account-created', applicationId: account.applicationId } : {} + await _enqueueNatsReconcileOutbox( + account.applicationId != null ? { reason: 'account-created', applicationId: account.applicationId } : {}, + transaction ) } function scheduleRotateOperator () { - _runBackgroundTask('rotate-operator', async () => { - await module.exports.rotateOperator() + _runBackgroundTask('rotate-operator', async (transaction) => { + await module.exports.rotateOperator(transaction) }) return { scheduled: true } } function scheduleReissueForAccountRule (accountRuleId) { - _runBackgroundTask(`reissue-account-rule-${accountRuleId}`, async () => { - await module.exports.reissueForAccountRule(accountRuleId) + _runBackgroundTask(`reissue-account-rule-${accountRuleId}`, async (transaction) => { + await module.exports.reissueForAccountRule(accountRuleId, transaction) }) - _triggerResolverArtifactsReconcile({ reason: 'account-rule-updated', accountRuleId }) return { scheduled: true } } function scheduleReissueForUserRule (userRuleId) { - _runBackgroundTask(`reissue-user-rule-${userRuleId}`, async () => { - await module.exports.reissueForUserRule(userRuleId) + _runBackgroundTask(`reissue-user-rule-${userRuleId}`, async (transaction) => { + await module.exports.reissueForUserRule(userRuleId, transaction) }) - _triggerResolverArtifactsReconcile({ reason: 'user-rule-updated', userRuleId }) return { scheduled: true } } function scheduleReissueAccountsForApplications (applicationIds = []) { - _runBackgroundTask(`reissue-accounts-${applicationIds.length}`, async () => { + _runBackgroundTask(`reissue-accounts-${applicationIds.length}`, async (transaction) => { for (const applicationId of applicationIds) { - await module.exports.reissueAccountForApplication(applicationId) + await module.exports.reissueAccountForApplication(applicationId, transaction) } }) return { scheduled: true } } function 
scheduleReissueUsersForMicroservices (microserviceUuids = []) { - _runBackgroundTask(`reissue-users-${microserviceUuids.length}`, async () => { + _runBackgroundTask(`reissue-users-${microserviceUuids.length}`, async (transaction) => { for (const microserviceUuid of microserviceUuids) { const reconcileTriggerOptions = { triggerReconcile: false } - await module.exports.reissueUserForMicroservice(microserviceUuid, reconcileTriggerOptions) + await module.exports.reissueUserForMicroservice(microserviceUuid, transaction, reconcileTriggerOptions) } }) return { scheduled: true } @@ -1316,5 +1409,6 @@ module.exports = { scheduleReissueForAccountRule, scheduleReissueForUserRule, scheduleReissueAccountsForApplications, - scheduleReissueUsersForMicroservices + scheduleReissueUsersForMicroservices, + enqueueNatsReconcileOutbox: _enqueueNatsReconcileOutbox } diff --git a/src/services/nats-relay-connection-manager.js b/src/services/nats-relay-connection-manager.js index a85a0d25..2b2d139c 100644 --- a/src/services/nats-relay-connection-manager.js +++ b/src/services/nats-relay-connection-manager.js @@ -8,6 +8,7 @@ const NatsAccountManager = require('../data/managers/nats-account-manager') const NatsUserManager = require('../data/managers/nats-user-manager') const NatsAuthService = require('./nats-auth-service') const SecretService = require('./secret-service') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') const NATS_DEFAULT_PORT = 4222 @@ -16,7 +17,6 @@ class NatsRelayConnectionManager { this._connectFn = deps.connectFn || connect this._config = deps.config || config this.maxReconnectAttempts = deps.maxReconnectAttempts ?? 
-1 - this.fakeTransaction = { fakeTransaction: true } this.connection = null this.connectionPromise = null this.cachedHubRecord = null @@ -210,7 +210,10 @@ class NatsRelayConnectionManager { if (this.cachedHubRecord) { return this.cachedHubRecord } - const hub = await NatsInstanceManager.findOne({ isHub: true }, this.fakeTransaction) + const hub = await runInTransaction( + (transaction) => NatsInstanceManager.findOne({ isHub: true }, transaction), + { priority: PRIORITY_BACKGROUND, label: 'nats-relay-hub-record' } + ) if (!hub) { throw new Error('NATS hub not found. Ensure a hub NatsInstances row with isHub=true exists.') } @@ -240,53 +243,59 @@ class NatsRelayConnectionManager { } async _ensureControllerNatsAccount () { - const hub = await NatsInstanceManager.findOne({ isHub: true }, this.fakeTransaction) - if (!hub) { - return - } - await NatsAuthService.ensureControllerNatsAccount() + await runInTransaction(async (transaction) => { + const hub = await NatsInstanceManager.findOne({ isHub: true }, transaction) + if (!hub) { + return + } + await NatsAuthService.ensureControllerNatsAccount(transaction, { triggerReconcile: false }) + }, { priority: PRIORITY_BACKGROUND, label: 'nats-relay-ensure-controller-account' }) } async _fetchControllerRelayCreds () { - const account = await NatsAccountManager.findOne({ - name: NatsAuthService.CONTROLLER_NATS_ACCOUNT_NAME, - applicationId: null, - isSystem: false, - isLeafSystem: false - }, this.fakeTransaction) - - let credsSecretName = NatsAuthService.controllerNatsCredsSecretName() - if (account) { - const user = await NatsUserManager.findOne({ - accountId: account.id, - name: NatsAuthService.CONTROLLER_NATS_USER_NAME - }, this.fakeTransaction) - if (user && user.credsSecretName) { - credsSecretName = user.credsSecretName + return runInTransaction(async (transaction) => { + const foundAccount = await NatsAccountManager.findOne({ + name: NatsAuthService.CONTROLLER_NATS_ACCOUNT_NAME, + applicationId: null, + isSystem: false, + 
isLeafSystem: false + }, transaction) + + let foundUser = null + if (foundAccount) { + foundUser = await NatsUserManager.findOne({ + accountId: foundAccount.id, + name: NatsAuthService.CONTROLLER_NATS_USER_NAME + }, transaction) } - } - const secret = await this._safeGetSecret(credsSecretName) - if (!secret || !secret.data) { - throw new Error(`Controller relay NATS creds secret not found: ${credsSecretName}`) - } + let credsSecretName = NatsAuthService.controllerNatsCredsSecretName() + if (foundUser && foundUser.credsSecretName) { + credsSecretName = foundUser.credsSecretName + } - const credsKey = Object.keys(secret.data).find((key) => key.endsWith('.creds')) || 'creds' - const raw = secret.data[credsKey] - if (!raw) { - throw new Error(`Missing creds payload in secret ${credsSecretName}`) - } + const secret = await this._safeGetSecret(credsSecretName, transaction) + if (!secret || !secret.data) { + throw new Error(`Controller relay NATS creds secret not found: ${credsSecretName}`) + } + + const credsKey = Object.keys(secret.data).find((key) => key.endsWith('.creds')) || 'creds' + const raw = secret.data[credsKey] + if (!raw) { + throw new Error(`Missing creds payload in secret ${credsSecretName}`) + } - const credsText = typeof raw === 'string' - ? raw - : Buffer.from(raw, 'base64').toString('utf8') + const credsText = typeof raw === 'string' + ? 
raw + : Buffer.from(raw, 'base64').toString('utf8') - return new TextEncoder().encode(credsText) + return new TextEncoder().encode(credsText) + }, { priority: PRIORITY_BACKGROUND, label: 'nats-relay-fetch-creds-db' }) } - async _safeGetSecret (name) { + async _safeGetSecret (name, transaction) { try { - return await SecretService.getSecretEndpoint(name) + return await SecretService.getSecretEndpoint(name, transaction) } catch (error) { if (error.name === 'NotFoundError') { logger.debug({ secret: name }, '[NATS][RELAY] Secret not found') diff --git a/src/services/nats-relay-transport-impl.js b/src/services/nats-relay-transport-impl.js index 3929b488..babb747a 100644 --- a/src/services/nats-relay-transport-impl.js +++ b/src/services/nats-relay-transport-impl.js @@ -110,6 +110,27 @@ class NatsRelayTransportImpl { return this.execBridges.has(execId) } + setExecUserDeliveryHook (execId, hook) { + const bridge = this.execBridges.get(execId) + if (bridge) { + bridge.onUserRelayDelivery = hook + } + } + + setExecAgentDeliveryHook (execId, hook) { + const bridge = this.execBridges.get(execId) + if (bridge) { + bridge.onAgentRelayDelivery = hook + } + } + + setLogUserDeliveryHook (sessionId, hook) { + const bridge = this.logBridges.get(sessionId) + if (bridge) { + bridge.onUserRelayDelivery = hook + } + } + async publishToAgent (execId, buffer, options = {}) { await this._publishExec(execId, execAgentSubject(execId), buffer, options) } @@ -391,6 +412,26 @@ class NatsRelayTransportImpl { side, messageSize: body.length }) + if (side === 'user' && currentBridge.onUserRelayDelivery) { + try { + currentBridge.onUserRelayDelivery(body) + } catch (error) { + logger.warn('[NATS][RELAY] Exec user relay delivery hook failed', { + execId: bridge.execId, + error: error.message + }) + } + } + if (side === 'agent' && currentBridge.onAgentRelayDelivery) { + try { + currentBridge.onAgentRelayDelivery(body) + } catch (error) { + logger.warn('[NATS][RELAY] Exec agent relay delivery hook 
failed', { + execId: bridge.execId, + error: error.message + }) + } + } } else { logger.debug('[NATS][RELAY] No socket available for exec delivery', { execId: bridge.execId, @@ -413,6 +454,10 @@ class NatsRelayTransportImpl { closeAck }) + if (closeAck) { + return + } + if (ws && ws.readyState === WebSocket.OPEN) { try { const reason = closeInitiator === 'agent' ? 'Agent closed connection' : 'User closed connection' @@ -424,9 +469,18 @@ class NatsRelayTransportImpl { error: error.message }) } + } else if (bridge && bridge.cleanupCallback) { + try { + await bridge.cleanupCallback(execId) + } catch (error) { + logger.error('[NATS][RELAY] Error in cleanup callback during CLOSE handling', { + execId, + error: error.message + }) + } } - if (!closeAck && this.execBridges.has(execId)) { + if (this.execBridges.has(execId)) { const ackSide = side === 'user' ? 'agent' : 'user' try { const hdrs = natsHeaders() @@ -442,17 +496,6 @@ class NatsRelayTransportImpl { }) } } - - if (bridge && bridge.cleanupCallback) { - try { - await bridge.cleanupCallback(execId) - } catch (error) { - logger.error('[NATS][RELAY] Error in cleanup callback during CLOSE handling', { - execId, - error: error.message - }) - } - } } async _ensureLogUserSubscription (bridge, userWs, nc) { @@ -525,6 +568,16 @@ class NatsRelayTransportImpl { ws.send(body, { binary: true }) currentBridge.pendingBytes = Math.max(0, currentBridge.pendingBytes - body.length) currentBridge.pendingMessages = Math.max(0, currentBridge.pendingMessages - 1) + if (currentBridge.onUserRelayDelivery) { + try { + currentBridge.onUserRelayDelivery(body) + } catch (error) { + logger.warn('[NATS][RELAY] Log user relay delivery hook failed', { + sessionId: bridge.sessionId, + error: error.message + }) + } + } } else { currentBridge.pendingBytes = Math.max(0, currentBridge.pendingBytes - body.length) currentBridge.pendingMessages = Math.max(0, currentBridge.pendingMessages - 1) diff --git a/src/services/nats-relay-transport.js 
b/src/services/nats-relay-transport.js index e7d47e51..4fc3a432 100644 --- a/src/services/nats-relay-transport.js +++ b/src/services/nats-relay-transport.js @@ -50,6 +50,24 @@ class NatsRelayTransport extends WsRelayTransport { return this._impl.shouldUseRelay(execId) } + setExecUserDeliveryHook (execId, hook) { + if (typeof this._impl.setExecUserDeliveryHook === 'function') { + this._impl.setExecUserDeliveryHook(execId, hook) + } + } + + setExecAgentDeliveryHook (execId, hook) { + if (typeof this._impl.setExecAgentDeliveryHook === 'function') { + this._impl.setExecAgentDeliveryHook(execId, hook) + } + } + + setLogUserDeliveryHook (sessionId, hook) { + if (typeof this._impl.setLogUserDeliveryHook === 'function') { + this._impl.setLogUserDeliveryHook(sessionId, hook) + } + } + shouldUseRelayForLogs (sessionId) { return this._impl.shouldUseRelayForLogs(sessionId) } diff --git a/src/services/nats-service.js b/src/services/nats-service.js index c33d88dd..7a52447c 100644 --- a/src/services/nats-service.js +++ b/src/services/nats-service.js @@ -2,6 +2,7 @@ const fs = require('fs') const path = require('path') const crypto = require('crypto') const AppHelper = require('../helpers/app-helper') +const { isTest } = AppHelper const Errors = require('../helpers/errors') const ErrorMessages = require('../helpers/error-messages') const ConfigMapManager = require('../data/managers/config-map-manager') @@ -22,17 +23,27 @@ const NatsInstanceManager = require('../data/managers/nats-instance-manager') const NatsConnectionManager = require('../data/managers/nats-connection-manager') const NatsAccountManager = require('../data/managers/nats-account-manager') const NatsReconcileTaskManager = require('../data/managers/nats-reconcile-task-manager') +const ReconcileOutboxManager = require('../data/managers/reconcile-outbox-manager') const NatsUserManager = require('../data/managers/nats-user-manager') const ApplicationManager = require('../data/managers/application-manager') +const 
NatsAccountRuleManager = require('../data/managers/nats-account-rule-manager') +const NatsUserRuleManager = require('../data/managers/nats-user-rule-manager') const NatsAuthService = require('./nats-auth-service') +const NatsSystemRules = require('../config/nats-system-rules') const ChangeTrackingService = require('./change-tracking-service') const MicroservicesService = require('./microservices-service') const FogManager = require('../data/managers/iofog-manager') -const databaseProvider = require('../data/providers/database-factory') const config = require('../config') const Constants = require('../helpers/constants') const { ensureSystemApplication, getSystemMicroserviceName, slugifyName } = require('../helpers/system-naming') const TransactionDecorator = require('../decorators/transaction-decorator') +const { isSequelizeTransaction } = require('../helpers/sequelize-transaction') +const { + runInTransaction, + PRIORITY_INTERACTIVE, + PRIORITY_BACKGROUND, + getActiveTransaction +} = require('../helpers/transaction-runner') const { buildNatsServerCertificateHostList, buildNatsMqttCertificateHostList @@ -584,7 +595,7 @@ async function _computeClusterRoutesForInstance (natsInstance, transaction) { return routes } -async function _patchK8sHubConfigMapClusterRoutes (desiredControllerRoutes, transaction) { +async function _patchK8sHubConfigMapClusterRoutesExternal (desiredControllerRoutes) { const configMap = await K8sClient.getConfigMap(K8S_NATS_SERVER_CONFIG_MAP, { ignoreNotFound: true }) if (!configMap || !configMap.data) { logger.debug(`Hub ConfigMap ${K8S_NATS_SERVER_CONFIG_MAP} not found or empty (expected before operator creates it)`) @@ -614,6 +625,89 @@ async function _patchK8sHubConfigMapClusterRoutes (desiredControllerRoutes, tran await K8sClient.patchConfigMap(K8S_NATS_SERVER_CONFIG_MAP, { data: { [configKey]: newContent } }, { ignoreNotFound: true }) } +async function _patchK8sJwtBundleExternal (fullServerJwtBundle) { + const existing = await 
K8sClient.getConfigMap(K8S_NATS_JWT_BUNDLE_CONFIG_MAP, { ignoreNotFound: true }) + const existingData = existing && existing.data ? existing.data : null + const newHash = _configMapDataHash(fullServerJwtBundle) + const unchanged = existingData && _configMapDataHash(existingData) === newHash + if (!unchanged) { + await K8sClient.patchConfigMap(K8S_NATS_JWT_BUNDLE_CONFIG_MAP, { data: fullServerJwtBundle }, { ignoreNotFound: true }) + } +} + +async function _rolloutNatsStatefulSetExternal () { + await K8sClient.rolloutStatefulSet('nats') +} + +function _resolveParentTransaction (maybeTransaction, explicitlyPassed) { + if (explicitlyPassed && isSequelizeTransaction(maybeTransaction)) { + return maybeTransaction + } + const active = getActiveTransaction() + if (active) { + return active + } + if (isTest() && explicitlyPassed && maybeTransaction != null && typeof maybeTransaction === 'object') { + return maybeTransaction + } + return null +} + +function _scheduleK8sAfterCommit (transaction, fn) { + const run = () => Promise.resolve(fn()).catch((err) => { + logger.warn(`Deferred NATS K8s work failed: ${err.message}`) + }) + + if (transaction && typeof transaction.afterCommit === 'function') { + transaction.afterCommit(run) + return + } + + if (isTest()) { + return run() + } +} + +async function _applyEnsureNatsK8sExternal (k8sHubPatch) { + if (!k8sHubPatch) { + return + } + try { + await _patchK8sHubConfigMapClusterRoutesExternal(k8sHubPatch) + } catch (err) { + logger.warn(`Failed to patch Kubernetes NATS hub ConfigMap cluster routes: ${err.message}`) + } +} + +async function _applyCleanupNatsK8sExternal (k8sCleanup) { + if (!k8sCleanup) { + return + } + try { + await _patchK8sHubConfigMapClusterRoutesExternal(k8sCleanup.desiredControllerRoutes) + if (k8sCleanup.rollout) { + try { + await _rolloutNatsStatefulSetExternal() + } catch (rolloutErr) { + logger.warn(`Failed to rollout NATS StatefulSet after hub ConfigMap patch: ${rolloutErr.message}`) + } + } + } catch (err) { 
+ logger.warn(`Failed to patch Kubernetes NATS hub ConfigMap cluster routes after cleanup: ${err.message}`) + } +} + +async function _applyK8sJwtBundleExternal (fullServerJwtBundle) { + if (!fullServerJwtBundle) { + return + } + try { + await _patchK8sJwtBundleExternal(fullServerJwtBundle) + } catch (err) { + logger.warn(`Failed to patch Kubernetes NATS hub JWT bundle ConfigMap: ${err.message}`) + } +} + function _clusterConfigRequiresRebuild (oldRoutes, newRoutes) { const oldLen = (oldRoutes && oldRoutes.length) || 0 const newLen = (newRoutes && newRoutes.length) || 0 @@ -889,7 +983,17 @@ async function _removeLeafOnlyArtifactsForFog (fog, microservice, transaction) { await NatsAuthService.deleteLeafSystemArtifactsForFog(fog, transaction) } -async function ensureNatsForFog (fog, natsConfig, transaction) { +async function ensureNatsForFogCertPrepDb (fog, natsConfig, transaction) { + const mode = (natsConfig && natsConfig.mode) || 'leaf' + if (mode === 'none') { + return null + } + const { serverCertName, mqttCertName } = await _ensureNatsCertificates(fog, transaction) + const jetstreamKey = await _ensureJetstreamKey(fog, transaction) + return { serverCertName, mqttCertName, jetstreamKey } +} + +async function _resolveNatsEnsureContext (fog, natsConfig, transaction) { const mode = (natsConfig && natsConfig.mode) || 'leaf' if (mode === 'none') { return null @@ -912,13 +1016,33 @@ async function ensureNatsForFog (fog, natsConfig, transaction) { const mqttPort = (natsConfig && natsConfig.mqttPort) || DEFAULT_MQTT_PORT const httpPort = (natsConfig && natsConfig.httpPort) || DEFAULT_HTTP_PORT - const { serverCertName, mqttCertName } = await _ensureNatsCertificates(fog, transaction) - const configMapName = natsConfigMapName(fog) - const configKey = NATS_CONFIG_KEY - const template = !isLeaf ? 
readTemplate('server.conf') : readTemplate('leaf.conf') - const certName = serverCertName + return { + mode, + isHub, + isLeaf, + serverPort, + leafPort, + clusterPort, + mqttPort, + httpPort, + configMapName: natsConfigMapName(fog), + configKey: NATS_CONFIG_KEY, + template: !isLeaf ? readTemplate('server.conf') : readTemplate('leaf.conf'), + jwtBundleConfigMapName: isLeaf ? natsJwtBundleConfigMap(fog) : K8S_NATS_JWT_BUNDLE_CONFIG_MAP + } +} + +async function ensureNatsForFogAuthPrepDb (fog, natsConfig, prep, transaction) { + if (!prep) { + return null + } + const ctx = await _resolveNatsEnsureContext(fog, natsConfig, transaction) + if (!ctx) { + return null + } + + const { isHub, isLeaf, jwtBundleConfigMapName } = ctx - const jwtBundleConfigMapName = isLeaf ? natsJwtBundleConfigMap(fog) : K8S_NATS_JWT_BUNDLE_CONFIG_MAP if (isLeaf) { const jwtBundle = await _buildJwtBundle(fog, true, transaction) await _ensureConfigMap(natsJwtBundleConfigMap(fog), jwtBundle, transaction) @@ -927,11 +1051,44 @@ async function ensureNatsForFog (fog, natsConfig, transaction) { await _ensureConfigMap(K8S_NATS_JWT_BUNDLE_CONFIG_MAP, fullJwtBundle, transaction) } + let sysCredsSecretName = null + if (isHub) { + const { user: hubSysUser } = await NatsAuthService.ensureSysUserForServer({ isHub: true }, transaction) + sysCredsSecretName = hubSysUser.credsSecretName + } else if (!isLeaf) { + const { user: serverSysUser } = await NatsAuthService.ensureSysUserForServer({ isHub: false, fog }, transaction) + sysCredsSecretName = serverSysUser.credsSecretName + } + + return { ...ctx, sysCredsSecretName, jwtBundleConfigMapName } +} + +async function ensureNatsForFogTopologyDb (fog, natsConfig, prep, authCtx, transaction) { + if (!prep || !authCtx) { + return null + } + + const { serverCertName, mqttCertName, jetstreamKey } = prep + const { + mode, + isHub, + isLeaf, + serverPort, + leafPort, + clusterPort, + mqttPort, + httpPort, + configMapName, + configKey, + template, + jwtBundleConfigMapName, 
+ sysCredsSecretName + } = authCtx + const certName = serverCertName + const microserviceResult = await _ensureNatsMicroservice(fog, mode, transaction) const microservice = microserviceResult let anyVolumeMappingCreated = !!(microservice && microservice._volumeMappingCreated) - - const jetstreamKey = await _ensureJetstreamKey(fog, transaction) const sysAccountName = isLeaf ? NatsAuthService.leafSystemAccountName(fog) : NatsAuthService.SYSTEM_ACCOUNT_NAME const sysUserName = isLeaf ? NatsAuthService.leafSystemAccountUserName(fog) : NatsAuthService.sysUserNameForServer(isHub, fog) const sysCredPath = `${NATS_CREDS_DIR}/${slugifyName(sysAccountName)}/${slugifyName(sysUserName)}.creds` @@ -1050,13 +1207,9 @@ async function ensureNatsForFog (fog, natsConfig, transaction) { transaction ) + let k8sHubPatch = null if (_isKubernetesControlPlane() && !savedInstance.isLeaf && savedInstance.host) { - try { - const desiredControllerRoutes = await _getControllerManagedClusterRoutes(transaction) - await _patchK8sHubConfigMapClusterRoutes(desiredControllerRoutes, transaction) - } catch (err) { - logger.warn(`Failed to patch Kubernetes NATS hub ConfigMap cluster routes: ${err.message}`) - } + k8sHubPatch = await _getControllerManagedClusterRoutes(transaction) } if (!savedInstance.isLeaf) { @@ -1072,7 +1225,7 @@ async function ensureNatsForFog (fog, natsConfig, transaction) { const otherInstances = (allServerInstancesNow || []).filter(i => i.id !== savedInstance.id) if (otherInstances.length > 0) { const otherFogUuids = otherInstances.map((i) => i.iofogUuid).filter(Boolean) - await enqueueReconcileTask({ reason: 'cluster-routes-changed', fogUuids: otherFogUuids }, transaction) + await ReconcileOutboxManager.enqueueNats({ reason: 'cluster-routes-changed', fogUuids: otherFogUuids }, transaction) } } @@ -1091,28 +1244,20 @@ async function ensureNatsForFog (fog, natsConfig, transaction) { anyVolumeMappingCreated = (await _ensureVolumeMapping(microservice.uuid, certName, 
`${NATS_CERTS_DIR}/${certName}`, 'ro', 'volumeMount', transaction)) || anyVolumeMappingCreated anyVolumeMappingCreated = (await _ensureVolumeMapping(microservice.uuid, mqttCertName, `${NATS_CERTS_DIR}/${mqttCertName}`, 'ro', 'volumeMount', transaction)) || anyVolumeMappingCreated - if (isHub) { - const { user: hubSysUser } = await NatsAuthService.ensureSysUserForServer({ isHub: true }, transaction) - const credsSecretName = hubSysUser.credsSecretName - await _ensureVolumeMount(credsSecretName, { secretName: credsSecretName }, transaction) - await VolumeMountService.linkVolumeMountEndpoint(credsSecretName, [fog.uuid], transaction) - anyVolumeMappingCreated = (await _ensureVolumeMapping(microservice.uuid, credsSecretName, NATS_CREDS_DIR, 'ro', 'volumeMount', transaction)) || anyVolumeMappingCreated - } else if (!savedInstance.isLeaf) { - const { user: serverSysUser } = await NatsAuthService.ensureSysUserForServer({ isHub: false, fog }, transaction) - const credsSecretName = serverSysUser.credsSecretName - await _ensureVolumeMount(credsSecretName, { secretName: credsSecretName }, transaction) - await VolumeMountService.linkVolumeMountEndpoint(credsSecretName, [fog.uuid], transaction) - anyVolumeMappingCreated = (await _ensureVolumeMapping(microservice.uuid, credsSecretName, NATS_CREDS_DIR, 'ro', 'volumeMount', transaction)) || anyVolumeMappingCreated - } else { - const sysCredsSecretName = await _getSysUserCredsSecretNameForFog(fog, false, transaction) - if (sysCredsSecretName) { + if (sysCredsSecretName) { + await _ensureVolumeMount(sysCredsSecretName, { secretName: sysCredsSecretName }, transaction) + await VolumeMountService.linkVolumeMountEndpoint(sysCredsSecretName, [fog.uuid], transaction) + anyVolumeMappingCreated = (await _ensureVolumeMapping(microservice.uuid, sysCredsSecretName, NATS_CREDS_DIR, 'ro', 'volumeMount', transaction)) || anyVolumeMappingCreated + } else if (isLeaf) { + const leafSysCredsSecretName = await _getSysUserCredsSecretNameForFog(fog, 
false, transaction) + if (leafSysCredsSecretName) { await VolumeMappingManager.delete({ microserviceUuid: microservice.uuid, - hostDestination: sysCredsSecretName, + hostDestination: leafSysCredsSecretName, type: 'volumeMount' }, transaction) try { - await VolumeMountService.unlinkVolumeMountEndpoint(sysCredsSecretName, [fog.uuid], transaction) + await VolumeMountService.unlinkVolumeMountEndpoint(leafSysCredsSecretName, [fog.uuid], transaction) } catch (err) { if (err.name !== 'NotFoundError') { throw err @@ -1130,10 +1275,69 @@ async function ensureNatsForFog (fog, natsConfig, transaction) { } await ChangeTrackingService.update(fog.uuid, ChangeTrackingService.events.microserviceList, transaction) - return microservice + return { microservice, k8sHubPatch } +} + +/** @deprecated Use phased ensure; kept for grep gates and direct unit tests */ +async function ensureNatsForFogDbMutation (fog, natsConfig, prep, transaction) { + const authCtx = await ensureNatsForFogAuthPrepDb(fog, natsConfig, prep, transaction) + if (!authCtx) { + return null + } + return ensureNatsForFogTopologyDb(fog, natsConfig, prep, authCtx, transaction) +} + +/** @deprecated Use phased ensure; kept for grep gates and direct unit tests */ +async function ensureNatsForFogDb (fog, natsConfig, transaction) { + const prep = await ensureNatsForFogCertPrepDb(fog, natsConfig, transaction) + if (!prep) { + return null + } + return ensureNatsForFogDbMutation(fog, natsConfig, prep, transaction) +} + +async function _ensureNatsForFogPhased (fog, natsConfig, { priority = PRIORITY_INTERACTIVE } = {}) { + const mode = (natsConfig && natsConfig.mode) || 'leaf' + if (mode === 'none') { + return null + } + logger.info(`NATS ensure certPrep starting for fog ${fog.uuid}`) + const prep = await runInTransaction( + (transaction) => ensureNatsForFogCertPrepDb(fog, natsConfig, transaction), + { priority, label: 'nats.ensure.certPrep' } + ) + if (!prep) { + return null + } + logger.info(`NATS ensure authPrep starting for 
fog ${fog.uuid}`) + const authCtx = await runInTransaction( + (transaction) => ensureNatsForFogAuthPrepDb(fog, natsConfig, prep, transaction), + { priority, label: 'nats.ensure.authPrep' } + ) + if (!authCtx) { + return null + } + logger.info(`NATS ensure topology starting for fog ${fog.uuid}`) + const result = await runInTransaction( + (transaction) => ensureNatsForFogTopologyDb(fog, natsConfig, prep, authCtx, transaction), + { priority, label: 'nats.ensure.topology' } + ) + await _applyEnsureNatsK8sExternal(result && result.k8sHubPatch) + return result && result.microservice +} + +async function ensureNatsForFogPhased (fog, natsConfig) { + return _ensureNatsForFogPhased(fog, natsConfig, { priority: PRIORITY_BACKGROUND }) +} + +async function ensureNatsForFog (...args) { + const fog = args[0] + const natsConfig = args[1] + // Parent transaction arg ignored — phased short txs (Plan 19-I-B / R131) + return _ensureNatsForFogPhased(fog, natsConfig, { priority: PRIORITY_INTERACTIVE }) } -async function cleanupNatsForFog (fog, transaction) { +async function cleanupNatsForFogDb (fog, transaction) { const natsInstance = await NatsInstanceManager.findByFog(fog.uuid, transaction) const mountNames = [ natsConfigMapName(fog), @@ -1181,28 +1385,22 @@ async function cleanupNatsForFog (fog, transaction) { const wasLeaf = !!(natsInstance && natsInstance.isLeaf) const wasServer = !!(natsInstance && !natsInstance.isLeaf) const wasHub = !!(natsInstance && natsInstance.isHub) + let k8sCleanup = null if (natsInstance) { await NatsConnectionManager.delete({ sourceNats: natsInstance.id }, transaction) await NatsConnectionManager.delete({ destNats: natsInstance.id }, transaction) await NatsInstanceManager.delete({ id: natsInstance.id }, transaction) if (_isKubernetesControlPlane() && !natsInstance.isLeaf) { - try { - const desiredControllerRoutes = await _getControllerManagedClusterRoutes(transaction) - await _patchK8sHubConfigMapClusterRoutes(desiredControllerRoutes, transaction) - try 
{ - await K8sClient.rolloutStatefulSet('nats') - } catch (rolloutErr) { - logger.warn(`Failed to rollout NATS StatefulSet after hub ConfigMap patch: ${rolloutErr.message}`) - } - } catch (err) { - logger.warn(`Failed to patch Kubernetes NATS hub ConfigMap cluster routes after cleanup: ${err.message}`) + k8sCleanup = { + desiredControllerRoutes: await _getControllerManagedClusterRoutes(transaction), + rollout: true } } if (!natsInstance.isLeaf) { const remainingServers = await NatsInstanceManager.findAll({ isLeaf: false }, transaction) if (remainingServers && remainingServers.length > 0) { const remainingFogUuids = remainingServers.map((s) => s.iofogUuid).filter(Boolean) - await enqueueReconcileTask({ reason: 'server-deleted', fogUuids: remainingFogUuids }, transaction) + await ReconcileOutboxManager.enqueueNats({ reason: 'server-deleted', fogUuids: remainingFogUuids }, transaction) } } } @@ -1265,6 +1463,32 @@ async function cleanupNatsForFog (fog, transaction) { if (wasServer) { await NatsAuthService.deleteServerSysUserForFog(fog, wasHub, transaction) } + + return { k8sCleanup } +} + +async function _cleanupNatsForFogPhased (fog, { priority = PRIORITY_INTERACTIVE } = {}) { + const result = await runInTransaction( + (transaction) => cleanupNatsForFogDb(fog, transaction), + { priority, label: 'nats.cleanupForFog' } + ) + await _applyCleanupNatsK8sExternal(result.k8sCleanup) +} + +async function cleanupNatsForFogPhased (fog) { + return _cleanupNatsForFogPhased(fog, { priority: PRIORITY_BACKGROUND }) +} + +async function cleanupNatsForFog (...args) { + const fog = args[0] + const parentTx = _resolveParentTransaction(args[1], args.length > 1) + if (parentTx) { + const result = await cleanupNatsForFogDb(fog, parentTx) + _scheduleK8sAfterCommit(parentTx, () => _applyCleanupNatsK8sExternal(result && result.k8sCleanup)) + return result + } + // No parent transaction — short tx + post-tx K8s (Plan 19-I-B / R131) + return _cleanupNatsForFogPhased(fog, { priority: 
PRIORITY_INTERACTIVE }) } function _getAffectedFogUuidsForApplication (applicationId, natsInstanceByFog, microservicesByFog) { @@ -1278,10 +1502,14 @@ function _getAffectedFogUuidsForApplication (applicationId, natsInstanceByFog, m return out } -function _getAffectedFogUuidsForAccountRule (accountRuleId, natsInstanceByFog, microservicesByFog, applicationsWithNatsById) { +function _getAffectedFogUuidsForAccountRule (accountRuleId, natsInstanceByFog, microservicesByFog, applicationsWithNatsById, defaultAccountRuleId) { const appIds = [] for (const [appId, app] of applicationsWithNatsById) { - if (app.natsRuleId === accountRuleId) appIds.push(appId) + if (app.natsRuleId === accountRuleId) { + appIds.push(appId) + } else if (defaultAccountRuleId && accountRuleId === defaultAccountRuleId && app.natsRuleId == null && app.natsAccess) { + appIds.push(appId) + } } const out = new Set() for (const [fogUuid, ni] of natsInstanceByFog) { @@ -1298,7 +1526,19 @@ async function _getAffectedFogUuidsForUserRule (userRuleId, natsInstanceByFog, t for (const [fogUuid, ni] of natsInstanceByFog) { if (!ni.isLeaf) out.add(fogUuid) } - const microservicesWithRule = await MicroserviceManager.findAll({ natsRuleId: userRuleId }, transaction) + const userRule = await NatsUserRuleManager.findOne({ id: userRuleId }, transaction) + let microservicesWithRule = await MicroserviceManager.findAll({ natsRuleId: userRuleId }, transaction) + if (userRule && userRule.name === NatsSystemRules.MICROSERVICE_USER_RULE_NAME) { + const defaultRuleMicroservices = await MicroserviceManager.findAll({ natsAccess: true, natsRuleId: null }, transaction) + const seenMsUuids = new Set((microservicesWithRule || []).map((ms) => ms.uuid)) + microservicesWithRule = [...(microservicesWithRule || [])] + for (const ms of defaultRuleMicroservices || []) { + if (!seenMsUuids.has(ms.uuid)) { + microservicesWithRule.push(ms) + seenMsUuids.add(ms.uuid) + } + } + } for (const ms of microservicesWithRule || []) { if (ms.iofogUuid) 
out.add(ms.iofogUuid) } @@ -1315,7 +1555,7 @@ async function _getAffectedFogUuidsForUserRule (userRuleId, natsInstanceByFog, t return out } -async function _reconcileResolverArtifactsOnce (options = {}, transaction) { +async function _reconcileResolverArtifactsOnceDb (options = {}, transaction) { const NatsAuthServiceRuntime = require('./nats-auth-service') const fogs = await FogManager.findAll({}, transaction) @@ -1347,13 +1587,23 @@ async function _reconcileResolverArtifactsOnce (options = {}, transaction) { let candidateFogs const fogFilter = Array.isArray(options.fogUuids) && options.fogUuids.length > 0 ? new Set(options.fogUuids) : null const reason = options.reason || 'auth-mutation' + const defaultAccountRule = await NatsAccountRuleManager.findOne({ + name: NatsSystemRules.APPLICATION_ACCOUNT_RULE_NAME + }, transaction) + const defaultAccountRuleId = defaultAccountRule ? defaultAccountRule.id : null if (fogFilter) { candidateFogs = fogs.filter((fog) => fogFilter.has(fog.uuid)) } else if ((reason === 'account-created' || reason === 'account-deleted') && options.applicationId != null) { const affected = _getAffectedFogUuidsForApplication(options.applicationId, natsInstanceByFog, microservicesByFog) candidateFogs = fogs.filter((f) => affected.has(f.uuid)) } else if (reason === 'account-rule-updated' && options.accountRuleId != null) { - const affected = _getAffectedFogUuidsForAccountRule(options.accountRuleId, natsInstanceByFog, microservicesByFog, applicationsWithNatsById) + const affected = _getAffectedFogUuidsForAccountRule( + options.accountRuleId, + natsInstanceByFog, + microservicesByFog, + applicationsWithNatsById, + defaultAccountRuleId + ) candidateFogs = fogs.filter((f) => affected.has(f.uuid)) } else if (reason === 'user-rule-updated' && options.userRuleId != null) { const affected = await _getAffectedFogUuidsForUserRule(options.userRuleId, natsInstanceByFog, transaction) @@ -1412,12 +1662,22 @@ async function _reconcileResolverArtifactsOnce (options 
= {}, transaction) { const fogMicroservices = microservicesByFog.get(fog.uuid) || [] if (!skipReissueForAccountDeleted) { + const affectedAppIds = new Set() for (const microservice of fogMicroservices) { if (!microservice.natsAccess || !microservice.applicationId) continue const app = applicationsWithNatsById.get(microservice.applicationId) if (!app || !app.natsAccess) continue + affectedAppIds.add(microservice.applicationId) await NatsAuthServiceRuntime.reissueUserForMicroservice(microservice.uuid, transaction, reconcileTriggerOptions) } + if (affectedAppIds.size > 0) { + const refreshedAccounts = await NatsAccountManager.findAll({ + applicationId: { [Op.in]: [...affectedAppIds] } + }, transaction) + for (const account of refreshedAccounts || []) { + accountByAppId.set(account.applicationId, account) + } + } } const natsInstance = natsInstanceByFog.get(fog.uuid) @@ -1474,18 +1734,8 @@ async function _reconcileResolverArtifactsOnce (options = {}, transaction) { } } - if (_isKubernetesControlPlane()) { - try { - const existing = await K8sClient.getConfigMap(K8S_NATS_JWT_BUNDLE_CONFIG_MAP, { ignoreNotFound: true }) - const existingData = existing && existing.data ? existing.data : null - const newHash = _configMapDataHash(fullServerJwtBundle) - const unchanged = existingData && _configMapDataHash(existingData) === newHash - if (!unchanged) { - await K8sClient.patchConfigMap(K8S_NATS_JWT_BUNDLE_CONFIG_MAP, { data: fullServerJwtBundle }, { ignoreNotFound: true }) - } - } catch (err) { - logger.warn(`Failed to patch Kubernetes NATS hub JWT bundle ConfigMap: ${err.message}`) - } + return { + fullServerJwtBundle: _isKubernetesControlPlane() ? 
fullServerJwtBundle : null } } @@ -1513,6 +1763,10 @@ async function _computeAffectedFogUuidsForEnqueue (options, transaction) { } } const fogUuids = fogs.map((f) => f.uuid) + const defaultAccountRule = await NatsAccountRuleManager.findOne({ + name: NatsSystemRules.APPLICATION_ACCOUNT_RULE_NAME + }, transaction) + const defaultAccountRuleId = defaultAccountRule ? defaultAccountRule.id : null if (reason === 'server-deleted' || reason === 'cluster-routes-changed') { return [] } @@ -1531,7 +1785,13 @@ async function _computeAffectedFogUuidsForEnqueue (options, transaction) { return fogUuids.filter((u) => affected.has(u)) } if (reason === 'account-rule-updated' && options.accountRuleId != null) { - const affected = _getAffectedFogUuidsForAccountRule(options.accountRuleId, natsInstanceByFog, microservicesByFog, applicationsWithNatsById) + const affected = _getAffectedFogUuidsForAccountRule( + options.accountRuleId, + natsInstanceByFog, + microservicesByFog, + applicationsWithNatsById, + defaultAccountRuleId + ) return fogUuids.filter((u) => affected.has(u)) } if (reason === 'user-rule-updated' && options.userRuleId != null) { @@ -1550,9 +1810,6 @@ function _chunkFogUuids (fogUuids, chunkSize) { } async function enqueueReconcileTask (options = {}, transaction) { - if (transaction.fakeTransaction) { - return databaseProvider.sequelize.transaction((t) => enqueueReconcileTask(options, t)) - } const reason = REASON_VALUES.includes(options.reason) ? options.reason : 'auth-mutation' const applicationId = options.applicationId != null ? options.applicationId : null const accountRuleId = options.accountRuleId != null ? 
options.accountRuleId : null @@ -1596,18 +1853,43 @@ async function claimNextTask (controllerUuid, stalenessSeconds) { return NatsReconcileTaskManager.claimNext(controllerUuid, stalenessSeconds) } -async function reconcileResolverArtifacts (options = {}, transaction) { +async function _reconcileResolverArtifactsDbLoop (options, transaction) { + let fullServerJwtBundle = null + do { + natsReconcilePending = false + const result = await _reconcileResolverArtifactsOnceDb(options, transaction) + if (result && result.fullServerJwtBundle) { + fullServerJwtBundle = result.fullServerJwtBundle + } + } while (natsReconcilePending) + return { fullServerJwtBundle } +} + +async function reconcileResolverArtifacts (...args) { if (natsReconcileRunning) { natsReconcilePending = true return { scheduled: true } } + const options = args[0] || {} + const maybeTransaction = args.length > 1 ? args[args.length - 1] : undefined + const parentTx = _resolveParentTransaction(maybeTransaction, args.length > 1) + natsReconcileRunning = true try { - do { - natsReconcilePending = false - await _reconcileResolverArtifactsOnce(options, transaction) - } while (natsReconcilePending) + if (parentTx) { + const result = await _reconcileResolverArtifactsDbLoop(options, parentTx) + if (result.fullServerJwtBundle) { + _scheduleK8sAfterCommit(parentTx, () => _applyK8sJwtBundleExternal(result.fullServerJwtBundle)) + } + return { scheduled: false } + } + + const result = await runInTransaction( + (transaction) => _reconcileResolverArtifactsDbLoop(options, transaction), + { priority: PRIORITY_BACKGROUND, label: 'nats.reconcileResolverArtifacts' } + ) + await _applyK8sJwtBundleExternal(result.fullServerJwtBundle) return { scheduled: false } } finally { natsReconcileRunning = false @@ -1643,17 +1925,30 @@ function scheduleResolverArtifactsReconcile (options = {}) { return { scheduled: true } } +function scheduleEnsureNatsK8sAfterCommit (transaction, k8sHubPatch) { + _scheduleK8sAfterCommit(transaction, () => 
_applyEnsureNatsK8sExternal(k8sHubPatch)) +} + function normalizeJetstreamSize (value, defaultValue) { return _normalizeJetstreamSize(value, defaultValue) } module.exports = { - ensureNatsForFog: TransactionDecorator.generateTransaction(ensureNatsForFog), - reconcileResolverArtifacts: TransactionDecorator.generateTransaction(reconcileResolverArtifacts), + ensureNatsForFog, + ensureNatsForFogPhased, + ensureNatsForFogDb, + ensureNatsForFogCertPrepDb, + ensureNatsForFogAuthPrepDb, + ensureNatsForFogTopologyDb, + ensureNatsForFogDbMutation, + scheduleEnsureNatsK8sAfterCommit, + reconcileResolverArtifacts, scheduleResolverArtifactsReconcile, enqueueReconcileTask: TransactionDecorator.generateTransaction(enqueueReconcileTask), claimNextTask, - cleanupNatsForFog: TransactionDecorator.generateTransaction(cleanupNatsForFog), + cleanupNatsForFog, + cleanupNatsForFogPhased, + cleanupNatsForFogDb, ensureLeafCredsForFog: TransactionDecorator.generateTransaction(ensureLeafCredsForFog), isReconcileRunning, setReconcilePending, diff --git a/src/services/network-topology-service.js b/src/services/network-topology-service.js new file mode 100644 index 00000000..7a4d8968 --- /dev/null +++ b/src/services/network-topology-service.js @@ -0,0 +1,682 @@ +const config = require('../config') +const Constants = require('../helpers/constants') +const Errors = require('../helpers/errors') +const RouterManager = require('../data/managers/router-manager') +const RouterConnectionManager = require('../data/managers/router-connection-manager') +const NatsInstanceManager = require('../data/managers/nats-instance-manager') +const NatsConnectionManager = require('../data/managers/nats-connection-manager') +const FogManager = require('../data/managers/iofog-manager') +const TransactionDecorator = require('../decorators/transaction-decorator') +const Validator = require('../schemas') +const { Op } = require('sequelize') + +function _routerModel () { + return RouterManager.getEntity() +} + +function 
_routerConnectionModel () { + return RouterConnectionManager.getEntity() +} + +function _natsInstanceModel () { + return NatsInstanceManager.getEntity() +} + +function _natsConnectionModel () { + return NatsConnectionManager.getEntity() +} + +const DEFAULT_LIMIT = 100 +const MAX_LIMIT = 500 +const DEFAULT_SUBGRAPH_DEPTH = 1 +const MAX_SUBGRAPH_DEPTH = 2 +const MAX_SUBGRAPH_LIMIT = 200 + +function _isKubernetesControlPlane () { + const controlPlane = process.env.CONTROL_PLANE || config.get('app.ControlPlane') + return controlPlane && String(controlPlane).toLowerCase() === 'kubernetes' +} + +function _getControlPlane () { + return _isKubernetesControlPlane() ? 'kubernetes' : 'remote' +} + +function _parseLimitOffset (query) { + let limit = DEFAULT_LIMIT + if (query.limit !== undefined && query.limit !== null && query.limit !== '') { + const parsedLimit = parseInt(query.limit, 10) + if (!isNaN(parsedLimit) && parsedLimit > 0) { + limit = Math.min(parsedLimit, MAX_LIMIT) + } + } + + let offset = 0 + if (query.offset !== undefined && query.offset !== null && query.offset !== '') { + const parsedOffset = parseInt(query.offset, 10) + if (!isNaN(parsedOffset) && parsedOffset >= 0) { + offset = parsedOffset + } + } + + return { limit, offset } +} + +async function _validateListQuery (query) { + await Validator.validate(query || {}, Validator.schemas.networkTopologyListQuery) + return _parseLimitOffset(query || {}) +} + +async function _validateSubgraphQuery (query) { + await Validator.validate(query || {}, Validator.schemas.networkTopologySubgraphQuery) + const { limit } = _parseLimitOffset(query || {}) + let depth = DEFAULT_SUBGRAPH_DEPTH + if (query.depth !== undefined && query.depth !== null && query.depth !== '') { + const parsedDepth = parseInt(query.depth, 10) + if (!isNaN(parsedDepth) && parsedDepth > 0) { + depth = Math.min(parsedDepth, MAX_SUBGRAPH_DEPTH) + } + } + const nodeLimit = Math.min(limit, MAX_SUBGRAPH_LIMIT) + return { + center: query.center, + depth, + 
limit: nodeLimit + } +} + +function _getRouterNodeId (router, defaultRouter) { + return (defaultRouter && router.id === defaultRouter.id) + ? Constants.DEFAULT_ROUTER_NAME + : router.iofogUuid +} + +function _getNatsNodeId (nats, defaultHub) { + return (defaultHub && nats.id === defaultHub.id) + ? Constants.DEFAULT_NATS_HUB_NAME + : nats.iofogUuid +} + +function _routerDeploymentTarget (router, defaultRouter) { + if (defaultRouter && router.id === defaultRouter.id) { + return _isKubernetesControlPlane() ? 'kubernetes' : 'remote' + } + return 'edgelet' +} + +function _natsDeploymentTarget (nats, defaultHub) { + if (defaultHub && nats.id === defaultHub.id) { + return _isKubernetesControlPlane() ? 'kubernetes' : 'remote' + } + return 'edgelet' +} + +function _routerRole (router, defaultRouter) { + if (defaultRouter && router.id === defaultRouter.id) { + return 'default' + } + return router.isEdge ? 'edge' : 'interior' +} + +function _natsRole (nats, defaultHub) { + if (defaultHub && nats.id === defaultHub.id) { + return 'hub' + } + return nats.isLeaf ? 'leaf' : 'server' +} + +function _routerMode (router) { + return router.isEdge ? 'edge' : 'interior' +} + +function _natsMode (nats) { + return nats.isLeaf ? 'leaf' : 'server' +} + +function _routerDisplayName (router, defaultRouter, fog) { + const deploymentTarget = _routerDeploymentTarget(router, defaultRouter) + if (deploymentTarget === 'kubernetes') { + return 'Kubernetes Router' + } + if (deploymentTarget === 'remote') { + return 'Default Router' + } + return fog ? fog.name : router.iofogUuid +} + +function _natsDisplayName (nats, defaultHub, fog) { + const deploymentTarget = _natsDeploymentTarget(nats, defaultHub) + if (deploymentTarget === 'kubernetes') { + return 'Kubernetes NATS Hub' + } + if (deploymentTarget === 'remote') { + return 'Default NATS Hub' + } + return fog ? 
fog.name : nats.iofogUuid +} + +async function _loadFogMap (iofogUuids, transaction) { + const uuids = [...new Set((iofogUuids || []).filter(Boolean))] + if (!uuids.length) { + return new Map() + } + + const fogs = await FogManager.findAll({ uuid: { [Op.in]: uuids } }, transaction) + + return new Map(fogs.map((fog) => [fog.uuid, fog])) +} + +function _formatRouterListNode (router, defaultRouter, fogMap) { + const fog = router.iofogUuid ? fogMap.get(router.iofogUuid) : null + return { + id: _getRouterNodeId(router, defaultRouter), + iofogUuid: router.iofogUuid, + fogName: fog ? fog.name : null, + host: router.host || (fog ? fog.host : null) || null, + deploymentTarget: _routerDeploymentTarget(router, defaultRouter), + displayName: _routerDisplayName(router, defaultRouter, fog), + role: _routerRole(router, defaultRouter), + mode: _routerMode(router) + } +} + +function _formatRouterDetailNode (router, defaultRouter, fogMap) { + return { + ..._formatRouterListNode(router, defaultRouter, fogMap), + messagingPort: router.messagingPort, + edgeRouterPort: router.edgeRouterPort, + interRouterPort: router.interRouterPort, + isDefault: !!(defaultRouter && router.id === defaultRouter.id) + } +} + +function _formatNatsListNode (nats, defaultHub, fogMap) { + const fog = nats.iofogUuid ? fogMap.get(nats.iofogUuid) : null + return { + id: _getNatsNodeId(nats, defaultHub), + iofogUuid: nats.iofogUuid, + fogName: fog ? fog.name : null, + host: nats.host || (fog ? 
fog.host : null) || null, + deploymentTarget: _natsDeploymentTarget(nats, defaultHub), + displayName: _natsDisplayName(nats, defaultHub, fog), + role: _natsRole(nats, defaultHub), + mode: _natsMode(nats) + } +} + +function _formatNatsDetailNode (nats, defaultHub, fogMap) { + return { + ..._formatNatsListNode(nats, defaultHub, fogMap), + serverPort: nats.serverPort, + leafPort: nats.leafPort, + clusterPort: nats.clusterPort, + mqttPort: nats.mqttPort, + httpPort: nats.httpPort, + jsStorageSize: nats.jsStorageSize, + jsMemoryStoreSize: nats.jsMemoryStoreSize, + isHub: !!(defaultHub && nats.id === defaultHub.id) + } +} + +function _formatRouterConnection (connection, defaultRouter) { + return { + id: connection.id, + source: _getRouterNodeId(connection.source, defaultRouter), + dest: _getRouterNodeId(connection.dest, defaultRouter) + } +} + +function _formatNatsConnection (connection, defaultHub) { + return { + id: connection.id, + source: _getNatsNodeId(connection.source, defaultHub), + dest: _getNatsNodeId(connection.dest, defaultHub) + } +} + +async function _buildRouterWhere (query, transaction) { + const where = {} + + if (query.role === 'default') { + where.isDefault = true + } else if (query.role === 'edge') { + where.isEdge = true + where.isDefault = false + } else if (query.role === 'interior') { + where.isEdge = false + where.isDefault = false + } + + if (query.deploymentTarget === 'edgelet') { + where.iofogUuid = { [Op.ne]: null } + } else if (query.deploymentTarget === 'kubernetes') { + if (!_isKubernetesControlPlane()) { + return { where: { id: -1 }, empty: true } + } + where.isDefault = true + } else if (query.deploymentTarget === 'remote') { + if (_isKubernetesControlPlane()) { + return { where: { id: -1 }, empty: true } + } + where.isDefault = true + } + + if (query.search) { + const matchingFogs = await FogManager.findAll({ name: { [Op.like]: `${query.search}%` } }, transaction) + const uuids = matchingFogs.map((fog) => fog.uuid) + if 
(!_applyFogUuidSearchFilter(where, uuids)) { + return { where: { id: -1 }, empty: true } + } + } + + return { where, empty: false } +} + +function _applyFogUuidSearchFilter (where, uuids) { + if (!uuids.length) { + return null + } + where.iofogUuid = where.iofogUuid + ? { [Op.and]: [where.iofogUuid, { [Op.in]: uuids }] } + : { [Op.in]: uuids } + return where +} + +async function _buildNatsWhere (query, transaction) { + const defaultHub = await NatsInstanceManager.findOne({ isHub: true }, transaction) + const where = {} + + if (query.role === 'hub') { + where.isHub = true + } else if (query.role === 'leaf') { + where.isLeaf = true + where.isHub = false + } else if (query.role === 'server') { + where.isLeaf = false + where.isHub = false + } + + if (query.deploymentTarget === 'edgelet') { + where.iofogUuid = { [Op.ne]: null } + } else if (query.deploymentTarget === 'kubernetes') { + if (!_isKubernetesControlPlane() || !defaultHub) { + return { where: { id: -1 }, empty: true } + } + where.id = defaultHub.id + } else if (query.deploymentTarget === 'remote') { + if (_isKubernetesControlPlane() || !defaultHub) { + return { where: { id: -1 }, empty: true } + } + where.id = defaultHub.id + } + + if (query.search) { + const matchingFogs = await FogManager.findAll({ name: { [Op.like]: `${query.search}%` } }, transaction) + const uuids = matchingFogs.map((fog) => fog.uuid) + if (!_applyFogUuidSearchFilter(where, uuids)) { + return { where: { id: -1 }, empty: true } + } + } + + return { where, empty: false } +} + +async function _findRouterByNodeId (nodeId, transaction) { + const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) + if (nodeId === Constants.DEFAULT_ROUTER_NAME) { + if (!defaultRouter) { + throw new Errors.NotFoundError(`Router node '${nodeId}' not found`) + } + return { router: defaultRouter, defaultRouter } + } + + const router = await RouterManager.findOne({ iofogUuid: nodeId }, transaction) + if (!router) { + throw new 
Errors.NotFoundError(`Router node '${nodeId}' not found`) + } + return { router, defaultRouter } +} + +async function _findNatsByNodeId (nodeId, transaction) { + const defaultHub = await NatsInstanceManager.findOne({ isHub: true }, transaction) + if (nodeId === Constants.DEFAULT_NATS_HUB_NAME) { + if (!defaultHub) { + throw new Errors.NotFoundError(`NATS node '${nodeId}' not found`) + } + return { nats: defaultHub, defaultHub } + } + + const nats = await NatsInstanceManager.findOne({ iofogUuid: nodeId }, transaction) + if (!nats) { + throw new Errors.NotFoundError(`NATS node '${nodeId}' not found`) + } + return { nats, defaultHub } +} + +async function _countRouterRoles (transaction) { + const [defaultCount, edgeCount, interiorCount] = await Promise.all([ + _routerModel().count({ where: { isDefault: true }, transaction }), + _routerModel().count({ where: { isEdge: true, isDefault: false }, transaction }), + _routerModel().count({ where: { isEdge: false, isDefault: false }, transaction }) + ]) + return { default: defaultCount, edge: edgeCount, interior: interiorCount } +} + +async function _countNatsRoles (transaction) { + const [hubCount, leafCount, serverCount] = await Promise.all([ + _natsInstanceModel().count({ where: { isHub: true }, transaction }), + _natsInstanceModel().count({ where: { isLeaf: true, isHub: false }, transaction }), + _natsInstanceModel().count({ where: { isLeaf: false, isHub: false }, transaction }) + ]) + return { hub: hubCount, leaf: leafCount, server: serverCount } +} + +function _buildSpokeGroups (connections, getNodeId, getRole, defaultNodeId) { + const groups = new Map() + for (const connection of connections || []) { + const upstreamOf = getNodeId(connection.dest) + if (upstreamOf !== defaultNodeId) { + continue + } + const role = getRole(connection.source) + const key = `${upstreamOf}:${role}` + groups.set(key, (groups.get(key) || 0) + 1) + } + + return [...groups.entries()].map(([key, count]) => { + const [upstreamOf, role] = 
key.split(':') + return { upstreamOf, role, count } + }) +} + +async function getSummary (_req, transaction) { + const [routerTotalNodes, routerTotalConnections, natsTotalNodes, natsTotalConnections, routerByRole, natsByRole] = await Promise.all([ + _routerModel().count({ transaction }), + _routerConnectionModel().count({ transaction }), + _natsInstanceModel().count({ transaction }), + _natsConnectionModel().count({ transaction }), + _countRouterRoles(transaction), + _countNatsRoles(transaction) + ]) + + return { + controlPlane: _getControlPlane(), + router: { + totalNodes: routerTotalNodes, + totalConnections: routerTotalConnections, + byRole: routerByRole + }, + nats: { + totalNodes: natsTotalNodes, + totalConnections: natsTotalConnections, + byRole: natsByRole + } + } +} + +async function getRouterOverview (_req, transaction) { + const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) + const interiorRouters = await RouterManager.findAll({ isDefault: false, isEdge: false }, transaction) + const fogMap = await _loadFogMap( + interiorRouters.map((router) => router.iofogUuid).concat(defaultRouter ? [defaultRouter.iofogUuid] : []), + transaction + ) + + let spokeGroups = [] + if (defaultRouter) { + const connections = await RouterConnectionManager.findAllWithRouters({ destRouter: defaultRouter.id }, transaction) + spokeGroups = _buildSpokeGroups( + connections, + (router) => _getRouterNodeId(router, defaultRouter), + (router) => _routerRole(router, defaultRouter), + Constants.DEFAULT_ROUTER_NAME + ) + } + + return { + defaultNode: defaultRouter ? 
_formatRouterListNode(defaultRouter, defaultRouter, fogMap) : null, + interiorNodes: interiorRouters.map((router) => _formatRouterListNode(router, defaultRouter, fogMap)), + spokeGroups + } +} + +async function getNatsOverview (_req, transaction) { + const defaultHub = await NatsInstanceManager.findOne({ isHub: true }, transaction) + const serverNodes = await NatsInstanceManager.findAll({ isLeaf: false, isHub: false }, transaction) + const fogMap = await _loadFogMap( + serverNodes.map((nats) => nats.iofogUuid).concat(defaultHub ? [defaultHub.iofogUuid] : []), + transaction + ) + + let spokeGroups = [] + if (defaultHub) { + const connections = await NatsConnectionManager.findAllWithNats({ destNats: defaultHub.id }, transaction) + spokeGroups = _buildSpokeGroups( + connections, + (nats) => _getNatsNodeId(nats, defaultHub), + (nats) => _natsRole(nats, defaultHub), + Constants.DEFAULT_NATS_HUB_NAME + ) + } + + return { + defaultNode: defaultHub ? _formatNatsListNode(defaultHub, defaultHub, fogMap) : null, + serverNodes: serverNodes.map((nats) => _formatNatsListNode(nats, defaultHub, fogMap)), + spokeGroups + } +} + +async function listRouterNodes (req, transaction) { + const { limit, offset } = await _validateListQuery(req.query) + const { where, empty } = await _buildRouterWhere(req.query || {}, transaction) + if (empty) { + return { nodes: [], total: 0, limit, offset } + } + + const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) + const { count, rows } = await _routerModel().findAndCountAll({ + where, + limit, + offset, + order: [['id', 'ASC']], + transaction + }) + + const fogMap = await _loadFogMap(rows.map((router) => router.iofogUuid), transaction) + return { + nodes: rows.map((router) => _formatRouterListNode(router, defaultRouter, fogMap)), + total: count, + limit, + offset + } +} + +async function listNatsNodes (req, transaction) { + const { limit, offset } = await _validateListQuery(req.query) + const { where, empty } = await 
_buildNatsWhere(req.query || {}, transaction) + if (empty) { + return { nodes: [], total: 0, limit, offset } + } + + const defaultHub = await NatsInstanceManager.findOne({ isHub: true }, transaction) + const { count, rows } = await _natsInstanceModel().findAndCountAll({ + where, + limit, + offset, + order: [['id', 'ASC']], + transaction + }) + + const fogMap = await _loadFogMap(rows.map((nats) => nats.iofogUuid), transaction) + return { + nodes: rows.map((nats) => _formatNatsListNode(nats, defaultHub, fogMap)), + total: count, + limit, + offset + } +} + +async function getRouterNode (req, transaction) { + const { router, defaultRouter } = await _findRouterByNodeId(req.params.id, transaction) + const fogMap = await _loadFogMap([router.iofogUuid], transaction) + return _formatRouterDetailNode(router, defaultRouter, fogMap) +} + +async function getNatsNode (req, transaction) { + const { nats, defaultHub } = await _findNatsByNodeId(req.params.id, transaction) + const fogMap = await _loadFogMap([nats.iofogUuid], transaction) + return _formatNatsDetailNode(nats, defaultHub, fogMap) +} + +async function getRouterNodeConnections (req, transaction) { + const { router, defaultRouter } = await _findRouterByNodeId(req.params.id, transaction) + const [upstreamConnections, downstreamConnections] = await Promise.all([ + RouterConnectionManager.findAllWithRouters({ sourceRouter: router.id }, transaction), + RouterConnectionManager.findAllWithRouters({ destRouter: router.id }, transaction) + ]) + + return { + upstream: (upstreamConnections || []).map((connection) => _formatRouterConnection(connection, defaultRouter)), + downstream: (downstreamConnections || []).map((connection) => _formatRouterConnection(connection, defaultRouter)) + } +} + +async function getNatsNodeConnections (req, transaction) { + const { nats, defaultHub } = await _findNatsByNodeId(req.params.id, transaction) + const [upstreamConnections, downstreamConnections] = await Promise.all([ + 
NatsConnectionManager.findAllWithNats({ sourceNats: nats.id }, transaction), + NatsConnectionManager.findAllWithNats({ destNats: nats.id }, transaction) + ]) + + return { + upstream: (upstreamConnections || []).map((connection) => _formatNatsConnection(connection, defaultHub)), + downstream: (downstreamConnections || []).map((connection) => _formatNatsConnection(connection, defaultHub)) + } +} + +async function listRouterConnections (req, transaction) { + const { limit, offset } = await _validateListQuery(req.query) + const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) + const { count, rows } = await _routerConnectionModel().findAndCountAll({ + include: [ + { model: _routerModel(), as: 'source', required: true }, + { model: _routerModel(), as: 'dest', required: true } + ], + limit, + offset, + order: [['id', 'ASC']], + transaction + }) + + return { + connections: rows.map((connection) => _formatRouterConnection(connection, defaultRouter)), + total: count, + limit, + offset + } +} + +async function listNatsConnections (req, transaction) { + const { limit, offset } = await _validateListQuery(req.query) + const defaultHub = await NatsInstanceManager.findOne({ isHub: true }, transaction) + const { count, rows } = await _natsConnectionModel().findAndCountAll({ + include: [ + { model: _natsInstanceModel(), as: 'source', required: true }, + { model: _natsInstanceModel(), as: 'dest', required: true } + ], + limit, + offset, + order: [['id', 'ASC']], + transaction + }) + + return { + connections: rows.map((connection) => _formatNatsConnection(connection, defaultHub)), + total: count, + limit, + offset + } +} + +async function _buildSubgraph (layer, centerId, depth, nodeLimit, transaction) { + const isRouter = layer === 'router' + const findByNodeId = isRouter ? _findRouterByNodeId : _findNatsByNodeId + const connectionManager = isRouter ? RouterConnectionManager : NatsConnectionManager + const sourceField = isRouter ? 
'sourceRouter' : 'sourceNats' + const destField = isRouter ? 'destRouter' : 'destNats' + const findAllWith = isRouter ? 'findAllWithRouters' : 'findAllWithNats' + const formatListNode = isRouter ? _formatRouterListNode : _formatNatsListNode + const formatConnection = isRouter ? _formatRouterConnection : _formatNatsConnection + + const centerLookup = await findByNodeId(centerId, transaction) + const anchor = isRouter ? centerLookup.router : centerLookup.nats + const defaultAnchor = isRouter ? centerLookup.defaultRouter : centerLookup.defaultHub + + const nodeRecords = new Map([[anchor.id, anchor]]) + const connectionRecords = new Map() + let frontierIds = new Set([anchor.id]) + + for (let hop = 0; hop < depth; hop++) { + const nextFrontier = new Set() + for (const nodeId of frontierIds) { + const [upstream, downstream] = await Promise.all([ + connectionManager[findAllWith]({ [sourceField]: nodeId }, transaction), + connectionManager[findAllWith]({ [destField]: nodeId }, transaction) + ]) + + for (const connection of [...(upstream || []), ...(downstream || [])]) { + connectionRecords.set(connection.id, connection) + nodeRecords.set(connection.source.id, connection.source) + nodeRecords.set(connection.dest.id, connection.dest) + if (connection.source.id !== nodeId) { + nextFrontier.add(connection.source.id) + } + if (connection.dest.id !== nodeId) { + nextFrontier.add(connection.dest.id) + } + } + } + frontierIds = nextFrontier + if (nodeRecords.size >= nodeLimit) { + break + } + } + + const limitedNodes = [...nodeRecords.values()].slice(0, nodeLimit) + const limitedNodeIds = new Set(limitedNodes.map((node) => node.id)) + const fogMap = await _loadFogMap(limitedNodes.map((node) => node.iofogUuid), transaction) + + const connections = [...connectionRecords.values()] + .filter((connection) => limitedNodeIds.has(connection.source.id) && limitedNodeIds.has(connection.dest.id)) + .map((connection) => formatConnection(connection, defaultAnchor)) + + return { + nodes: 
limitedNodes.map((node) => formatListNode(node, defaultAnchor, fogMap)), + connections + } +} + +async function getRouterSubgraph (req, transaction) { + const { center, depth, limit } = await _validateSubgraphQuery(req.query) + return _buildSubgraph('router', center, depth, limit, transaction) +} + +async function getNatsSubgraph (req, transaction) { + const { center, depth, limit } = await _validateSubgraphQuery(req.query) + return _buildSubgraph('nats', center, depth, limit, transaction) +} + +module.exports = { + getSummary: TransactionDecorator.generateTransaction(getSummary), + getRouterOverview: TransactionDecorator.generateTransaction(getRouterOverview), + getNatsOverview: TransactionDecorator.generateTransaction(getNatsOverview), + listRouterNodes: TransactionDecorator.generateTransaction(listRouterNodes), + listNatsNodes: TransactionDecorator.generateTransaction(listNatsNodes), + getRouterNode: TransactionDecorator.generateTransaction(getRouterNode), + getNatsNode: TransactionDecorator.generateTransaction(getNatsNode), + getRouterNodeConnections: TransactionDecorator.generateTransaction(getRouterNodeConnections), + getNatsNodeConnections: TransactionDecorator.generateTransaction(getNatsNodeConnections), + listRouterConnections: TransactionDecorator.generateTransaction(listRouterConnections), + listNatsConnections: TransactionDecorator.generateTransaction(listNatsConnections), + getRouterSubgraph: TransactionDecorator.generateTransaction(getRouterSubgraph), + getNatsSubgraph: TransactionDecorator.generateTransaction(getNatsSubgraph) +} diff --git a/src/services/rbac-service.js b/src/services/rbac-service.js index f6c94413..5263eb63 100644 --- a/src/services/rbac-service.js +++ b/src/services/rbac-service.js @@ -87,6 +87,19 @@ async function createRoleEndpoint (roleData, transaction) { } } +/** + * Build roleRef with the canonical role name after a role update (handles rename). 
+ * @param {Object|null|undefined} existingRef - Existing roleRef from binding or service account + * @param {string} updatedRoleName - Canonical role name post-update + * @returns {Object} roleRef object + */ +function refreshedRoleRef (existingRef, updatedRoleName) { + return { + kind: (existingRef && existingRef.kind) || 'Role', + name: updatedRoleName + } +} + async function updateRoleEndpoint (name, roleData, transaction) { // Validate schema await Validator.validate(roleData, Validator.schemas.roleUpdate) @@ -123,9 +136,9 @@ async function updateRoleEndpoint (name, roleData, transaction) { // Find all role bindings that reference this role using roleId for efficient querying const bindings = await RbacRoleBindingManager.findAll({ roleId }, transaction) for (const binding of bindings) { - // Trigger update to refresh cache and ensure roleId is set + // Trigger update to refresh cache, roleId, and roleRef.name (including on rename) await RbacRoleBindingManager.updateRoleBinding(binding.name, { - roleRef: binding.roleRef + roleRef: refreshedRoleRef(binding.roleRef, updatedRoleName) }, transaction) } @@ -136,7 +149,7 @@ async function updateRoleEndpoint (name, roleData, transaction) { const appName = application ? 
application.name : null if (appName) { await RbacServiceAccountManager.updateServiceAccount(appName, sa.name, { - roleRef: sa.roleRef + roleRef: refreshedRoleRef(sa.roleRef, updatedRoleName) }, transaction) } } diff --git a/src/services/registry-service.js b/src/services/registry-service.js index 41985fdc..8a20f281 100644 --- a/src/services/registry-service.js +++ b/src/services/registry-service.js @@ -1,5 +1,9 @@ const RegistryManager = require('../data/managers/registry-manager') const SecretHelper = require('../helpers/secret-helper') +const { + scheduleVaultDeleteAfterCommit, + scheduleVaultPromoteAfterCommit +} = require('../helpers/vault-transaction-helper') const Validator = require('../schemas') const Errors = require('../helpers/errors') const ErrorMessages = require('../helpers/error-messages') @@ -31,16 +35,22 @@ const createRegistry = async function (registry, transaction) { const createdRegistry = await RegistryManager.create(registryCreate, transaction) if (!isPasswordEmpty(registryCreate.password)) { - const encryptedPassword = await SecretHelper.encryptSecret( - { value: registryCreate.password }, - 'registry-' + createdRegistry.id, - 'registry' - ) + const secretName = 'registry-' + createdRegistry.id + const secretData = { value: registryCreate.password } + const internalEncrypted = await SecretHelper.encryptSecretInternal(secretData, secretName) await RegistryManager.update( { id: createdRegistry.id }, - { password: encryptedPassword }, + { password: internalEncrypted }, transaction ) + scheduleVaultPromoteAfterCommit(transaction, { + secretData, + secretName, + secretType: 'registry', + model: () => require('../data/models').Registry, + where: { id: createdRegistry.id }, + field: 'password' + }) } await _updateChangeTracking(transaction) @@ -110,7 +120,7 @@ const updateRegistry = async function (registry, registryId, isCLI, transaction) registryUpdate = AppHelper.deleteUndefinedFields(registryUpdate) if (registryUpdate.password !== undefined && 
isPasswordEmpty(registryUpdate.password) && SecretHelper.isVaultReference(existingRegistry.password)) { - await SecretHelper.deleteSecret('registry-' + existingRegistry.id, 'registry') + scheduleVaultDeleteAfterCommit(transaction, 'registry-' + existingRegistry.id, 'registry') } const where = isCLI diff --git a/src/services/router-connection-manager.js b/src/services/router-connection-manager.js index 22d7aaf6..edff2696 100644 --- a/src/services/router-connection-manager.js +++ b/src/services/router-connection-manager.js @@ -7,6 +7,7 @@ const RouterManager = require('../data/managers/router-manager') const CertificateService = require('./certificate-service') const SecretService = require('./secret-service') const os = require('os') +const { runInTransaction } = require('../helpers/transaction-runner') const CONTROLLER_CERT_PREFIX = 'controller-exec-session-client' const hostname = process.env.HOSTNAME || os.hostname() @@ -59,7 +60,6 @@ class RouterConnectionManager { this.certificatePromise = null this.cachedCertificate = null this.cachedRouterRecord = null - this.fakeTransaction = { fakeTransaction: true } this.slots = Array.from({ length: this.poolSize }, (_, slotId) => new PoolSlot(this, slotId)) this.recoveryListeners = [] this.saturationCount = 0 @@ -502,7 +502,10 @@ class RouterConnectionManager { if (this.cachedRouterRecord) { return this.cachedRouterRecord } - const router = await RouterManager.findOne({ isDefault: true }, this.fakeTransaction) + const router = await runInTransaction( + (transaction) => RouterManager.findOne({ isDefault: true }, transaction), + { label: 'router-connection-default-router' } + ) if (!router) { throw new Error('Default router not found. 
Please ensure default router is provisioned.') } @@ -536,42 +539,46 @@ class RouterConnectionManager { async _createControllerCertificate () { logger.debug('[AMQP] Ensuring controller certificate secret exists', { name: CONTROLLER_CERT_NAME }) - await CertificateService.ensureRouterLocalCA(this.fakeTransaction) - const existingSecret = await this._safeGetSecret(CONTROLLER_CERT_NAME) const caName = Constants.DEFAULT_ROUTER_LOCAL_CA - if (existingSecret) { - const caSecret = await this._safeGetSecret(caName) - const bundle = this._decodeCertificate(existingSecret, caSecret) - logger.debug({ msg: '[AMQP] Using existing controller-exec-session-client certificate', ca: caName }) - return bundle - } - const hosts = this._buildControllerHosts() - logger.debug({ msg: '[AMQP] Generating controller-exec-session-client certificate', hosts, ca: caName }) + return runInTransaction(async (transaction) => { + await CertificateService.ensureRouterLocalCA(transaction) - try { - await CertificateService.createCertificateEndpoint({ - name: CONTROLLER_CERT_NAME, - subject: CONTROLLER_CERT_NAME, - hosts: hosts.join(','), - ca: { - type: 'direct', - secretName: caName - }, - expiration: 36 - }) - } catch (error) { - logger.error({ err: error, ca: caName, msg: '[AMQP] Failed to create controller certificate' }) - throw error - } + const existingSecret = await this._safeGetSecret(CONTROLLER_CERT_NAME, transaction) + if (existingSecret) { + const caSecret = await this._safeGetSecret(caName, transaction) + const bundle = this._decodeCertificate(existingSecret, caSecret) + logger.debug({ msg: '[AMQP] Using existing controller-exec-session-client certificate', ca: caName }) + return bundle + } - const certSecret = await this._safeGetSecret(CONTROLLER_CERT_NAME) - const caSecret = await this._safeGetSecret(caName) - if (!certSecret || !caSecret) { - throw new Error('Controller certificate creation succeeded but secret not found') - } - logger.debug({ msg: '[AMQP] controller-exec-session-client 
certificate generated successfully', ca: caName }) - return this._decodeCertificate(certSecret, caSecret) + const hosts = this._buildControllerHosts() + logger.debug({ msg: '[AMQP] Generating controller-exec-session-client certificate', hosts, ca: caName }) + + try { + await CertificateService.createCertificateEndpoint({ + name: CONTROLLER_CERT_NAME, + subject: CONTROLLER_CERT_NAME, + hosts: hosts.join(','), + ca: { + type: 'direct', + secretName: caName + }, + expiration: 36 + }, transaction) + } catch (error) { + logger.error({ err: error, ca: caName, msg: '[AMQP] Failed to create controller certificate' }) + throw error + } + + const certSecret = await this._safeGetSecret(CONTROLLER_CERT_NAME, transaction) + const caSecret = await this._safeGetSecret(caName, transaction) + if (!certSecret || !caSecret) { + throw new Error('Controller certificate creation succeeded but secret not found') + } + logger.debug({ msg: '[AMQP] controller-exec-session-client certificate generated successfully', ca: caName }) + return this._decodeCertificate(certSecret, caSecret) + }, { label: 'router-connection-controller-cert' }) } _buildControllerHosts () { @@ -604,9 +611,9 @@ class RouterConnectionManager { } } - async _safeGetSecret (name) { + async _safeGetSecret (name, transaction) { try { - return await SecretService.getSecretEndpoint(name) + return await SecretService.getSecretEndpoint(name, transaction) } catch (error) { if (error.name === 'NotFoundError') { logger.debug('[AMQP] Secret not found', { secret: name }) diff --git a/src/services/router-service.js b/src/services/router-service.js index 7663eb45..38051f1b 100644 --- a/src/services/router-service.js +++ b/src/services/router-service.js @@ -205,13 +205,22 @@ async function _updateRouterPorts (routerMicroserviceUuid, router, transaction) } } -async function updateConfig (routerID, containerEngine, transaction) { - const router = await RouterManager.findOne({ id: routerID }, transaction) +function 
_upstreamConnectorsFingerprint (connectors) { + if (!connectors || typeof connectors !== 'object') { + return '' + } + return Object.keys(connectors).sort().map((name) => { + const connector = connectors[name] + return `${name}:${connector.host}:${connector.port}:${connector.role}:${connector.sslProfile || ''}` + }).join('|') +} + +async function buildFreshRouterMicroserviceConfig (routerID, containerEngine, transaction, routerArg = null) { + const router = routerArg || await RouterManager.findOne({ id: routerID }, transaction) if (!router) { throw new Errors.NotFoundError(AppHelper.formatMessage(ErrorMessages.INVALID_ROUTER, routerID)) } - // Get current configuration const routerCatalog = await CatalogService.getRouterCatalogItem(transaction) const routerMicroservice = await MicroserviceManager.findOne({ catalogItemId: routerCatalog.id, @@ -223,8 +232,6 @@ async function updateConfig (routerID, containerEngine, transaction) { } const currentConfig = JSON.parse(routerMicroservice.config || '{}') - - // Generate new configuration const newConfig = await _getRouterMicroserviceConfig( router.isEdge, router.iofogUuid, @@ -235,7 +242,6 @@ async function updateConfig (routerID, containerEngine, transaction) { transaction ) - // Add connectors for upstream routers const upstreamRoutersConnections = await RouterConnectionManager.findAllWithRouters( { sourceRouter: router.id }, transaction @@ -251,11 +257,41 @@ async function updateConfig (routerID, containerEngine, transaction) { newConfig.connectors[connectorConfig.name] = connectorConfig } + // Service platform owns bridges.tcpConnectors/tcpListeners; fog recompute rebuilds listeners. 
+ if (currentConfig.bridges) { + newConfig.bridges = JSON.parse(JSON.stringify(currentConfig.bridges)) + } + + return newConfig +} + +async function updateConfig (routerID, containerEngine, transaction) { + const router = await RouterManager.findOne({ id: routerID }, transaction) + if (!router) { + throw new Errors.NotFoundError(AppHelper.formatMessage(ErrorMessages.INVALID_ROUTER, routerID)) + } + + const routerCatalog = await CatalogService.getRouterCatalogItem(transaction) + const routerMicroservice = await MicroserviceManager.findOne({ + catalogItemId: routerCatalog.id, + iofogUuid: router.iofogUuid + }, transaction) + + if (!routerMicroservice) { + throw new Errors.NotFoundError(AppHelper.formatMessage(ErrorMessages.INVALID_ROUTER, router.id)) + } + + const currentConfig = JSON.parse(routerMicroservice.config || '{}') + const newConfig = await buildFreshRouterMicroserviceConfig(routerID, containerEngine, transaction, router) + await _ensureRouterTlsVolumeMountsAndMappings(router.iofogUuid, routerMicroservice.uuid, transaction, true) await ChangeTrackingService.update(router.iofogUuid, ChangeTrackingService.events.microserviceConfig, transaction) + const upstreamFingerprintChanged = _upstreamConnectorsFingerprint(currentConfig.connectors) !== + _upstreamConnectorsFingerprint(newConfig.connectors) + // Check if configuration needs update - if (JSON.stringify(currentConfig) !== JSON.stringify(newConfig)) { + if (JSON.stringify(currentConfig) !== JSON.stringify(newConfig) || upstreamFingerprintChanged) { await MicroserviceManager.update( { uuid: routerMicroservice.uuid }, { config: JSON.stringify(newConfig) }, @@ -669,6 +705,7 @@ async function findOne (option, transaction) { } module.exports = { + buildFreshRouterMicroserviceConfig: TransactionDecorator.generateTransaction(buildFreshRouterMicroserviceConfig), createRouterForFog: TransactionDecorator.generateTransaction(createRouterForFog), updateConfig: TransactionDecorator.generateTransaction(updateConfig), 
updateRouter: TransactionDecorator.generateTransaction(updateRouter), diff --git a/src/services/secret-service.js b/src/services/secret-service.js index 59500b20..e1b73c8e 100644 --- a/src/services/secret-service.js +++ b/src/services/secret-service.js @@ -11,8 +11,7 @@ const Validator = require('../schemas/index') const VolumeMountService = require('./volume-mount-service') const VolumeMountingManager = require('../data/managers/volume-mounting-manager') const CertificateManager = require('../data/managers/certificate-manager') -const SecretHelper = require('../helpers/secret-helper') -const vaultManager = require('../vault/vault-manager') +const { scheduleVaultDeleteAfterCommit } = require('../helpers/vault-transaction-helper') function validateBase64 (value) { try { @@ -186,37 +185,22 @@ async function deleteSecretEndpoint (secretName, transaction) { } await CertificateManager.deleteCertificate(certificate.name, transaction) await SecretManager.deleteSecret(secretName, transaction) - // Remove secret from external vault if configured - if (vaultManager.isEnabled()) { - await SecretHelper.deleteSecret(secretName, existingSecret.type) - } await _deleteVolumeMountsUsingSecret(secretName, transaction) } else { await CertificateManager.deleteCertificate(certificate.name, transaction) await _deleteVolumeMountsUsingSecret(secretName, transaction) await SecretManager.deleteSecret(secretName, transaction) - // Remove secret from external vault if configured - if (vaultManager.isEnabled()) { - await SecretHelper.deleteSecret(secretName, existingSecret.type) - } } } else { - // Delete secret from database and external vault await SecretManager.deleteSecret(secretName, transaction) await _deleteVolumeMountsUsingSecret(secretName, transaction) - // Remove secret from external vault if configured - if (vaultManager.isEnabled()) { - await SecretHelper.deleteSecret(secretName, existingSecret.type) - } } } else { await SecretManager.deleteSecret(secretName, transaction) await 
_deleteVolumeMountsUsingSecret(secretName, transaction) - // Remove secret from external vault if configured - if (vaultManager.isEnabled()) { - await SecretHelper.deleteSecret(secretName, existingSecret.type) - } } + + scheduleVaultDeleteAfterCommit(transaction, secretName, existingSecret.type) return {} } diff --git a/src/services/service-bridge-config.js b/src/services/service-bridge-config.js index e9c50e3d..6624f05c 100644 --- a/src/services/service-bridge-config.js +++ b/src/services/service-bridge-config.js @@ -11,7 +11,7 @@ const ErrorMessages = require('../helpers/error-messages') const AppHelper = require('../helpers/app-helper') function isServiceDerivedBridgeKey (name) { - return typeof name === 'string' && (name.endsWith('-listener') || name.endsWith('-connector')) + return typeof name === 'string' && name.endsWith('-listener') } function stripServiceDerivedBridges (config) { @@ -27,13 +27,6 @@ function stripServiceDerivedBridges (config) { } } } - if (result.bridges.tcpConnectors) { - for (const key of Object.keys(result.bridges.tcpConnectors)) { - if (isServiceDerivedBridgeKey(key)) { - delete result.bridges.tcpConnectors[key] - } - } - } return result } @@ -53,6 +46,7 @@ async function _resolveFogTagValues (fogUuid, transaction) { } async function recomputeServiceBridgeConfig (fogUuid, baseConfig, transaction) { + // baseConfig must be freshly built from router DB state (connectors/upstreams), not stale MS JSON. 
let config = stripServiceDerivedBridges(baseConfig) const tagValues = await _resolveFogTagValues(fogUuid, transaction) diff --git a/src/services/service-platform-service.js b/src/services/service-platform-service.js index 14d97bfe..9cda3cf9 100644 --- a/src/services/service-platform-service.js +++ b/src/services/service-platform-service.js @@ -1,10 +1,9 @@ -const TransactionDecorator = require('../decorators/transaction-decorator') const config = require('../config') const Errors = require('../helpers/errors') const ErrorMessages = require('../helpers/error-messages') const AppHelper = require('../helpers/app-helper') const ServiceManager = require('../data/managers/service-manager') -const FogPlatformReconcileTaskManager = require('../data/managers/fog-platform-reconcile-task-manager') +const ReconcileOutboxManager = require('../data/managers/reconcile-outbox-manager') const ServicePlatformReconcileTaskManager = require('../data/managers/service-platform-reconcile-task-manager') const HubRouterConfigLockManager = require('../data/managers/hub-router-config-lock-manager') const RouterManager = require('../data/managers/router-manager') @@ -13,6 +12,7 @@ const FogManager = require('../data/managers/iofog-manager') const ChangeTrackingService = require('./change-tracking-service') const ServicesService = require('./services-service') const K8sClient = require('../utils/k8s-client') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') const { ensureSystemApplication, getSystemMicroserviceName @@ -80,10 +80,6 @@ async function _updateRouterMicroserviceConfig (fogNodeUuid, routerConfig, trans } async function _patchK8sRouterConfig (routerConfig) { - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap) { - throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) - } await K8sClient.patchConfigMap(K8S_ROUTER_CONFIG_MAP, { data: { 'skrouterd.json': JSON.stringify(routerConfig) 
@@ -102,28 +98,86 @@ async function _resolveHubListenerFogUuid (serviceConfig, transaction) { return serviceConfig.defaultBridge } -async function upsertHubTcpListener (serviceConfig, transaction) { - const isK8s = await ServicesService.checkKubernetesEnvironment() - const listener = ServicesService._buildTcpListener(serviceConfig) +function emptyK8sHubRouterPlan () { + return { + upsertListeners: [], + upsertConnectors: [], + deleteListenerNames: [], + deleteConnectorNames: [] + } +} - if (isK8s) { - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap) { - throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) +function mergeK8sHubRouterPlans (...plans) { + const merged = emptyK8sHubRouterPlan() + for (const plan of plans) { + if (!plan) { + continue + } + merged.upsertListeners.push(...plan.upsertListeners) + merged.upsertConnectors.push(...plan.upsertConnectors) + merged.deleteListenerNames.push(...plan.deleteListenerNames) + merged.deleteConnectorNames.push(...plan.deleteConnectorNames) + } + return merged +} + +function applyK8sHubRouterPlanToConfig (routerConfig, plan) { + let updatedConfig = routerConfig + + for (const connectorName of plan.deleteConnectorNames) { + updatedConfig = updatedConfig.filter((item) => + !(item[0] === 'tcpConnector' && item[1].name === connectorName) + ) + } + for (const listenerName of plan.deleteListenerNames) { + updatedConfig = updatedConfig.filter((item) => + !(item[0] === 'tcpListener' && item[1].name === listenerName) + ) + } + for (const connector of plan.upsertConnectors) { + const connectorIndex = updatedConfig.findIndex((item) => + item[0] === 'tcpConnector' && item[1].name === connector.name + ) + if (connectorIndex !== -1) { + updatedConfig[connectorIndex] = ['tcpConnector', connector] + } else { + updatedConfig.push(['tcpConnector', connector]) } - const routerConfig = JSON.parse(configMap.data['skrouterd.json']) - const listenerIndex = 
routerConfig.findIndex((item) => + } + for (const listener of plan.upsertListeners) { + const listenerIndex = updatedConfig.findIndex((item) => item[0] === 'tcpListener' && item[1].name === listener.name ) if (listenerIndex !== -1) { - routerConfig[listenerIndex] = ['tcpListener', listener] + updatedConfig[listenerIndex] = ['tcpListener', listener] } else { - routerConfig.push(['tcpListener', listener]) + updatedConfig.push(['tcpListener', listener]) } - await _patchK8sRouterConfig(routerConfig) + } + + return updatedConfig +} + +async function applyK8sHubRouterPlan (plan) { + const hasChanges = plan.upsertListeners.length > 0 || + plan.upsertConnectors.length > 0 || + plan.deleteListenerNames.length > 0 || + plan.deleteConnectorNames.length > 0 + if (!hasChanges) { return } + const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) + if (!configMap) { + throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) + } + const routerConfig = JSON.parse(configMap.data['skrouterd.json']) + const updatedConfig = applyK8sHubRouterPlanToConfig(routerConfig, plan) + await _patchK8sRouterConfig(updatedConfig) +} + +async function upsertHubTcpListenerDb (serviceConfig, transaction) { + const listener = ServicesService._buildTcpListener(serviceConfig) const fogNodeUuid = await _resolveHubListenerFogUuid(serviceConfig, transaction) const routerMicroservice = await _getRouterMicroservice(fogNodeUuid, transaction) const currentConfig = JSON.parse(routerMicroservice.config || '{}') @@ -137,30 +191,11 @@ async function upsertHubTcpListener (serviceConfig, transaction) { await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) } -async function upsertHubTcpConnector (serviceConfig, transaction) { - const isK8s = await ServicesService.checkKubernetesEnvironment() +async function upsertHubTcpConnectorDb (serviceConfig, transaction) { const targetRouterNode = await ServicesService._determineConnectorSiteId(serviceConfig, 
transaction) const connector = await ServicesService._buildTcpConnector(serviceConfig, transaction) if (targetRouterNode === 'default-router') { - if (isK8s) { - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap) { - throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) - } - const routerConfig = JSON.parse(configMap.data['skrouterd.json']) - const connectorIndex = routerConfig.findIndex((item) => - item[0] === 'tcpConnector' && item[1].name === connector.name - ) - if (connectorIndex !== -1) { - routerConfig[connectorIndex] = ['tcpConnector', connector] - } else { - routerConfig.push(['tcpConnector', connector]) - } - await _patchK8sRouterConfig(routerConfig) - return - } - const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) if (!defaultRouter) { throw new Errors.NotFoundError('Default router not found') @@ -192,25 +227,11 @@ async function upsertHubTcpConnector (serviceConfig, transaction) { await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) } -async function deleteHubTcpConnector (serviceConfig, transaction) { - const isK8s = await ServicesService.checkKubernetesEnvironment() +async function deleteHubTcpConnectorDb (serviceConfig, transaction) { const connectorName = `${serviceConfig.name}-connector` const targetRouterNode = await ServicesService._determineConnectorSiteId(serviceConfig, transaction) if (targetRouterNode === 'default-router') { - if (isK8s) { - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap) { - throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) - } - const routerConfig = JSON.parse(configMap.data['skrouterd.json']) - const updatedConfig = routerConfig.filter((item) => - !(item[0] === 'tcpConnector' && item[1].name === connectorName) - ) - await _patchK8sRouterConfig(updatedConfig) - return - } - const defaultRouter = await RouterManager.findOne({ isDefault: 
true }, transaction) if (!defaultRouter) { throw new Errors.NotFoundError('Default router not found') @@ -234,23 +255,8 @@ async function deleteHubTcpConnector (serviceConfig, transaction) { await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) } -async function deleteHubTcpListener (serviceConfig, transaction) { - const isK8s = await ServicesService.checkKubernetesEnvironment() +async function deleteHubTcpListenerDb (serviceConfig, transaction) { const listenerName = `${serviceConfig.name}-listener` - - if (isK8s) { - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap) { - throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) - } - const routerConfig = JSON.parse(configMap.data['skrouterd.json']) - const updatedConfig = routerConfig.filter((item) => - !(item[0] === 'tcpListener' && item[1].name === listenerName) - ) - await _patchK8sRouterConfig(updatedConfig) - return - } - const fogNodeUuid = await _resolveHubListenerFogUuid(serviceConfig, transaction) const routerMicroservice = await _getRouterMicroservice(fogNodeUuid, transaction) const currentConfig = JSON.parse(routerMicroservice.config || '{}') @@ -260,15 +266,111 @@ async function deleteHubTcpListener (serviceConfig, transaction) { await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) } -async function acquireHubLockWithTimeout (controllerUuid, transaction) { +async function planHubTcpConnectorUpsert (serviceConfig, transaction) { + const targetRouterNode = await ServicesService._determineConnectorSiteId(serviceConfig, transaction) + const connector = await ServicesService._buildTcpConnector(serviceConfig, transaction) + + if (targetRouterNode === 'default-router') { + return { + ...emptyK8sHubRouterPlan(), + upsertConnectors: [connector] + } + } + + await upsertHubTcpConnectorDb(serviceConfig, transaction) + return emptyK8sHubRouterPlan() +} + +async function planHubTcpConnectorDelete 
(serviceConfig, transaction) { + const connectorName = `${serviceConfig.name}-connector` + const targetRouterNode = await ServicesService._determineConnectorSiteId(serviceConfig, transaction) + + if (targetRouterNode === 'default-router') { + return { + ...emptyK8sHubRouterPlan(), + deleteConnectorNames: [connectorName] + } + } + + await deleteHubTcpConnectorDb(serviceConfig, transaction) + return emptyK8sHubRouterPlan() +} + +async function planHubTcpListenerUpsert (serviceConfig) { + const listener = ServicesService._buildTcpListener(serviceConfig) + return { + ...emptyK8sHubRouterPlan(), + upsertListeners: [listener] + } +} + +async function planHubTcpListenerDelete (serviceConfig) { + return { + ...emptyK8sHubRouterPlan(), + deleteListenerNames: [`${serviceConfig.name}-listener`] + } +} + +async function upsertHubTcpListener (serviceConfig, transaction) { + const isK8s = await ServicesService.checkKubernetesEnvironment() + + if (isK8s) { + const plan = await planHubTcpListenerUpsert(serviceConfig) + await applyK8sHubRouterPlan(plan) + return + } + + await upsertHubTcpListenerDb(serviceConfig, transaction) +} + +async function upsertHubTcpConnector (serviceConfig, transaction) { + const isK8s = await ServicesService.checkKubernetesEnvironment() + + if (isK8s) { + const plan = await planHubTcpConnectorUpsert(serviceConfig, transaction) + await applyK8sHubRouterPlan(plan) + return + } + + await upsertHubTcpConnectorDb(serviceConfig, transaction) +} + +async function deleteHubTcpConnector (serviceConfig, transaction) { + const isK8s = await ServicesService.checkKubernetesEnvironment() + + if (isK8s) { + const plan = await planHubTcpConnectorDelete(serviceConfig, transaction) + await applyK8sHubRouterPlan(plan) + return + } + + await deleteHubTcpConnectorDb(serviceConfig, transaction) +} + +async function deleteHubTcpListener (serviceConfig, transaction) { + const isK8s = await ServicesService.checkKubernetesEnvironment() + + if (isK8s) { + const plan = await 
planHubTcpListenerDelete(serviceConfig) + await applyK8sHubRouterPlan(plan) + return + } + + await deleteHubTcpListenerDb(serviceConfig, transaction) +} + +async function acquireHubLockWithTimeout (controllerUuid) { const timeoutSeconds = config.get('settings.hubRouterConfigLockTimeoutSeconds', 120) const deadline = Date.now() + timeoutSeconds * 1000 while (Date.now() < deadline) { - const acquired = await HubRouterConfigLockManager.tryAcquire( - controllerUuid, - timeoutSeconds, - transaction + const acquired = await runInTransaction( + (transaction) => HubRouterConfigLockManager.tryAcquire( + controllerUuid, + timeoutSeconds, + transaction + ), + { priority: PRIORITY_BACKGROUND, label: 'servicePlatform.hubLockAcquire' } ) if (acquired) { return true @@ -279,6 +381,13 @@ async function acquireHubLockWithTimeout (controllerUuid, transaction) { throw new Error(`Timed out waiting for hub router ConfigMap lock after ${timeoutSeconds}s`) } +async function releaseHubLock (controllerUuid) { + await runInTransaction( + (transaction) => HubRouterConfigLockManager.release(controllerUuid, transaction), + { priority: PRIORITY_BACKGROUND, label: 'servicePlatform.hubLockRelease' } + ) +} + async function watchLoadBalancerWithTimeout (serviceName) { const timeoutSeconds = config.get('settings.serviceLoadBalancerWatchTimeoutSeconds', 300) const retryInterval = 2000 @@ -302,19 +411,28 @@ function needsK8sService (serviceConfig, isK8s) { serviceType === 'external' } -async function reconcileK8sService (serviceConfig, isK8s, transaction) { +async function reconcileK8sServiceExternal (serviceConfig, isK8s) { if (!needsK8sService(serviceConfig, isK8s)) { return } - await ServicesService._updateK8sService(serviceConfig, transaction) + const loadBalancerIP = await ServicesService._syncK8sServiceResource(serviceConfig) - if ((serviceConfig.k8sType || '').toLowerCase() === 'loadbalancer') { - const loadBalancerIP = await watchLoadBalancerWithTimeout(serviceConfig.name) - await 
ServiceManager.update( - { name: serviceConfig.name }, - { serviceEndpoint: loadBalancerIP }, - transaction + if ((serviceConfig.k8sType || '').toLowerCase() === 'loadbalancer' && !loadBalancerIP) { + const timeoutSeconds = config.get('settings.serviceLoadBalancerWatchTimeoutSeconds', 300) + throw new Error( + `LoadBalancer IP not assigned for service ${serviceConfig.name} within ${timeoutSeconds}s` + ) + } + + if (loadBalancerIP) { + await runInTransaction( + (transaction) => ServiceManager.update( + { name: serviceConfig.name }, + { serviceEndpoint: loadBalancerIP }, + transaction + ), + { priority: PRIORITY_BACKGROUND, label: 'servicePlatform.k8sLoadBalancerEndpoint' } ) } } @@ -322,7 +440,7 @@ async function reconcileK8sService (serviceConfig, isK8s, transaction) { async function fanOutFogReconcile (serviceTags, transaction) { const fogUuids = await ServicesService.handleServiceDistribution(serviceTags, transaction) for (const fogUuid of fogUuids) { - await FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask({ + await ReconcileOutboxManager.enqueueFogPlatform({ fogUuid, reason: 'service-changed' }, transaction) @@ -331,34 +449,53 @@ async function fanOutFogReconcile (serviceTags, transaction) { } async function reconcileServiceHub (serviceConfig, snapshot, transaction) { + const plans = [] + if (snapshot && snapshot.resource != null && serviceConfig.resource != null && snapshot.resource !== serviceConfig.resource) { - await deleteHubTcpConnector(buildServiceConfigFromRow(snapshot), transaction) + plans.push(await planHubTcpConnectorDelete(buildServiceConfigFromRow(snapshot), transaction)) } - await upsertHubTcpConnector(serviceConfig, transaction) - await upsertHubTcpListener(serviceConfig, transaction) + plans.push(await planHubTcpConnectorUpsert(serviceConfig, transaction)) + plans.push(await planHubTcpListenerUpsert(serviceConfig)) + + return mergeK8sHubRouterPlans(...plans) } -async function reconcileServiceDeleteHub (serviceConfig, isK8s, 
transaction) { - await deleteHubTcpConnector(serviceConfig, transaction) - await deleteHubTcpListener(serviceConfig, transaction) +async function reconcileServiceDeleteHub (serviceConfig, transaction) { + const plans = [ + await planHubTcpConnectorDelete(serviceConfig, transaction), + await planHubTcpListenerDelete(serviceConfig) + ] + return mergeK8sHubRouterPlans(...plans) +} - if (isK8s && (serviceConfig.type || '').toLowerCase() !== 'k8s') { - await ServicesService._deleteK8sService(serviceConfig.name) +async function reconcileServiceHubDb (serviceConfig, snapshot, transaction) { + if (snapshot && + snapshot.resource != null && + serviceConfig.resource != null && + snapshot.resource !== serviceConfig.resource) { + await deleteHubTcpConnectorDb(buildServiceConfigFromRow(snapshot), transaction) } + + await upsertHubTcpConnectorDb(serviceConfig, transaction) + await upsertHubTcpListenerDb(serviceConfig, transaction) } -async function reconcileService (serviceName, task, transaction) { +async function reconcileServiceDeleteHubDb (serviceConfig, transaction) { + await deleteHubTcpConnectorDb(serviceConfig, transaction) + await deleteHubTcpListenerDb(serviceConfig, transaction) +} + +async function reconcileService (serviceName, task) { const startedAt = Date.now() const isDelete = task && task.reason === 'delete' const snapshot = task ? 
ServicePlatformReconcileTaskManager.getParsedSpecSnapshot(task) : null const controllerUuid = getControllerUuid() - let hubLockHeld = false - try { + const prep = await runInTransaction(async (transaction) => { let serviceConfig = null let fanOutTags = [] @@ -383,36 +520,57 @@ async function reconcileService (serviceName, task, transaction) { ) } - const isK8s = await ServicesService.checkKubernetesEnvironment() + return { serviceConfig, fanOutTags } + }, { priority: PRIORITY_BACKGROUND, label: 'servicePlatform.prepare' }) - if (isK8s) { - await acquireHubLockWithTimeout(controllerUuid, transaction) - hubLockHeld = true - } + const isK8s = await ServicesService.checkKubernetesEnvironment() - if (isDelete) { - await reconcileServiceDeleteHub(serviceConfig, isK8s, transaction) + try { + if (isK8s) { + await acquireHubLockWithTimeout(controllerUuid) + try { + const hubPlan = await runInTransaction(async (transaction) => { + if (isDelete) { + return reconcileServiceDeleteHub(prep.serviceConfig, transaction) + } + return reconcileServiceHub(prep.serviceConfig, snapshot, transaction) + }, { priority: PRIORITY_BACKGROUND, label: 'servicePlatform.hubReconcile' }) + + await applyK8sHubRouterPlan(hubPlan) + + if (isDelete) { + if ((prep.serviceConfig.type || '').toLowerCase() !== 'k8s') { + await ServicesService._deleteK8sService(prep.serviceConfig.name) + } + } else { + await reconcileK8sServiceExternal(prep.serviceConfig, isK8s) + } + } finally { + await releaseHubLock(controllerUuid) + } } else { - await reconcileServiceHub(serviceConfig, snapshot, transaction) - await reconcileK8sService(serviceConfig, isK8s, transaction) - } - - if (hubLockHeld) { - await HubRouterConfigLockManager.release(controllerUuid, transaction) - hubLockHeld = false + await runInTransaction(async (transaction) => { + if (isDelete) { + await reconcileServiceDeleteHubDb(prep.serviceConfig, transaction) + } else { + await reconcileServiceHubDb(prep.serviceConfig, snapshot, transaction) + } + }, { 
priority: PRIORITY_BACKGROUND, label: 'servicePlatform.hubDb' }) } - await fanOutFogReconcile(fanOutTags, transaction) - - if (!isDelete) { - await ServiceManager.update( - { name: serviceName }, - { provisioningStatus: 'ready', provisioningError: null }, - transaction - ) - } else if (task && task.id != null) { - await ServicePlatformReconcileTaskManager.delete({ id: task.id }, transaction) - } + await runInTransaction(async (transaction) => { + await fanOutFogReconcile(prep.fanOutTags, transaction) + + if (!isDelete) { + await ServiceManager.update( + { name: serviceName }, + { provisioningStatus: 'ready', provisioningError: null }, + transaction + ) + } else if (task && task.id != null) { + await ServicePlatformReconcileTaskManager.delete({ id: task.id }, transaction) + } + }, { priority: PRIORITY_BACKGROUND, label: 'servicePlatform.finalize' }) logger.info('servicePlatformReconcile completed', { serviceName, @@ -427,17 +585,6 @@ async function reconcileService (serviceName, task, transaction) { provisioningStatus: isDelete ? null : 'ready' } } catch (error) { - if (hubLockHeld) { - try { - await HubRouterConfigLockManager.release(controllerUuid, transaction) - } catch (releaseError) { - logger.warn('servicePlatformReconcile failed to release hub lock', { - serviceName, - error: releaseError.message - }) - } - } - logger.error('servicePlatformReconcile failed', { serviceName, reason: task ? 
task.reason : null, @@ -448,8 +595,6 @@ async function reconcileService (serviceName, task, transaction) { } } -const bypassOptions = { bypassQueue: true } - module.exports = { normalizeTags, unionTags, @@ -459,7 +604,10 @@ module.exports = { deleteHubTcpConnector, deleteHubTcpListener, acquireHubLockWithTimeout, + releaseHubLock, watchLoadBalancerWithTimeout, fanOutFogReconcile, - reconcileService: TransactionDecorator.generateTransaction(reconcileService, bypassOptions) + applyK8sHubRouterPlan, + applyK8sHubRouterPlanToConfig, + reconcileService } diff --git a/src/services/services-service.js b/src/services/services-service.js index a04bcbee..a278a9ac 100644 --- a/src/services/services-service.js +++ b/src/services/services-service.js @@ -13,7 +13,7 @@ const logger = require('../logger') const FogManager = require('../data/managers/iofog-manager') const TagsManager = require('../data/managers/tags-manager') const ChangeTrackingService = require('./change-tracking-service') -const ServicePlatformReconcileTaskManager = require('../data/managers/service-platform-reconcile-task-manager') +const ReconcileOutboxManager = require('../data/managers/reconcile-outbox-manager') const ApplicationManager = require('../data/managers/application-manager') const { ensureSystemApplication, @@ -22,7 +22,6 @@ const { const { getServiceAnnotationTag, getComponentLabelKey, getAppLabelKey } = require('../config/flavor') // const { Op } = require('sequelize') -const K8S_ROUTER_CONFIG_MAP = 'iofog-router' const EDGELET_BRIDGE_CONNECTOR_HOST = 'edgelet.default.svc.bridge.local' const INTERIOR_BRIDGE_CONNECTOR_HOST = '127.0.0.1' @@ -42,7 +41,7 @@ async function _setTags (serviceModel, tagsArray, transaction) { } tags.push(tagModel) } - await serviceModel.setTags(tags) + await serviceModel.setTags(tags, { transaction }) } } @@ -83,8 +82,23 @@ function _mergeServiceFieldsForSnapshot (base, patch, snapshotTags) { }) } +function _serviceToSpecSnapshotFields (service) { + return { + name: 
service.name, + type: service.type, + resource: service.resource, + defaultBridge: service.defaultBridge, + bridgePort: service.bridgePort, + targetPort: service.targetPort, + servicePort: service.servicePort, + k8sType: service.k8sType, + serviceEndpoint: service.serviceEndpoint, + tags: _mapTags(service) + } +} + async function _enqueueServiceReconcileTask (serviceName, reason, specSnapshot, transaction) { - await ServicePlatformReconcileTaskManager.enqueueServicePlatformReconcileTask({ + await ReconcileOutboxManager.enqueueServicePlatform({ serviceName, reason, specSnapshot @@ -442,22 +456,6 @@ async function _getRouterMicroservice (fogNodeUuid, transaction) { return routerMicroservice } -// Helper function to update router config in Kubernetes environment -async function _updateK8sRouterConfig (config) { - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap) { - throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) - } - - const patchData = { - data: { - 'skrouterd.json': JSON.stringify(config) - } - } - - await K8sClient.patchConfigMap(K8S_ROUTER_CONFIG_MAP, patchData) -} - // Helper function to update router microservice config async function _updateRouterMicroserviceConfig (fogNodeUuid, config, transaction) { const routerMicroservice = await _getRouterMicroservice(fogNodeUuid, transaction) @@ -473,455 +471,6 @@ async function _updateRouterMicroserviceConfig (fogNodeUuid, config, transaction await ChangeTrackingService.update(fogNodeUuid, ChangeTrackingService.events.microserviceConfig, transaction) } -// Helper function to add tcpConnector to router config -async function _addTcpConnector (serviceConfig, transaction) { - const isK8s = await checkKubernetesEnvironment() - const targetRouterNode = await _determineConnectorSiteId(serviceConfig, transaction) - const connector = await _buildTcpConnector(serviceConfig, transaction) - - if (targetRouterNode === 'default-router') { - if (isK8s) { - // 
Update K8s router config - logger.debug('Updating K8s router config') - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap) { - logger.error('ConfigMap not found:' + K8S_ROUTER_CONFIG_MAP) - throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) - } - - const routerConfig = JSON.parse(configMap.data['skrouterd.json']) - // Add new connector to the array - routerConfig.push(['tcpConnector', connector]) - - await _updateK8sRouterConfig(routerConfig) - } else { - // Update default router microservice config - const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) - if (!defaultRouter) { - logger.error('Default router not found') - throw new Errors.NotFoundError('Default router not found') - } - const fogNodeUuid = defaultRouter.iofogUuid - const routerMicroservice = await _getRouterMicroservice(fogNodeUuid, transaction) - const currentConfig = JSON.parse(routerMicroservice.config || '{}') - - if (!currentConfig.bridges) { - currentConfig.bridges = {} - } - if (!currentConfig.bridges.tcpConnectors) { - currentConfig.bridges.tcpConnectors = {} - } - currentConfig.bridges.tcpConnectors[connector.name] = connector - - await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) - } - } else { - // Update specific router microservice config - const fogNodeUuid = targetRouterNode - const routerMicroservice = await _getRouterMicroservice(fogNodeUuid, transaction) - const currentConfig = JSON.parse(routerMicroservice.config || '{}') - - if (!currentConfig.bridges) { - currentConfig.bridges = {} - } - if (!currentConfig.bridges.tcpConnectors) { - currentConfig.bridges.tcpConnectors = {} - } - currentConfig.bridges.tcpConnectors[connector.name] = connector - - await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) - } -} - -// Helper function to add tcpListener to router config -async function _addTcpListener (serviceConfig, transaction) { - const 
isK8s = await checkKubernetesEnvironment() - - // First handle K8s case if we're in K8s environment - if (isK8s) { - const k8sListener = _buildTcpListener(serviceConfig) - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap) { - logger.error('ConfigMap not found:' + K8S_ROUTER_CONFIG_MAP) - throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) - } - - const routerConfig = JSON.parse(configMap.data['skrouterd.json']) - // Add new listener to the array - routerConfig.push(['tcpListener', k8sListener]) - - await _updateK8sRouterConfig(routerConfig) - } - - // Handle distributed router microservice cases - // Get list of fog nodes that need this listener - const fogNodeUuids = await handleServiceDistribution(serviceConfig.tags, transaction) - - // If not in K8s environment, always include default router - if (!isK8s) { - if (serviceConfig.defaultBridge === 'default-router') { - const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) - if (!defaultRouter) { - logger.error('Default router not found') - throw new Errors.NotFoundError('Default router not found') - } - // Add default router if not already in the list - if (!fogNodeUuids.includes(defaultRouter.iofogUuid)) { - fogNodeUuids.push(defaultRouter.iofogUuid) - } - } else { - if (!fogNodeUuids.includes(serviceConfig.defaultBridge)) { - fogNodeUuids.push(serviceConfig.defaultBridge) - } - } - } - // else if (!fogNodeUuids || fogNodeUuids.length === 0) { - // // If in K8s and no fog nodes found, add default router - // const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) - // if (!defaultRouter) { - // logger.error('Default router not found') - // throw new Errors.NotFoundError('Default router not found') - // } - // fogNodeUuids.push(defaultRouter.iofogUuid) - // } - - // Add listener to each router microservice - for (const fogNodeUuid of fogNodeUuids) { - try { - const listener = 
_buildTcpListener(serviceConfig) - const routerMicroservice = await _getRouterMicroservice(fogNodeUuid, transaction) - const currentConfig = JSON.parse(routerMicroservice.config || '{}') - if (!currentConfig.bridges) currentConfig.bridges = {} - if (!currentConfig.bridges.tcpListeners) currentConfig.bridges.tcpListeners = {} - currentConfig.bridges.tcpListeners[listener.name] = listener - await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) - } catch (err) { - if (err instanceof Errors.NotFoundError) { - logger.info(`Router microservice not found for fogNodeUuid ${fogNodeUuid}, skipping.`) - continue - } - throw err - } - } -} - -// Helper function to update tcpConnector in router config -async function _updateTcpConnector (serviceConfig, transaction) { - const isK8s = await checkKubernetesEnvironment() - const targetRouterNode = await _determineConnectorSiteId(serviceConfig, transaction) - const connector = await _buildTcpConnector(serviceConfig, transaction) - - if (targetRouterNode === 'default-router') { - if (isK8s) { - // Update K8s router config - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap) { - throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) - } - - const routerConfig = JSON.parse(configMap.data['skrouterd.json']) - // Find and update the existing connector - const connectorIndex = routerConfig.findIndex(item => - item[0] === 'tcpConnector' && item[1].name === connector.name - ) - if (connectorIndex !== -1) { - routerConfig[connectorIndex] = ['tcpConnector', connector] - } - - await _updateK8sRouterConfig(routerConfig) - } else { - // Update default router microservice config - const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) - if (!defaultRouter) { - throw new Errors.NotFoundError('Default router not found') - } - const fogNodeUuid = defaultRouter.iofogUuid - const routerMicroservice = await 
_getRouterMicroservice(fogNodeUuid, transaction) - const currentConfig = JSON.parse(routerMicroservice.config || '{}') - - if (!currentConfig.bridges) { - currentConfig.bridges = {} - } - if (!currentConfig.bridges.tcpConnectors) { - currentConfig.bridges.tcpConnectors = {} - } - // Update the connector with the same name - currentConfig.bridges.tcpConnectors[connector.name] = connector - - await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) - } - } else { - // Update specific router microservice config - const fogNodeUuid = targetRouterNode - const routerMicroservice = await _getRouterMicroservice(fogNodeUuid, transaction) - const currentConfig = JSON.parse(routerMicroservice.config || '{}') - - if (!currentConfig.bridges) { - currentConfig.bridges = {} - } - if (!currentConfig.bridges.tcpConnectors) { - currentConfig.bridges.tcpConnectors = {} - } - // Update the connector with the same name - currentConfig.bridges.tcpConnectors[connector.name] = connector - - await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) - } -} - -// // Helper function to update tcpListener in router config -// async function _updateTcpListener (serviceConfig, transaction) { -// const isK8s = await checkKubernetesEnvironment() - -// // First handle K8s case if we're in K8s environment -// if (isK8s) { -// const k8sListener = await _buildTcpListener(serviceConfig, null) // null for K8s case -// const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) -// if (!configMap) { -// throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) -// } - -// const routerConfig = JSON.parse(configMap.data['skrouterd.json']) -// // Update the listener in the array -// const listenerIndex = routerConfig.findIndex(item => -// item[0] === 'tcpListener' && item[1].name === k8sListener.name -// ) -// if (listenerIndex !== -1) { -// routerConfig[listenerIndex] = ['tcpListener', k8sListener] -// } else { -// 
routerConfig.push(['tcpListener', k8sListener]) -// } - -// await _updateK8sRouterConfig(routerConfig) -// } - -// // Handle distributed router microservice cases -// // Get list of fog nodes that need this listener -// const fogNodeUuids = await handleServiceDistribution(serviceConfig.tags, transaction) -// // If not in K8s environment, always include default router -// if (!isK8s) { -// const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) -// if (!defaultRouter) { -// throw new Errors.NotFoundError('Default router not found') -// } -// // Add default router if not already in the list -// if (!fogNodeUuids.includes(defaultRouter.iofogUuid)) { -// fogNodeUuids.push(defaultRouter.iofogUuid) -// } -// } -// // else if (!fogNodeUuids || fogNodeUuids.length === 0) { -// // // If in K8s and no fog nodes found, add default router -// // const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) -// // if (!defaultRouter) { -// // throw new Errors.NotFoundError('Default router not found') -// // } -// // fogNodeUuids.push(defaultRouter.iofogUuid) -// // } - -// // Update listener in each router microservice -// for (const fogNodeUuid of fogNodeUuids) { -// try { -// const listener = await _buildTcpListener(serviceConfig, fogNodeUuid) -// const routerMicroservice = await _getRouterMicroservice(fogNodeUuid, transaction) -// const currentConfig = JSON.parse(routerMicroservice.config || '{}') - -// if (!currentConfig.bridges) { -// currentConfig.bridges = {} -// } -// if (!currentConfig.bridges.tcpListeners) { -// currentConfig.bridges.tcpListeners = {} -// } -// // Update listener with its name as key -// currentConfig.bridges.tcpListeners[listener.name] = listener - -// await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) -// } catch (err) { -// if (err instanceof Errors.NotFoundError) { -// logger.info(`Router microservice not found for fogNodeUuid ${fogNodeUuid}, skipping.`) -// continue -// 
} -// throw err -// } -// } -// } - -// Helper function to delete tcpConnector from router config -async function _deleteTcpConnector (serviceName, transaction) { - logger.debug('_deleteTcpConnector: start', { serviceName }) - const isK8s = await checkKubernetesEnvironment() - const connectorName = `${serviceName}-connector` - - // Get service to determine if it's using default router - const service = await ServiceManager.findOne({ name: serviceName }, transaction) - if (!service) { - throw new Errors.NotFoundError(`Service not found: ${serviceName}`) - } - logger.debug('_deleteTcpConnector: service', { type: service.type, resource: service.resource, defaultBridge: service.defaultBridge }) - - const isDefaultRouter = service.defaultBridge === 'default-router' - let microserviceSource = null - if (service.type === 'microservice') { - microserviceSource = await MicroserviceManager.findOne({ uuid: service.resource }, transaction) - } - let fogSource = null - if (service.type === 'agent') { - fogSource = await FogManager.findOne({ uuid: service.resource }, transaction) - if (!fogSource) { - fogSource = await FogManager.findOne({ name: service.resource }, transaction) - } - } - - if (isDefaultRouter && (!microserviceSource || !fogSource)) { - logger.debug('_deleteTcpConnector: updating default router config') - if (isK8s) { - // Update K8s router config - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap) { - throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) - } - - const routerConfig = JSON.parse(configMap.data['skrouterd.json']) - // Remove the connector from the array - const updatedConfig = routerConfig.filter(item => - !(item[0] === 'tcpConnector' && item[1].name === connectorName) - ) - - await _updateK8sRouterConfig(updatedConfig) - } else { - // Update default router microservice config - const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) - if (!defaultRouter) { 
- throw new Errors.NotFoundError('Default router not found') - } - const fogNodeUuid = defaultRouter.iofogUuid - const routerMicroservice = await _getRouterMicroservice(fogNodeUuid, transaction) - const currentConfig = JSON.parse(routerMicroservice.config || '{}') - - if (currentConfig.bridges && currentConfig.bridges.tcpConnectors) { - delete currentConfig.bridges.tcpConnectors[connectorName] - } - - await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) - } - logger.debug('_deleteTcpConnector: done (default router updated)') - return - } - - let fogNodeUuid = null - if (!isDefaultRouter && (!microserviceSource || !fogSource)) { - fogNodeUuid = service.defaultBridge - } - if (microserviceSource) { - fogNodeUuid = microserviceSource.iofogUuid - } - if (fogSource) { - fogNodeUuid = fogSource.uuid - } - logger.debug('_deleteTcpConnector: fogNodeUuid for non-default', { fogNodeUuid }) - const routerMicroservice = await _getRouterMicroservice(fogNodeUuid, transaction) - const currentConfig = JSON.parse(routerMicroservice.config || '{}') - - if (currentConfig.bridges && currentConfig.bridges.tcpConnectors) { - delete currentConfig.bridges.tcpConnectors[connectorName] - } - - logger.debug('_deleteTcpConnector: updating router config', { fogNodeUuid }) - await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) - logger.debug('_deleteTcpConnector: done') -} - -// Helper function to delete tcpListener from router config -async function _deleteTcpListener (serviceName, transaction) { - logger.debug('_deleteTcpListener: start', { serviceName }) - const isK8s = await checkKubernetesEnvironment() - const listenerName = `${serviceName}-listener` - - // First handle K8s case if we're in K8s environment - if (isK8s) { - const configMap = await K8sClient.getConfigMap(K8S_ROUTER_CONFIG_MAP) - if (!configMap) { - throw new Errors.NotFoundError(`ConfigMap not found: ${K8S_ROUTER_CONFIG_MAP}`) - } - - const routerConfig = 
JSON.parse(configMap.data['skrouterd.json']) - // Remove the listener from the array - const updatedConfig = routerConfig.filter(item => - !(item[0] === 'tcpListener' && item[1].name === listenerName) - ) - - await _updateK8sRouterConfig(updatedConfig) - } - - // Get service to determine its tags for distribution - const service = await ServiceManager.findOneWithTags({ name: serviceName }, transaction) - if (!service) { - throw new Errors.NotFoundError(`Service not found: ${serviceName}`) - } - logger.debug('_deleteTcpListener: service', { type: service.type, hasTags: !!service.tags, tagsIsArray: Array.isArray(service.tags) }) - - let microserviceSource = null - if (service.type === 'microservice') { - microserviceSource = await MicroserviceManager.findOne({ uuid: service.resource }, transaction) - } - // Handle distributed router microservice cases - // Get list of fog nodes that need this listener removed - const serviceTags = (service.tags && Array.isArray(service.tags)) ? service.tags.map(tag => tag.value) : [] - logger.debug('_deleteTcpListener: calling handleServiceDistribution', { serviceTagsLength: serviceTags.length, serviceTagsSample: serviceTags.slice(0, 3) }) - const fogNodeUuids = await handleServiceDistribution(serviceTags, transaction) - logger.debug('_deleteTcpListener: handleServiceDistribution returned', { fogNodeUuidsLength: fogNodeUuids ? 
fogNodeUuids.length : 'null/undefined', isArray: Array.isArray(fogNodeUuids) }) - - if (microserviceSource) { - if (!fogNodeUuids.includes(microserviceSource.iofogUuid)) { - fogNodeUuids.push(microserviceSource.iofogUuid) - } - } - // If not in K8s environment, always include default router - if (!isK8s) { - const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) - if (!defaultRouter) { - throw new Errors.NotFoundError('Default router not found') - } - // Add default router if not already in the list - if (!fogNodeUuids.includes(defaultRouter.iofogUuid)) { - fogNodeUuids.push(defaultRouter.iofogUuid) - } - } - // else if (!fogNodeUuids || fogNodeUuids.length === 0) { - // // If in K8s and no fog nodes found, add default router - // const defaultRouter = await RouterManager.findOne({ isDefault: true }, transaction) - // if (!defaultRouter) { - // throw new Errors.NotFoundError('Default router not found') - // } - // fogNodeUuids.push(defaultRouter.iofogUuid) - // } - - // Remove listener from each router microservice - const fogList = Array.isArray(fogNodeUuids) ? 
fogNodeUuids : [] - logger.debug('_deleteTcpListener: iterating router configs', { count: fogList.length }) - for (const fogNodeUuid of fogList) { - try { - const routerMicroservice = await _getRouterMicroservice(fogNodeUuid, transaction) - const currentConfig = JSON.parse(routerMicroservice.config || '{}') - if (currentConfig.bridges && currentConfig.bridges.tcpListeners) { - delete currentConfig.bridges.tcpListeners[listenerName] - } - await _updateRouterMicroserviceConfig(fogNodeUuid, currentConfig, transaction) - } catch (err) { - if (err instanceof Errors.NotFoundError) { - logger.info('_deleteTcpListener: router microservice not found, skipping', { fogNodeUuid }) - continue - } - logger.error({ - err, - msg: '_deleteTcpListener: error updating router config', - fogNodeUuid - }) - throw err - } - } - logger.debug('_deleteTcpListener: done') -} - // Common labels for Kubernetes services created by the controller function _getK8sServiceLabels () { const componentLabelKey = getComponentLabelKey() @@ -935,11 +484,11 @@ function _getK8sServiceLabels () { } } -// Helper function to create Kubernetes service -async function _createK8sService (serviceConfig, transaction) { - const normalizedTags = serviceConfig.tags.map(tag => tag.includes(':') ? tag : `${tag}:`) +// Helper function to build Kubernetes Service spec for create +function _buildK8sServiceSpec (serviceConfig) { + const normalizedTags = (serviceConfig.tags || []).map(tag => tag.includes(':') ? 
tag : `${tag}:`) const componentLabelKey = getComponentLabelKey() - const serviceSpec = { + return { apiVersion: 'v1', kind: 'Service', metadata: { @@ -964,72 +513,88 @@ async function _createK8sService (serviceConfig, transaction) { }] } } +} - const service = await K8sClient.createService(serviceSpec) - - // If LoadBalancer type, wait for and set the external IP - if (serviceConfig.k8sType === 'LoadBalancer') { - const loadBalancerIP = await K8sClient.watchLoadBalancerIP(serviceConfig.name) - if (loadBalancerIP) { - await ServiceManager.update( - { name: serviceConfig.name }, - { serviceEndpoint: loadBalancerIP }, - transaction - ) +function _buildK8sServicePatchData (serviceConfig) { + const normalizedTags = (serviceConfig.tags || []).map(tag => tag.includes(':') ? tag : `${tag}:`) + const componentLabelKey = getComponentLabelKey() + return { + metadata: { + labels: _getK8sServiceLabels(), + annotations: normalizedTags.reduce((acc, tag) => { + const [key, value] = tag.split(':') + acc[key] = (value || '').trim() + return acc + }, {}) + }, + spec: { + type: serviceConfig.k8sType, + selector: { + [componentLabelKey]: 'router' + }, + ports: [{ + name: 'iofog-service', + port: parseInt(serviceConfig.servicePort), + targetPort: parseInt(serviceConfig.bridgePort), + protocol: 'TCP' + }] } } - - return service } -// Helper function to update Kubernetes service -async function _updateK8sService (serviceConfig, transaction) { - const existingService = await K8sClient.getService(serviceConfig.name) +// Helper function to create or update a Kubernetes service resource (I/O only; no DB). +// Returns LoadBalancer IP when assigned, otherwise null. 
+async function _syncK8sServiceResource (serviceConfig) { + const existingService = await K8sClient.getService(serviceConfig.name, { ignoreNotFound: true }) + const serviceSpec = _buildK8sServiceSpec(serviceConfig) + if (!existingService) { logger.debug(`Service not found: ${serviceConfig.name}, creating new service`) - const service = await _createK8sService(serviceConfig, transaction) - return service + await K8sClient.createService(serviceSpec) } else { - const normalizedTags = serviceConfig.tags.map(tag => tag.includes(':') ? tag : `${tag}:`) - const componentLabelKey = getComponentLabelKey() - const patchData = { - metadata: { - labels: _getK8sServiceLabels(), - annotations: normalizedTags.reduce((acc, tag) => { - const [key, value] = tag.split(':') - acc[key] = (value || '').trim() - return acc - }, {}) - }, - spec: { - type: serviceConfig.k8sType, - selector: { - [componentLabelKey]: 'router' - }, - ports: [{ - name: 'iofog-service', - port: parseInt(serviceConfig.servicePort), - targetPort: parseInt(serviceConfig.bridgePort), - protocol: 'TCP' - }] - } - } - + const patchData = _buildK8sServicePatchData(serviceConfig) logger.debug(`Updating service: ${serviceConfig.name}`) - const updatedService = await K8sClient.updateService(serviceConfig.name, patchData) - - // If LoadBalancer type, wait for and set the external IP - if (serviceConfig.k8sType === 'LoadBalancer') { - const loadBalancerIP = await K8sClient.watchLoadBalancerIP(serviceConfig.name) - if (loadBalancerIP) { - await ServiceManager.update( - { name: serviceConfig.name }, - { serviceEndpoint: loadBalancerIP }, - transaction - ) + try { + await K8sClient.updateService(serviceConfig.name, patchData) + } catch (error) { + if (K8sClient.isK8sNotFound(error)) { + logger.warn(`Service ${serviceConfig.name} missing during update, creating new service`) + await K8sClient.createService(serviceSpec) + } else { + throw error } } - return updatedService + } + + if (serviceConfig.k8sType === 'LoadBalancer') { 
+ return K8sClient.watchLoadBalancerIP(serviceConfig.name) + } + + return null +} + +// Helper function to create Kubernetes service +async function _createK8sService (serviceConfig, transaction) { + const loadBalancerIP = await _syncK8sServiceResource(serviceConfig) + if (loadBalancerIP) { + await ServiceManager.update( + { name: serviceConfig.name }, + { serviceEndpoint: loadBalancerIP }, + transaction + ) + } + return loadBalancerIP +} + +// Helper function to update Kubernetes service +async function _updateK8sService (serviceConfig, transaction) { + const loadBalancerIP = await _syncK8sServiceResource(serviceConfig) + if (loadBalancerIP) { + await ServiceManager.update( + { name: serviceConfig.name }, + { serviceEndpoint: loadBalancerIP }, + transaction + ) } } @@ -1226,10 +791,7 @@ async function deleteServiceEndpoint (serviceName, transaction) { } logger.debug('deleteServiceEndpoint: existingService', { type: existingService.type, defaultBridge: existingService.defaultBridge }) - const specSnapshot = _buildServiceSpecSnapshot({ - ...existingService, - tags: _mapTags(existingService) - }) + const specSnapshot = _buildServiceSpecSnapshot(_serviceToSpecSnapshotFields(existingService)) await _enqueueServiceReconcileTask(serviceName, 'delete', specSnapshot, transaction) logger.debug('deleteServiceEndpoint: deleting service from DB') @@ -1255,10 +817,7 @@ async function reconcileServiceEndpoint (serviceName, transaction) { service.provisioningError = null } - const specSnapshot = _buildServiceSpecSnapshot({ - ...service, - tags: _mapTags(service) - }) + const specSnapshot = _buildServiceSpecSnapshot(_serviceToSpecSnapshotFields(service)) await _enqueueServiceReconcileTask(serviceName, 'manual-retry', specSnapshot, transaction) return { @@ -1365,6 +924,7 @@ module.exports = { _mapTags, _setTags: TransactionDecorator.generateTransaction(_setTags), _createK8sService, + _syncK8sServiceResource, _updateK8sService, _deleteK8sService, createServiceEndpoint: 
TransactionDecorator.generateTransaction(createServiceEndpoint), @@ -1378,10 +938,5 @@ module.exports = { _determineConnectorSiteId, _buildTcpConnector, _buildTcpListener, - _addTcpConnector, - _addTcpListener, - _updateTcpConnector, - _deleteTcpConnector, - _deleteTcpListener, _resolveFogRouterMode } diff --git a/src/services/user-service.js b/src/services/user-service.js index d08f69b9..11a76ec8 100644 --- a/src/services/user-service.js +++ b/src/services/user-service.js @@ -38,64 +38,87 @@ function ensureEmbeddedMode () { } } -const login = async function (credentials, isCLI, transaction) { +const loginEmbedded = TransactionDecorator.generateTransaction(async function (credentials, isCLI, transaction) { + return AuthLoginService.login(credentials, transaction) +}) + +async function loginExternal (credentials) { + const oidcConfig = await getOidcConfiguration() + const parameters = { + username: credentials.email, + password: credentials.password + } + if (credentials.totp) { + parameters.totp = credentials.totp + } + + const tokenResponse = await genericGrantRequest(oidcConfig, 'password', parameters) + return tokensFromResponse(tokenResponse) +} + +const login = async function (credentials, isCLI) { ensureAuthConfigured() if (getAuthMode() === 'embedded') { - return AuthLoginService.login(credentials, transaction) + return loginEmbedded(credentials, isCLI) } try { - const oidcConfig = await getOidcConfiguration() - const parameters = { - username: credentials.email, - password: credentials.password - } - if (credentials.totp) { - parameters.totp = credentials.totp - } - - const tokenResponse = await genericGrantRequest(oidcConfig, 'password', parameters) - return tokensFromResponse(tokenResponse) + return await loginExternal(credentials) } catch (error) { mapOidcError(error) } } -const refresh = async function (credentials, isCLI, transaction) { +const refreshEmbedded = TransactionDecorator.generateTransaction(async function (credentials, isCLI, transaction) { 
+ return AuthLoginService.refresh(credentials, transaction) +}) + +async function refreshExternal (credentials) { + const oidcConfig = await getOidcConfiguration() + const tokenResponse = await refreshTokenGrant(oidcConfig, credentials.refreshToken) + return tokensFromResponse(tokenResponse) +} + +const refresh = async function (credentials, isCLI) { ensureAuthConfigured() if (getAuthMode() === 'embedded') { - return AuthLoginService.refresh(credentials, transaction) + return refreshEmbedded(credentials, isCLI) } try { - const oidcConfig = await getOidcConfiguration() - const tokenResponse = await refreshTokenGrant(oidcConfig, credentials.refreshToken) - return tokensFromResponse(tokenResponse) + return await refreshExternal(credentials) } catch (error) { mapOidcError(error) } } -const profile = async function (req, isCLI, transaction) { +const profileEmbedded = TransactionDecorator.generateTransaction(async function (req, isCLI, transaction) { + return AuthLoginService.profile(req, transaction) +}) + +async function profileExternal (req) { + const accessToken = req.headers.authorization.replace('Bearer ', '') + const oidcConfig = await getOidcConfiguration() + const claims = decodeJwt(accessToken) + const subject = claims.sub + if (!subject) { + throw new Errors.InvalidCredentialsError('Invalid credentials') + } + + return fetchUserInfo(oidcConfig, accessToken, subject) +} + +const profile = async function (req, isCLI) { ensureAuthConfigured() if (getAuthMode() === 'embedded') { - return AuthLoginService.profile(req, transaction) + return profileEmbedded(req, isCLI) } - const accessToken = req.headers.authorization.replace('Bearer ', '') - try { - const oidcConfig = await getOidcConfiguration() - const claims = decodeJwt(accessToken) - const subject = claims.sub - if (!subject) { - throw new Errors.InvalidCredentialsError('Invalid credentials') - } - - return await fetchUserInfo(oidcConfig, accessToken, subject) + return await profileExternal(req) } catch (error) 
{ if (error instanceof Errors.InvalidCredentialsError) { throw error @@ -104,13 +127,11 @@ const profile = async function (req, isCLI, transaction) { } } -const logout = async function (req, isCLI, transaction) { - ensureAuthConfigured() - - if (getAuthMode() === 'embedded') { - return AuthLoginService.logout(req, transaction) - } +const logoutEmbedded = TransactionDecorator.generateTransaction(async function (req, isCLI, transaction) { + return AuthLoginService.logout(req, transaction) +}) +async function logoutExternal (req) { const accessToken = req.headers.authorization.replace('Bearer ', '') try { @@ -126,7 +147,17 @@ const logout = async function (req, isCLI, transaction) { return { status: 'success' } } -const enrollMfa = async function (req, isCLI, transaction) { +const logout = async function (req, isCLI) { + ensureAuthConfigured() + + if (getAuthMode() === 'embedded') { + return logoutEmbedded(req, isCLI) + } + + return logoutExternal(req) +} + +const enrollMfa = TransactionDecorator.generateTransaction(async function (req, isCLI, transaction) { ensureAuthConfigured() ensureEmbeddedMode() @@ -136,9 +167,9 @@ const enrollMfa = async function (req, isCLI, transaction) { const userId = req.kauth.grant.access_token.content.sub return AuthMfaService.enrollMfa(userId, transaction) -} +}) -const confirmMfa = async function (req, isCLI, transaction) { +const confirmMfa = TransactionDecorator.generateTransaction(async function (req, isCLI, transaction) { ensureAuthConfigured() ensureEmbeddedMode() @@ -148,17 +179,17 @@ const confirmMfa = async function (req, isCLI, transaction) { const userId = req.kauth.grant.access_token.content.sub return AuthMfaService.confirmMfa(userId, req.body.code, transaction) -} +}) -const disableMfa = async function (req, isCLI, transaction) { +const disableMfa = TransactionDecorator.generateTransaction(async function (req, isCLI, transaction) { ensureAuthConfigured() ensureEmbeddedMode() const userId = 
req.kauth.grant.access_token.content.sub return AuthMfaService.disableMfa(userId, req.body.password, req.body.code, transaction) -} +}) -const changePassword = async function (req, payload, isCLI, transaction) { +const changePassword = TransactionDecorator.generateTransaction(async function (req, payload, isCLI, transaction) { ensureAuthConfigured() if (getAuthMode() === 'embedded') { @@ -170,76 +201,76 @@ const changePassword = async function (req, payload, isCLI, transaction) { } throw new Errors.NotImplementedError('Password change is only supported in embedded auth mode') -} +}) -const oauthAuthorize = async function (req, isCLI, transaction) { +const oauthAuthorize = async function (req, isCLI) { ensureAuthConfigured() return AuthOauthService.authorize(req) } -const oauthCallback = async function (req, isCLI, transaction) { +const oauthCallback = async function (req, isCLI) { ensureAuthConfigured() return AuthOauthService.callback(req) } -const interactionStatus = async function (uid, isCLI, transaction) { +const interactionStatus = async function (uid, isCLI) { ensureAuthConfigured() ensureEmbeddedMode() - return AuthInteractionService.getStatus(uid, transaction) + return AuthInteractionService.getStatus(uid) } -const interactionLogin = async function (uid, credentials, isCLI, transaction) { +const interactionLogin = async function (uid, credentials, isCLI) { ensureAuthConfigured() ensureEmbeddedMode() - return AuthInteractionService.submitLogin(uid, credentials, transaction) + return AuthInteractionService.submitLogin(uid, credentials) } -const interactionMfa = async function (uid, code, isCLI, transaction) { +const interactionMfa = async function (uid, code, isCLI) { ensureAuthConfigured() ensureEmbeddedMode() - return AuthInteractionService.submitMfa(uid, code, transaction) + return AuthInteractionService.submitMfa(uid, code) } -const interactionEnroll = async function (uid, isCLI, transaction) { +const interactionEnroll = async function (uid, isCLI) { 
ensureAuthConfigured() ensureEmbeddedMode() - return AuthInteractionService.submitEnroll(uid, transaction) + return AuthInteractionService.submitEnroll(uid) } -const interactionConfirmEnroll = async function (uid, code, isCLI, transaction) { +const interactionConfirmEnroll = async function (uid, code, isCLI) { ensureAuthConfigured() ensureEmbeddedMode() - return AuthInteractionService.submitConfirmEnroll(uid, code, transaction) + return AuthInteractionService.submitConfirmEnroll(uid, code) } -const interactionChangePassword = async function (uid, payload, isCLI, transaction) { +const interactionChangePassword = async function (uid, payload, isCLI) { ensureAuthConfigured() ensureEmbeddedMode() - return AuthInteractionService.submitChangePassword(uid, payload, transaction) + return AuthInteractionService.submitChangePassword(uid, payload) } -const interactionComplete = async function (uid, req, res, isCLI, transaction) { +const interactionComplete = async function (uid, req, res, isCLI) { ensureAuthConfigured() ensureEmbeddedMode() - return AuthInteractionService.complete(uid, req, res, transaction) + return AuthInteractionService.complete(uid, req, res) } module.exports = { - login: TransactionDecorator.generateTransaction(login), - refresh: TransactionDecorator.generateTransaction(refresh), - profile: TransactionDecorator.generateTransaction(profile), - logout: TransactionDecorator.generateTransaction(logout), - enrollMfa: TransactionDecorator.generateTransaction(enrollMfa), - confirmMfa: TransactionDecorator.generateTransaction(confirmMfa), - disableMfa: TransactionDecorator.generateTransaction(disableMfa), - changePassword: TransactionDecorator.generateTransaction(changePassword), - oauthAuthorize: TransactionDecorator.generateTransaction(oauthAuthorize), - oauthCallback: TransactionDecorator.generateTransaction(oauthCallback), - interactionStatus: TransactionDecorator.generateTransaction(interactionStatus), - interactionLogin: 
TransactionDecorator.generateTransaction(interactionLogin), - interactionMfa: TransactionDecorator.generateTransaction(interactionMfa), - interactionEnroll: TransactionDecorator.generateTransaction(interactionEnroll), - interactionConfirmEnroll: TransactionDecorator.generateTransaction(interactionConfirmEnroll), - interactionChangePassword: TransactionDecorator.generateTransaction(interactionChangePassword), - interactionComplete: TransactionDecorator.generateTransaction(interactionComplete) + login, + refresh, + profile, + logout, + enrollMfa, + confirmMfa, + disableMfa, + changePassword, + oauthAuthorize, + oauthCallback, + interactionStatus, + interactionLogin, + interactionMfa, + interactionEnroll, + interactionConfirmEnroll, + interactionChangePassword, + interactionComplete } diff --git a/src/services/volume-mount-service.js b/src/services/volume-mount-service.js index 2d2763de..72c225c5 100644 --- a/src/services/volume-mount-service.js +++ b/src/services/volume-mount-service.js @@ -18,7 +18,7 @@ async function findVolumeMountedFogNodes (volumeMountName, transaction) { throw new Errors.NotFoundError(AppHelper.formatMessage(ErrorMessages.VOLUME_MOUNT_NOT_FOUND, volumeMountName)) } - const fogs = await volumeMount.getFogs({}, transaction) + const fogs = await volumeMount.getFogs({ transaction }) return fogs.map(fog => fog.uuid) } @@ -158,7 +158,7 @@ async function linkVolumeMountEndpoint (name, fogUuids, transaction) { if (!agent) { throw new Errors.NotFoundError(AppHelper.formatMessage(ErrorMessages.NOT_FOUND_AGENT_NAME, fogUuid)) } - await agent.addVolumeMount(volumeMount.uuid, transaction) + await agent.addVolumeMount(volumeMount, { transaction }) } const newlyLinked = fogUuids.filter((uuid) => !alreadyLinked.has(uuid)) @@ -179,7 +179,7 @@ async function unlinkVolumeMountEndpoint (name, fogUuids, transaction) { if (!agent) { throw new Errors.NotFoundError(AppHelper.formatMessage(ErrorMessages.NOT_FOUND_AGENT_NAME, fogUuid)) } - await 
agent.removeVolumeMount(volumeMount.uuid, transaction) + await agent.removeVolumeMount(volumeMount, { transaction }) } // Update change tracking for all unlinked fog nodes diff --git a/src/services/websocket-queue-service.js b/src/services/websocket-queue-service.js index 595c9124..641461fa 100644 --- a/src/services/websocket-queue-service.js +++ b/src/services/websocket-queue-service.js @@ -123,6 +123,27 @@ class WebSocketQueueService { return this.execBridges.has(execId) } + setExecUserDeliveryHook (execId, hook) { + const bridge = this.execBridges.get(execId) + if (bridge) { + bridge.onUserRelayDelivery = hook + } + } + + setExecAgentDeliveryHook (execId, hook) { + const bridge = this.execBridges.get(execId) + if (bridge) { + bridge.onAgentRelayDelivery = hook + } + } + + setLogUserDeliveryHook (sessionId, hook) { + const bridge = this.logBridges.get(sessionId) + if (bridge) { + bridge.onUserRelayDelivery = hook + } + } + async publishToAgent (execId, buffer, options = {}) { await this._send(execId, 'agent', buffer, options) } @@ -422,6 +443,26 @@ class WebSocketQueueService { side, messageSize: body.length }) + if (side === 'user' && currentBridge.onUserRelayDelivery) { + try { + currentBridge.onUserRelayDelivery(body) + } catch (error) { + logger.warn('[AMQP][QUEUE] Exec user relay delivery hook failed', { + execId: session.execId, + error: error.message + }) + } + } + if (side === 'agent' && currentBridge.onAgentRelayDelivery) { + try { + currentBridge.onAgentRelayDelivery(body) + } catch (error) { + logger.warn('[AMQP][QUEUE] Exec agent relay delivery hook failed', { + execId: session.execId, + error: error.message + }) + } + } } catch (error) { logger.error('[AMQP][QUEUE] Failed to deliver message to socket', { execId: session.execId, @@ -482,6 +523,11 @@ class WebSocketQueueService { closeAck }) + if (closeAck) { + context.delivery.accept() + return + } + if (ws && ws.readyState === WebSocket.OPEN) { try { const reason = closeInitiator === 'agent' ? 
'Agent closed connection' : 'User closed connection' @@ -504,11 +550,21 @@ class WebSocketQueueService { hasSocket: !!ws, socketState: ws ? ws.readyState : 'N/A' }) + if (bridge && bridge.cleanupCallback) { + try { + await bridge.cleanupCallback(execId) + } catch (error) { + logger.error('[AMQP][QUEUE] Error in cleanup callback during CLOSE handling', { + execId, + error: error.message + }) + } + } } context.delivery.accept() - if (!closeAck && this.execBridges.has(execId)) { + if (this.execBridges.has(execId)) { const ackSide = side === 'user' ? 'agent' : 'user' try { await this._send(execId, ackSide, body, { @@ -527,17 +583,6 @@ class WebSocketQueueService { }) } } - - if (bridge && bridge.cleanupCallback) { - try { - await bridge.cleanupCallback(execId) - } catch (error) { - logger.error('[AMQP][QUEUE] Error in cleanup callback during CLOSE handling', { - execId, - error: error.message - }) - } - } } async enableForLogSession (session, cleanupCallback) { @@ -777,6 +822,16 @@ class WebSocketQueueService { } ws.send(body, { binary: true }) context.delivery.accept() + if (currentBridge.onUserRelayDelivery) { + try { + currentBridge.onUserRelayDelivery(body) + } catch (error) { + logger.warn('[AMQP][QUEUE] Log user relay delivery hook failed', { + sessionId: session.sessionId, + error: error.message + }) + } + } } else { context.delivery.release() } diff --git a/src/utils/cert.js b/src/utils/cert.js index 04b0083a..ee267131 100644 --- a/src/utils/cert.js +++ b/src/utils/cert.js @@ -2,6 +2,7 @@ const forge = require('node-forge') const k8sClient = require('./k8s-client') const BigNumber = require('bignumber.js') const logger = require('../logger') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') // Types for CA input const CA_TYPES = { @@ -81,7 +82,7 @@ async function validateCA (cert, key) { * @param {string} name - Name of the secret * @returns {Promise} */ -async function storeCA (ca, name) { +async function storeCA (ca, 
name, transaction) { try { // Ensure data is in base64 format for TLS secrets const secretData = { @@ -98,7 +99,7 @@ async function storeCA (ca, name) { // Use the secret service to store the CA const SecretService = require('../services/secret-service') - await SecretService.createSecretEndpoint(secret) + await SecretService.createSecretEndpoint(secret, transaction) } catch (error) { throw new Error(`Failed to store CA: ${error.message}`) } @@ -107,15 +108,20 @@ async function storeCA (ca, name) { /** * Loads CA certificate and key from internal secret storage * @param {string} name - Name of the secret + * @param {import('sequelize').Transaction} [transaction] * @returns {Promise} */ -async function loadCA (name) { +async function loadCA (name, transaction) { try { // Use SecretManager to get the secret with decryption handling const SecretManager = require('../data/managers/secret-manager') - const fakeTransaction = { fakeTransaction: true } - const secret = await SecretManager.getSecret(name, fakeTransaction) + const secret = transaction + ? await SecretManager.getSecret(name, transaction) + : await runInTransaction( + (tx) => SecretManager.getSecret(name, tx), + { priority: PRIORITY_BACKGROUND, label: 'cert-load-ca' } + ) if (!secret) { throw new Error(`TLS secret with name ${name} not found`) } @@ -233,7 +239,7 @@ async function generateSelfSignedCA (subject, expiration = 5 * 365 * 24 * 60 * 6 } // CA handling functions -async function getCAFromK8sSecret (secretName) { +async function getCAFromK8sSecret (secretName, transaction) { try { // Check that k8sClient is properly required and available if (!k8sClient) { @@ -257,28 +263,37 @@ async function getCAFromK8sSecret (secretName) { try { // Use SecretManager to check if there's a local secret const SecretManager = require('../data/managers/secret-manager') - const localSecret = await SecretManager.findOne({ name: secretName }, { fakeTransaction: true }) + const localSecret = transaction + ? 
await SecretManager.findOne({ name: secretName }, transaction) + : await runInTransaction( + (tx) => SecretManager.findOne({ name: secretName }, tx), + { priority: PRIORITY_BACKGROUND, label: 'cert-k8s-local-secret' } + ) - // If no local secret, we need to create one if (!localSecret) { - // Store the CA in local secret storage - await storeCA({ cert, key }, secretName) - // Also create a certificate record + await storeCA({ cert, key }, secretName, transaction) const CertificateManager = require('../data/managers/certificate-manager') const forge = require('node-forge') const forgeCert = forge.pki.certificateFromPem(cert) - // Extract subject const subject = forgeCert.subject.getField('CN') ? forgeCert.subject.getField('CN').value : secretName - // Create CA record - await CertificateManager.createCertificateRecord({ + const caRecord = { name: secretName, subject, isCA: true, validFrom: forgeCert.validity.notBefore, validTo: forgeCert.validity.notAfter, serialNumber: forgeCert.serialNumber - }, { fakeTransaction: true }) + } + + if (transaction) { + await CertificateManager.createCertificateRecord(caRecord, transaction) + } else { + await runInTransaction( + (tx) => CertificateManager.createCertificateRecord(caRecord, tx), + { priority: PRIORITY_BACKGROUND, label: 'cert-k8s-create-ca-record' } + ) + } } } catch (dbError) { // Continue anyway - we at least have the cert/key @@ -309,7 +324,7 @@ async function getCAFromDirect (ca) { } } -async function getCAFromInput (ca) { +async function getCAFromInput (ca, transaction) { if (!ca) { return null } @@ -319,11 +334,11 @@ async function getCAFromInput (ca) { switch (caType) { case CA_TYPES.K8S_SECRET.toLowerCase(): - return getCAFromK8sSecret(ca.secretName) + return getCAFromK8sSecret(ca.secretName, transaction) case CA_TYPES.DIRECT.toLowerCase(): if (ca.secretName) { // If secretName is provided, load from internal secret storage - const caData = await loadCA(ca.secretName) + const caData = await 
loadCA(ca.secretName, transaction) return getCAFromDirect(caData) } return getCAFromDirect(ca) @@ -345,7 +360,8 @@ async function generateCertificate ({ hosts, expiration = 5 * 365 * 24 * 60 * 60 * 1000, ca, - isRenewal = false + isRenewal = false, + transaction }) { try { return await _generateCertificateBody({ @@ -354,7 +370,8 @@ async function generateCertificate ({ hosts, expiration, ca, - isRenewal + isRenewal, + transaction }) } catch (error) { logger.error(`Certificate generation failed for ${name}:`, error.message) @@ -368,9 +385,10 @@ async function _generateCertificateBody ({ hosts, expiration, ca, - isRenewal + isRenewal, + transaction }) { - const caCert = await getCAFromInput(ca) + const caCert = await getCAFromInput(ca, transaction) // Generate RSA key pair const keys = forge.pki.rsa.generateKeyPair(2048) @@ -512,7 +530,7 @@ async function _generateCertificateBody ({ if (isRenewal) { // For renewals, delete the existing secret first try { - await SecretService.deleteSecretEndpoint(name) + await SecretService.deleteSecretEndpoint(name, transaction) } catch (error) { // If the secret doesn't exist, that's okay, just continue if (error.name !== 'NotFoundError') { @@ -522,7 +540,7 @@ async function _generateCertificateBody ({ } // Create new secret with certificate data - await SecretService.createSecretEndpoint(secret) + await SecretService.createSecretEndpoint(secret, transaction) return { cert: certPem, diff --git a/src/utils/k8s-client.js b/src/utils/k8s-client.js index afbd7597..3060aef8 100644 --- a/src/utils/k8s-client.js +++ b/src/utils/k8s-client.js @@ -55,14 +55,29 @@ async function getK8sAppsApi () { return k8sAppsApi } +function _parseK8sErrorBody (body) { + if (body == null) { + return body + } + if (typeof body === 'string') { + try { + return JSON.parse(body) + } catch (_) { + return body + } + } + return body +} + /** * Returns true if the error indicates a Kubernetes 404 Not Found. 
- * Handles both axios-style (error.response.status) and body.code/body.reason. + * Handles ApiException (client-node v1), axios-style, and Status body payloads. */ function isK8sNotFound (error) { if (!error) return false + if (error.code === 404) return true if (error.response && error.response.status === 404) return true - const body = error.body || (error.response && error.response.body) + const body = _parseK8sErrorBody(error.body || (error.response && error.response.body)) if (body && (body.code === 404 || body.reason === 'NotFound')) return true return false } @@ -72,8 +87,9 @@ function isK8sNotFound (error) { */ function isK8sConflict (error) { if (!error) return false + if (error.code === 409) return true if (error.response && error.response.status === 409) return true - const body = error.body || (error.response && error.response.body) + const body = _parseK8sErrorBody(error.body || (error.response && error.response.body)) if (body && (body.code === 409 || body.reason === 'Conflict')) return true return false } diff --git a/src/websocket/exec-session-manager.js b/src/websocket/exec-session-manager.js index 94d68dd0..048f7bcd 100644 --- a/src/websocket/exec-session-manager.js +++ b/src/websocket/exec-session-manager.js @@ -1,5 +1,6 @@ const WebSocket = require('ws') const logger = require('../logger') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') const MicroserviceExecSessionManager = require('../data/managers/microservice-exec-session-manager') const ChangeTrackingService = require('../services/change-tracking-service') const FogManager = require('../data/managers/iofog-manager') @@ -15,6 +16,7 @@ class ExecSessionManager { this.execSessions = new Map() this.config = config this.cleanupInterval = null + this.expiredSessionHandler = null this.startCleanupInterval() logger.info('ExecSessionManager initialized with config:' + JSON.stringify({ execPendingTimeoutMs: config.session.execPendingTimeoutMs, @@ -46,7 
+48,14 @@ class ExecSessionManager { createdAt: Date.now(), transaction, queueBridgeEnabled: false, - metricsActive: false + metricsActive: false, + activationSent: false, + remoteAgentPaired: false, + remoteUserPaired: false, + pendingPairingTimer: null, + pairingStartedAt: null, + pairingMetricsStarted: false, + pairingCompleted: false } this.execSessions.set(sessionId, session) return session @@ -66,6 +75,10 @@ class ExecSessionManager { return sessions } + setExpiredSessionHandler (handler) { + this.expiredSessionHandler = typeof handler === 'function' ? handler : null + } + updateLastActivity (sessionId) { const session = this.execSessions.get(sessionId) if (session) { @@ -73,9 +86,19 @@ class ExecSessionManager { } } + detachLocalExecSession (sessionId) { + const session = this.execSessions.get(sessionId) + if (!session || session.removing) { + return + } + this.execSessions.delete(sessionId) + } + async removeExecSession (sessionId, transaction) { const session = this.execSessions.get(sessionId) - if (!session) return + if (!session || session.removing) return + + session.removing = true if (session.agent && session.agent.readyState === WebSocket.OPEN) { session.agent.close() @@ -124,8 +147,12 @@ class ExecSessionManager { let isExpired = false - if (!session.agent && session.user) { + if (!session.agent && !session.remoteAgentPaired && session.user) { isExpired = timeSinceCreation > pendingTimeout + } else if (session.user && !session.agent && session.remoteAgentPaired) { + isExpired = timeSinceLastActivity > maxDuration + } else if (session.agent && !session.user && session.remoteUserPaired) { + isExpired = timeSinceLastActivity > maxDuration } else if (session.agent && !session.user) { isExpired = timeSinceLastActivity > pendingTimeout } else if (session.agent && session.user) { @@ -140,42 +167,43 @@ class ExecSessionManager { } for (const sessionId of expiredSessions) { - logger.info('Cleaning up expired exec session:' + JSON.stringify({ sessionId })) 
const session = this.execSessions.get(sessionId) - if (session && session.user && session.user.readyState === WebSocket.OPEN) { - try { - session.user.close(1008, session.agent ? 'Exec session max duration exceeded' : 'Timeout waiting for agent connection') - } catch (error) { - logger.warn('Failed to close expired exec user connection:' + error.message) - } - } - if (session && session.agent && session.agent.readyState === WebSocket.OPEN) { - try { - session.agent.close(1000, 'Exec session expired') - } catch (error) { - logger.warn('Failed to close expired exec agent connection:' + error.message) - } + if (this.expiredSessionHandler) { + await this.expiredSessionHandler(sessionId, session, transaction) + } else { + await this._removeExpiredExecSession(sessionId, session, transaction) } - await this.removeExecSession(sessionId, transaction) } return expiredSessions.length } + async _removeExpiredExecSession (sessionId, session, transaction) { + logger.info('Cleaning up expired exec session:' + JSON.stringify({ sessionId })) + if (session && session.user && session.user.readyState === WebSocket.OPEN) { + try { + session.user.close(1008, (session.agent || session.remoteAgentPaired) ? 
'Exec session max duration exceeded' : 'Timeout waiting for agent connection') + } catch (error) { + logger.warn('Failed to close expired exec user connection:' + error.message) + } + } + if (session && session.agent && session.agent.readyState === WebSocket.OPEN) { + try { + session.agent.close(1000, 'Exec session expired') + } catch (error) { + logger.warn('Failed to close expired exec agent connection:' + error.message) + } + } + await this.removeExecSession(sessionId, transaction) + } + startCleanupInterval () { const interval = this.config.session.cleanupInterval || 30000 this.cleanupInterval = setInterval(async () => { try { - const models = require('../data/models') - const sequelize = models.sequelize - if (!sequelize) { - logger.warn('Sequelize not available, skipping exec session cleanup') - return - } - - await sequelize.transaction(async (transaction) => { + await runInTransaction(async (transaction) => { await this.cleanupExpiredSessions(transaction) - }) + }, { priority: PRIORITY_BACKGROUND, label: 'ws.execSessionCleanup' }) } catch (error) { logger.error('Error during exec session cleanup:' + JSON.stringify({ error: error.message, diff --git a/src/websocket/log-session-manager.js b/src/websocket/log-session-manager.js index a84b63ec..9f10b5df 100644 --- a/src/websocket/log-session-manager.js +++ b/src/websocket/log-session-manager.js @@ -1,5 +1,6 @@ const WebSocket = require('ws') const logger = require('../logger') +const { runInTransaction, PRIORITY_BACKGROUND } = require('../helpers/transaction-runner') const MicroserviceLogStatusManager = require('../data/managers/microservice-log-status-manager') const FogLogStatusManager = require('../data/managers/fog-log-status-manager') const ChangeTrackingService = require('../services/change-tracking-service') @@ -16,6 +17,7 @@ class LogSessionManager { this.logSessions = new Map() // Map this.config = config this.cleanupInterval = null + this.expiredSessionHandler = null this.startCleanupInterval() 
logger.info('LogSessionManager initialized with config:' + JSON.stringify({ logPendingTimeoutMs: config.session.logPendingTimeoutMs, @@ -46,7 +48,13 @@ class LogSessionManager { tailConfig, // Per-session tail configuration lastActivity: Date.now(), createdAt: Date.now(), - transaction + transaction, + remoteAgentPaired: false, + remoteUserPaired: false, + pendingPairingTimer: null, + pairingStartedAt: null, + pairingMetricsStarted: false, + pairingCompleted: false } this.logSessions.set(sessionId, session) return session @@ -67,6 +75,10 @@ class LogSessionManager { return sessions } + setExpiredSessionHandler (handler) { + this.expiredSessionHandler = typeof handler === 'function' ? handler : null + } + updateLastActivity (sessionId) { const session = this.logSessions.get(sessionId) if (session) { @@ -74,9 +86,19 @@ class LogSessionManager { } } + detachLocalLogSession (sessionId) { + const session = this.logSessions.get(sessionId) + if (!session || session.removing) { + return + } + this.logSessions.delete(sessionId) + } + async removeLogSession (sessionId, transaction) { const session = this.logSessions.get(sessionId) - if (!session) return + if (!session || session.removing) return + + session.removing = true // Close connections if (session.agent && session.agent.readyState === WebSocket.OPEN) { @@ -148,8 +170,12 @@ class LogSessionManager { let isExpired = false - if (!session.agent && session.user) { + if (!session.agent && !session.remoteAgentPaired && session.user) { isExpired = timeSinceCreation > pendingTimeout + } else if (session.user && !session.agent && session.remoteAgentPaired) { + isExpired = timeSinceLastActivity > idleTimeout + } else if (session.agent && !session.user && session.remoteUserPaired) { + isExpired = timeSinceLastActivity > idleTimeout } else if (session.agent && !session.user) { isExpired = timeSinceLastActivity > pendingTimeout } else if (session.agent && session.user) { @@ -164,42 +190,43 @@ class LogSessionManager { } for (const 
sessionId of expiredSessions) { - logger.info('Cleaning up expired log session:' + JSON.stringify({ sessionId })) const session = this.logSessions.get(sessionId) - if (session && session.user && session.user.readyState === WebSocket.OPEN) { - try { - session.user.close(1008, session.agent ? 'Log session idle timeout' : 'Timeout waiting for agent connection') - } catch (error) { - logger.warn('Failed to close expired log user connection:' + error.message) - } - } - if (session && session.agent && session.agent.readyState === WebSocket.OPEN) { - try { - session.agent.close(1000, 'Log session expired') - } catch (error) { - logger.warn('Failed to close expired log agent connection:' + error.message) - } + if (this.expiredSessionHandler) { + await this.expiredSessionHandler(sessionId, session, transaction) + } else { + await this._removeExpiredLogSession(sessionId, session, transaction) } - await this.removeLogSession(sessionId, transaction) } return expiredSessions.length } + async _removeExpiredLogSession (sessionId, session, transaction) { + logger.info('Cleaning up expired log session:' + JSON.stringify({ sessionId })) + if (session && session.user && session.user.readyState === WebSocket.OPEN) { + try { + session.user.close(1008, (session.agent || session.remoteAgentPaired) ? 
'Log session idle timeout' : 'Timeout waiting for agent connection') + } catch (error) { + logger.warn('Failed to close expired log user connection:' + error.message) + } + } + if (session && session.agent && session.agent.readyState === WebSocket.OPEN) { + try { + session.agent.close(1000, 'Log session expired') + } catch (error) { + logger.warn('Failed to close expired log agent connection:' + error.message) + } + } + await this.removeLogSession(sessionId, transaction) + } + startCleanupInterval () { const interval = this.config.session.cleanupInterval || 30000 // Default 30 seconds this.cleanupInterval = setInterval(async () => { try { - const models = require('../data/models') - const sequelize = models.sequelize - if (!sequelize) { - logger.warn('Sequelize not available, skipping log session cleanup') - return - } - - await sequelize.transaction(async (transaction) => { + await runInTransaction(async (transaction) => { await this.cleanupExpiredSessions(transaction) - }) + }, { priority: PRIORITY_BACKGROUND, label: 'ws.logSessionCleanup' }) } catch (error) { logger.error('Error during log session cleanup:' + JSON.stringify({ error: error.message, diff --git a/src/websocket/server.js b/src/websocket/server.js index 88754bf2..eab4ca3f 100644 --- a/src/websocket/server.js +++ b/src/websocket/server.js @@ -11,11 +11,15 @@ const MicroserviceStatusManager = require('../data/managers/microservice-status- const { microserviceState } = require('../enums/microservice-state') const AuthDecorator = require('../decorators/authorization-decorator') const TransactionDecorator = require('../decorators/transaction-decorator') +const transactionRunner = require('../helpers/transaction-runner') +const { PRIORITY_BACKGROUND } = transactionRunner const msgpack = require('@msgpack/msgpack') const { resolveTransport } = require('../services/ws-relay-transport-factory') const { recordExecSessionActive, - recordLogSessionActive + recordLogSessionActive, + recordPendingPairing, + 
recordPairingDurationMs } = require('./ws-metrics') const AppHelper = require('../helpers/app-helper') const MicroserviceLogStatusManager = require('../data/managers/microservice-log-status-manager') @@ -24,6 +28,8 @@ const FogLogStatusManager = require('../data/managers/fog-log-status-manager') const ChangeTrackingService = require('../services/change-tracking-service') const FogManager = require('../data/managers/iofog-manager') const FogStates = require('../enums/fog-state') +const Sequelize = require('sequelize') +const Op = Sequelize.Op const MESSAGE_TYPES = { STDIN: 0, @@ -44,6 +50,9 @@ const DRAIN_CLOSE_CODE = 1001 const DRAIN_CLOSE_REASON = 'Server draining' // when user WS bufferedAmount exceeds this, drop LOG_LINE silently and emit LOG_ERROR once. const LOG_BACKPRESSURE_BUFFER_BYTES = 256 * 1024 +const EXEC_AGENT_READY_NOTICE = 'Agent connected. Interactive exec is ready.\n' +const LOG_AGENT_READY_NOTICE = 'Agent connected. Log streaming started.\n' +const LOG_AGENT_DISCONNECTED_NOTICE = 'Agent disconnected.\n' const EventService = require('../services/event-service') const { isAuthConfigured: isOidcAuthConfigured } = require('../config/oidc') @@ -174,9 +183,7 @@ class WebSocketServer { if (!session || !session.user || !session.agent) return session.activationSent = false try { - await TransactionDecorator.generateTransaction(async (tx) => { - await this.sendExecActivationToExecSession(session, sessionId, tx) - })() + await this.sendExecActivationToExecSession(session, sessionId) } catch (error) { logger.error('[RELAY] Failed to resend exec activation after relay recovery', { sessionId, @@ -185,6 +192,8 @@ class WebSocketServer { } }) this.pendingCloseTimeouts = new Map() // Track pending CLOSE messages in cross-replica scenarios + this._execCleanupInflight = new Map() + this._logCleanupInflight = new Map() this.haConfig = config.get('server.webSocket.ha') || {} this.isDraining = false this.drainPromise = null @@ -214,6 +223,52 @@ class WebSocketServer { 
} }) } + + this.execSessionManager.setExpiredSessionHandler((sessionId, session, transaction) => + this._handleExpiredExecSession(sessionId, session, transaction)) + this.logSessionManager.setExpiredSessionHandler((sessionId, session, transaction) => + this._handleExpiredLogSession(sessionId, session, transaction)) + } + + _startWebSocketHeartbeat (ws, { label, sessionId } = {}) { + if (!ws) { + return + } + this._stopWebSocketHeartbeat(ws) + + const intervalMs = Number(this.config.pingInterval) + if (!Number.isFinite(intervalMs) || intervalMs <= 0) { + return + } + + ws._heartbeatTimer = setInterval(() => { + if (ws.readyState === WebSocket.OPEN) { + try { + ws.ping() + } catch (error) { + logger.debug('[WS-HEARTBEAT] Failed to send ping frame', { + label: label || null, + sessionId: sessionId || null, + error: error.message + }) + } + } + }, intervalMs) + + if (!ws._heartbeatCloseRegistered) { + ws._heartbeatCloseRegistered = true + ws.on('close', () => { + this._stopWebSocketHeartbeat(ws) + }) + } + } + + _stopWebSocketHeartbeat (ws) { + if (!ws || ws._heartbeatTimer == null) { + return + } + clearInterval(ws._heartbeatTimer) + ws._heartbeatTimer = null } // MessagePack encoding/decoding helpers with improved error handling @@ -348,73 +403,1038 @@ class WebSocketServer { // Don't let the error crash the process }) - process.on('unhandledRejection', (reason, promise) => { - logger.error({ - msg: 'Unhandled rejection in process (registered by WebSocket server)', - reason: formatErrorForLog(reason), - promise: { - type: promise && promise.constructor ? promise.constructor.name : typeof promise - } + process.on('unhandledRejection', (reason, promise) => { + logger.error({ + msg: 'Unhandled rejection in process (registered by WebSocket server)', + reason: formatErrorForLog(reason), + promise: { + type: promise && promise.constructor ? 
promise.constructor.name : typeof promise + } + }) + // Don't let the error crash the process + }) + + processErrorHandlersRegistered = true + } + } + + getLogConcurrencyLimit () { + return this.sessionConfig.logMaxConcurrentPerResource || 5 + } + + getExecConcurrencyLimit () { + return this.sessionConfig.execMaxConcurrentPerResource || 5 + } + + getLogTailMaxLines () { + return this.sessionConfig.logTailMaxLines || 5000 + } + + getExecPendingTimeoutMs () { + return this.sessionConfig.execPendingTimeoutMs || 60000 + } + + getLogPendingTimeoutMs () { + return this.sessionConfig.logPendingTimeoutMs || 120000 + } + + getDrainTimeoutMs () { + return this.sessionConfig.drainTimeoutMs || 30000 + } + + isCrossReplicaSession (session) { + return !!(session && (!session.agent || !session.user)) + } + + async requireRelayForCrossReplica (ws) { + if (this.haConfig.failFastOnRouterUnavailable === false) { + return true + } + const available = await this.relayTransport.isAvailable() + if (!available) { + logger.warn('[RELAY] Relay backend unavailable for cross-replica session', { + transport: this.relayTransport.getTransport() + }) + if (ws && ws.readyState === WebSocket.OPEN) { + ws.close(RELAY_UNAVAILABLE_CLOSE_CODE, RELAY_UNAVAILABLE_CLOSE_REASON) + } + return false + } + return true + } + + _scheduleRelaySetupAfterCommit (label, setupFn) { + setImmediate(async () => { + try { + await setupFn() + } catch (error) { + logger.error(`Failed to ${label}:` + JSON.stringify({ + error: error.message, + stack: error.stack + })) + } + }) + } + + _runDedupedSessionCleanup (inflightMap, sessionId, label, cleanupFn) { + const existing = inflightMap.get(sessionId) + if (existing) { + return existing + } + + const promise = transactionRunner.runInTransaction( + cleanupFn, + { priority: PRIORITY_BACKGROUND, label } + ).finally(() => { + inflightMap.delete(sessionId) + }) + + inflightMap.set(sessionId, promise) + return promise + } + + async _cleanupLogSessionInTransaction (sessionId) { + 
return this._runDedupedSessionCleanup( + this._logCleanupInflight, + sessionId, + 'ws.log.cleanup', + (transaction) => this.cleanupLogSession(sessionId, transaction) + ) + } + + async _cleanupExecSessionInTransaction (sessionId) { + return this._runDedupedSessionCleanup( + this._execCleanupInflight, + sessionId, + 'ws.exec.cleanup', + (transaction) => this.cleanupExecSession(sessionId, transaction) + ) + } + + _isExecSessionAgentPaired (session) { + return !!(session && (session.agent || session.remoteAgentPaired)) + } + + _isExecSessionUserPaired (session) { + return !!(session && (session.user || session.remoteUserPaired)) + } + + _isLogSessionAgentPaired (session) { + return !!(session && (session.agent || session.remoteAgentPaired)) + } + + _isLogSessionUserPaired (session) { + return !!(session && (session.user || session.remoteUserPaired)) + } + + _clearPendingPairingTimer (session) { + if (session && session.pendingPairingTimer) { + clearTimeout(session.pendingPairingTimer) + session.pendingPairingTimer = null + } + } + + _startExecPendingPairingMetrics (session) { + if (!session || session.pairingMetricsStarted) { + return + } + session.pairingMetricsStarted = true + session.pairingStartedAt = Date.now() + recordPendingPairing(1) + } + + _startLogPendingPairingMetrics (session) { + if (!session || session.pairingMetricsStarted) { + return + } + session.pairingMetricsStarted = true + session.pairingStartedAt = Date.now() + recordPendingPairing(1) + } + + _recordPairingCompleted (session) { + if (!session || session.pairingCompleted || !session.pairingMetricsStarted) { + return + } + session.pairingCompleted = true + recordPendingPairing(-1) + if (session.pairingStartedAt != null) { + recordPairingDurationMs(Date.now() - session.pairingStartedAt) + } + } + + _abortPendingPairingMetrics (session) { + if (!session || session.pairingCompleted || !session.pairingMetricsStarted) { + return + } + session.pairingCompleted = true + recordPendingPairing(-1) + } + + 
_markExecAgentPaired (sessionId, { notifyUser = false, source = 'local' } = {}) { + const session = this.execSessionManager.getExecSession(sessionId) + if (!session) { + return false + } + const wasPaired = this._isExecSessionAgentPaired(session) + session.remoteAgentPaired = true + this._clearPendingPairingTimer(session) + if (!wasPaired) { + this._recordPairingCompleted(session) + } + if (notifyUser && session.user && session.user.readyState === WebSocket.OPEN) { + try { + const readyMsg = { + type: MESSAGE_TYPES.STDERR, + data: Buffer.from(EXEC_AGENT_READY_NOTICE), + sessionId, + microserviceUuid: session.microserviceUuid, + execId: sessionId, + timestamp: Date.now() + } + session.user.send(this.encodeMessage(readyMsg), { binary: true }) + } catch (error) { + logger.warn('Failed to notify user that exec agent connected:' + JSON.stringify({ + sessionId, + source, + error: error.message + })) + } + } + return true + } + + _markExecUserPaired (sessionId, { source = 'relay-notify' } = {}) { + const session = this.execSessionManager.getExecSession(sessionId) + if (!session) { + return false + } + session.remoteUserPaired = true + session.lastActivity = Date.now() + logger.info('Exec remote user paired:' + JSON.stringify({ sessionId, source })) + return true + } + + _markLogUserPaired (sessionId, { source = 'relay-notify' } = {}) { + const session = this.logSessionManager.getLogSession(sessionId) + if (!session) { + return false + } + session.remoteUserPaired = true + session.lastActivity = Date.now() + logger.info('Log remote user paired:' + JSON.stringify({ + sessionId, + source, + microserviceUuid: session.microserviceUuid, + fogUuid: session.fogUuid + })) + return true + } + + _markLogAgentPaired (sessionId, { notifyUser = false, source = 'local' } = {}) { + const session = this.logSessionManager.getLogSession(sessionId) + if (!session) { + return false + } + const wasPaired = this._isLogSessionAgentPaired(session) + session.remoteAgentPaired = true + 
this._clearPendingPairingTimer(session) + if (!wasPaired) { + this._recordPairingCompleted(session) + } + if (notifyUser && session.user && session.user.readyState === WebSocket.OPEN) { + try { + const agentConnectedMsg = { + type: MESSAGE_TYPES.LOG_LINE, + data: Buffer.from(LOG_AGENT_READY_NOTICE), + sessionId, + timestamp: Date.now(), + microserviceUuid: session.microserviceUuid || null, + iofogUuid: session.fogUuid || null + } + session.user.send(this.encodeMessage(agentConnectedMsg), { binary: true }) + logger.info('Notified user that agent connected for log session:' + JSON.stringify({ + sessionId, + source, + microserviceUuid: session.microserviceUuid, + fogUuid: session.fogUuid + })) + } catch (error) { + logger.warn('Failed to notify user that log agent connected:' + JSON.stringify({ + sessionId, + source, + error: error.message + })) + } + } + return true + } + + async _sendExecActivationViaRelay (sessionId, microserviceUuid) { + if (!this.relayTransport.shouldUseRelay(sessionId)) { + return false + } + const activationMsg = { + type: MESSAGE_TYPES.ACTIVATION, + data: Buffer.from(JSON.stringify({ + sessionId, + execId: sessionId, + microserviceUuid, + timestamp: Date.now() + })), + sessionId, + microserviceUuid, + execId: sessionId, + timestamp: Date.now() + } + try { + await this.relayTransport.publishToAgent( + sessionId, + this.encodeMessage(activationMsg), + { messageType: MESSAGE_TYPES.ACTIVATION } + ) + const session = this.execSessionManager.getExecSession(sessionId) + if (session) { + session.activationSent = true + } + logger.info('[RELAY] Exec activation published to agent via relay:' + JSON.stringify({ + sessionId, + microserviceUuid + })) + return true + } catch (error) { + logger.error('[RELAY] Failed to publish exec activation via relay:' + JSON.stringify({ + sessionId, + microserviceUuid, + error: error.message + })) + return false + } + } + + async _notifyExecUserViaRelay (sessionId, microserviceUuid) { + if 
(!this.relayTransport.shouldUseRelay(sessionId)) { + return false + } + const readyMsg = { + type: MESSAGE_TYPES.STDERR, + data: Buffer.from(EXEC_AGENT_READY_NOTICE), + sessionId, + microserviceUuid, + execId: sessionId, + timestamp: Date.now() + } + try { + await this.relayTransport.publishToUser( + sessionId, + this.encodeMessage(readyMsg), + { messageType: MESSAGE_TYPES.STDERR } + ) + logger.info('[RELAY] Exec user ready notice published via relay:' + JSON.stringify({ + sessionId, + microserviceUuid + })) + return true + } catch (error) { + logger.error('[RELAY] Failed to notify exec user via relay:' + JSON.stringify({ + sessionId, + microserviceUuid, + error: error.message + })) + return false + } + } + + async _notifyLogUserViaRelay (sessionId, { microserviceUuid, fogUuid, message }) { + if (!this.relayTransport.shouldUseRelayForLogs(sessionId)) { + return false + } + const notifyMsg = { + type: MESSAGE_TYPES.LOG_LINE, + data: Buffer.from(message), + sessionId, + timestamp: Date.now(), + microserviceUuid: microserviceUuid || null, + iofogUuid: fogUuid || null + } + try { + await this.relayTransport.publishLogToUser(sessionId, this.encodeMessage(notifyMsg)) + return true + } catch (error) { + logger.error('[RELAY] Failed to notify log user via relay:' + JSON.stringify({ + sessionId, + microserviceUuid, + fogUuid, + error: error.message + })) + return false + } + } + + _registerExecUserRelayPairingHook (sessionId) { + if (typeof this.relayTransport.setExecUserDeliveryHook !== 'function') { + return + } + this.relayTransport.setExecUserDeliveryHook(sessionId, (buffer) => { + this._onExecUserRelayDelivery(sessionId, buffer) + }) + } + + _registerExecAgentRelayActivityHook (sessionId) { + if (typeof this.relayTransport.setExecAgentDeliveryHook !== 'function') { + return + } + this.relayTransport.setExecAgentDeliveryHook(sessionId, (buffer) => { + this._onExecAgentRelayDelivery(sessionId, buffer) + }) + } + + _registerLogUserRelayPairingHook (sessionId) { + if 
(typeof this.relayTransport.setLogUserDeliveryHook !== 'function') { + return + } + this.relayTransport.setLogUserDeliveryHook(sessionId, (buffer) => { + this._onLogUserRelayDelivery(sessionId, buffer) + }) + } + + _onExecUserRelayDelivery (sessionId, buffer) { + const session = this.execSessionManager.getExecSession(sessionId) + if (session) { + session.lastActivity = Date.now() + } + try { + const msg = this.decodeMessage(buffer) + if (msg.type === MESSAGE_TYPES.STDERR && msg.data) { + const text = msg.data.toString() + if (text.includes('Interactive exec is ready')) { + this._markExecAgentPaired(sessionId, { notifyUser: false, source: 'relay-notify' }) + } + } + } catch (error) { + logger.debug('Ignoring exec user relay delivery hook decode error:' + JSON.stringify({ + sessionId, + error: error.message + })) + } + } + + _onExecAgentRelayDelivery (sessionId, buffer) { + const session = this.execSessionManager.getExecSession(sessionId) + if (session) { + session.lastActivity = Date.now() + } + try { + this.decodeMessage(buffer) + } catch (error) { + logger.debug('Ignoring exec agent relay delivery hook decode error:' + JSON.stringify({ + sessionId, + error: error.message + })) + } + } + + _onLogUserRelayDelivery (sessionId, buffer) { + const session = this.logSessionManager.getLogSession(sessionId) + if (session) { + session.lastActivity = Date.now() + } + try { + const msg = this.decodeMessage(buffer) + if (msg.type === MESSAGE_TYPES.LOG_LINE && msg.data) { + const text = msg.data.toString() + if (text.includes('Log streaming started')) { + this._markLogAgentPaired(sessionId, { notifyUser: false, source: 'relay-notify' }) + } + } + } catch (error) { + logger.debug('Ignoring log user relay delivery hook decode error:' + JSON.stringify({ + sessionId, + error: error.message + })) + } + } + + async _checkExecAgentPairedInDb (sessionId, transaction) { + const row = await MicroserviceExecSessionManager.findBySessionId(sessionId, transaction) + return !!(row && 
row.agentConnected) + } + + async _checkExecUserConnectedInDb (sessionId, transaction) { + const row = await MicroserviceExecSessionManager.findBySessionId(sessionId, transaction) + return !!(row && row.userConnected) + } + + async _checkLogAgentPairedInDb (sessionId, microserviceUuid, fogUuid, transaction) { + let row = null + if (microserviceUuid) { + row = await MicroserviceLogStatusManager.findOne({ sessionId }, transaction) + } else if (fogUuid) { + row = await FogLogStatusManager.findOne({ sessionId }, transaction) + } + return !!(row && row.agentConnected) + } + + async _checkLogUserConnectedInDb (sessionId, microserviceUuid, fogUuid, transaction) { + let row = null + if (microserviceUuid) { + row = await MicroserviceLogStatusManager.findOne({ sessionId }, transaction) + } else if (fogUuid) { + row = await FogLogStatusManager.findOne({ sessionId }, transaction) + } + return !!(row && row.userConnected) + } + + async _isLogUserStillConnected (sessionId, session, microserviceUuid, fogUuid) { + if (session && session.user) { + return true + } + try { + return await transactionRunner.runInTransaction( + (tx) => this._checkLogUserConnectedInDb(sessionId, microserviceUuid, fogUuid, tx), + { priority: PRIORITY_BACKGROUND, label: 'ws.log.user-connected-db-check' } + ) + } catch (error) { + logger.warn('Log user-connected DB check failed:' + JSON.stringify({ + sessionId, + error: error.message + })) + return false + } + } + + async _isExecUserStillConnected (sessionId, session) { + if (session && session.user) { + return true + } + try { + return await transactionRunner.runInTransaction( + (tx) => this._checkExecUserConnectedInDb(sessionId, tx), + { priority: PRIORITY_BACKGROUND, label: 'ws.exec.user-connected-db-check' } + ) + } catch (error) { + logger.warn('Exec user-connected DB check failed:' + JSON.stringify({ + sessionId, + error: error.message + })) + return false + } + } + + async _notifyExecRemotePeerClose (sessionId, session, reason = 'Exec session 
expired') { + if (!session || !this.relayTransport.shouldUseRelay(sessionId)) { + return + } + + const closeMsg = { + type: MESSAGE_TYPES.CLOSE, + execId: sessionId, + sessionId, + microserviceUuid: session.microserviceUuid, + timestamp: Date.now(), + data: Buffer.from(reason) + } + const encoded = this.encodeMessage(closeMsg) + + try { + if (session.agent && !session.user && session.remoteUserPaired) { + await this.relayTransport.publishToUser(sessionId, encoded, { messageType: MESSAGE_TYPES.CLOSE }) + } else if (session.user && !session.agent && session.remoteAgentPaired) { + await this.relayTransport.publishToAgent(sessionId, encoded, { messageType: MESSAGE_TYPES.CLOSE }) + } + } catch (error) { + logger.error('[WS-CLOSE] Failed to notify remote exec peer via relay during session close', { + sessionId, + error: error.message + }) + } + } + + async _handleExpiredExecSession (sessionId, session, transaction) { + logger.info('Cleaning up expired exec session:' + JSON.stringify({ sessionId })) + if (session && session.user && session.user.readyState === WebSocket.OPEN) { + try { + session.user.close( + 1008, + (session.agent || session.remoteAgentPaired) + ? 
'Exec session max duration exceeded' + : 'Timeout waiting for agent connection' + ) + } catch (error) { + logger.warn('Failed to close expired exec user connection:' + error.message) + } + } + if (session && session.agent && session.agent.readyState === WebSocket.OPEN) { + try { + session.agent.close(1000, 'Exec session expired') + } catch (error) { + logger.warn('Failed to close expired exec agent connection:' + error.message) + } + } + + await this._notifyExecRemotePeerClose(sessionId, session, 'Exec session expired') + await this._cleanupExecSessionInTransaction(sessionId) + } + + async _handleExpiredLogSession (sessionId, session, transaction) { + logger.info('Cleaning up expired log session:' + JSON.stringify({ sessionId })) + if (session && session.user && session.user.readyState === WebSocket.OPEN) { + try { + session.user.close( + 1008, + (session.agent || session.remoteAgentPaired) + ? 'Log session idle timeout' + : 'Timeout waiting for agent connection' + ) + } catch (error) { + logger.warn('Failed to close expired log user connection:' + error.message) + } + } + if (session && session.agent && session.agent.readyState === WebSocket.OPEN) { + try { + session.agent.close(1000, 'Log session expired') + } catch (error) { + logger.warn('Failed to close expired log agent connection:' + error.message) + } + } + + if (session && this.relayTransport.shouldUseRelayForLogs(sessionId)) { + if (session.agent && !session.user && session.remoteUserPaired) { + await this._notifyLogUserViaRelay(sessionId, { + microserviceUuid: session.microserviceUuid, + fogUuid: session.fogUuid, + message: 'Log session ended.\n' + }).catch((error) => { + logger.error('[WS-CLOSE] Failed to notify remote log user via relay during session expiry', { + sessionId, + error: error.message + }) + }) + } + } + + await this._cleanupLogSessionInTransaction(sessionId) + } + + _scheduleExecPendingPairingTimeout (sessionId, userWs, microserviceUuid) { + const session = 
this.execSessionManager.getExecSession(sessionId) + if (!session) { + return + } + this._clearPendingPairingTimer(session) + const timeoutMs = this.getExecPendingTimeoutMs() + session.pendingPairingTimer = setTimeout(() => { + this._handleExecPendingTimeout(sessionId, userWs, microserviceUuid).catch((error) => { + logger.warn('Exec pending timeout handler failed:' + error.message) + }) + }, timeoutMs) + } + + _scheduleLogPendingPairingTimeout (sessionId, userWs, microserviceUuid, fogUuid) { + const session = this.logSessionManager.getLogSession(sessionId) + if (!session) { + return + } + this._clearPendingPairingTimer(session) + const timeoutMs = this.getLogPendingTimeoutMs() + session.pendingPairingTimer = setTimeout(() => { + this._handleLogPendingTimeout(sessionId, userWs, microserviceUuid, fogUuid).catch((error) => { + logger.warn('Log pending timeout handler failed:' + error.message) + }) + }, timeoutMs) + } + + async _handleExecPendingTimeout (sessionId, userWs, microserviceUuid) { + const session = this.execSessionManager.getExecSession(sessionId) + if (!session || this._isExecSessionAgentPaired(session)) { + return + } + + try { + const pairedInDb = await transactionRunner.runInTransaction( + (tx) => this._checkExecAgentPairedInDb(sessionId, tx), + { priority: PRIORITY_BACKGROUND, label: 'ws.exec.pending-db-check' } + ) + if (pairedInDb) { + this._markExecAgentPaired(sessionId, { notifyUser: false, source: 'db-fallback' }) + return + } + } catch (error) { + logger.warn('Exec pending timeout DB check failed:' + JSON.stringify({ + sessionId, + error: error.message + })) + } + + this._abortPendingPairingMetrics(session) + logger.warn('Exec session pending timeout:' + JSON.stringify({ + sessionId, + microserviceUuid, + timeout: this.getExecPendingTimeoutMs() + })) + try { + if (userWs.readyState === WebSocket.OPEN) { + const timeoutMsg = { + type: MESSAGE_TYPES.STDERR, + data: Buffer.from('Timeout waiting for agent connection.\n'), + sessionId, + 
microserviceUuid, + execId: sessionId, + timestamp: Date.now() + } + userWs.send(this.encodeMessage(timeoutMsg), { binary: true }) + userWs.close(1008, 'Timeout waiting for agent connection') + } + } catch (error) { + logger.warn('Failed to close exec session on pending timeout:' + error.message) + } + } + + async _handleLogPendingTimeout (sessionId, userWs, microserviceUuid, fogUuid) { + const session = this.logSessionManager.getLogSession(sessionId) + if (!session || this._isLogSessionAgentPaired(session)) { + return + } + + try { + const pairedInDb = await transactionRunner.runInTransaction( + (tx) => this._checkLogAgentPairedInDb(sessionId, microserviceUuid, fogUuid, tx), + { priority: PRIORITY_BACKGROUND, label: 'ws.log.pending-db-check' } + ) + if (pairedInDb) { + this._markLogAgentPaired(sessionId, { notifyUser: false, source: 'db-fallback' }) + return + } + } catch (error) { + logger.warn('Log pending timeout DB check failed:' + JSON.stringify({ + sessionId, + error: error.message + })) + } + + this._abortPendingPairingMetrics(session) + logger.warn('Log session pending timeout:' + JSON.stringify({ + sessionId, + microserviceUuid, + fogUuid, + timeout: this.getLogPendingTimeoutMs() + })) + try { + if (userWs.readyState === WebSocket.OPEN) { + const timeoutMsg = { + type: MESSAGE_TYPES.LOG_LINE, + data: Buffer.from('Timeout waiting for agent connection.\n'), + sessionId, + timestamp: Date.now(), + microserviceUuid: microserviceUuid || null, + iofogUuid: fogUuid || null + } + userWs.send(this.encodeMessage(timeoutMsg), { binary: true }) + userWs.close(1008, 'Timeout waiting for agent connection') + } + } catch (error) { + logger.warn('Failed to close log session on pending timeout:' + error.message) + } + } + + async _detachExecSessionLocal (sessionId) { + const session = this.execSessionManager.getExecSession(sessionId) + if (session) { + this._clearPendingPairingTimer(session) + if (session.metricsActive) { + recordExecSessionActive(-1) + 
session.metricsActive = false + } + } + this.execSessionManager.detachLocalExecSession(sessionId) + await this.relayTransport.cleanup(sessionId) + .catch((error) => { + logger.warn('[RELAY] Failed to cleanup exec relay bridge during local detach', { + sessionId, + error: error.message + }) + }) + } + + async _detachLogSessionLocal (sessionId) { + const session = this.logSessionManager.getLogSession(sessionId) + if (session) { + this._clearPendingPairingTimer(session) + if (session.metricsActive) { + recordLogSessionActive(-1) + session.metricsActive = false + } + } + this.logBackpressureNotified.delete(sessionId) + this.logSessionManager.detachLocalLogSession(sessionId) + await this.relayTransport.cleanupLogSession(sessionId) + .catch((error) => { + logger.warn('[RELAY] Failed to cleanup log relay bridge during local detach', { + sessionId, + error: error.message + }) + }) + logger.info('Log session local detach complete:' + JSON.stringify({ + sessionId, + microserviceUuid: session ? session.microserviceUuid || null : null, + fogUuid: session ? 
session.fogUuid || null : null + })) + } + + async _handleAgentExecPartialDisconnect (sessionId, currentSession, fog) { + currentSession.agent = null + currentSession.activationSent = false + currentSession.remoteAgentPaired = false + currentSession.remoteUserPaired = false + currentSession.lastActivity = Date.now() + + await TransactionDecorator.generateTransaction(async (closeTransaction) => { + await MicroserviceExecSessionManager.update( + { sessionId }, + { agentConnected: false }, + closeTransaction + ) + await ChangeTrackingService.update( + fog.uuid, + ChangeTrackingService.events.microserviceExecSessions, + closeTransaction + ) + })() + + const relayEnabled = this.relayTransport.shouldUseRelay(sessionId) + if (relayEnabled) { + try { + const closeMsg = { + type: MESSAGE_TYPES.CLOSE, + execId: sessionId, + sessionId, + microserviceUuid: currentSession.microserviceUuid, + timestamp: Date.now(), + data: Buffer.from('Agent closed connection') + } + const encoded = this.encodeMessage(closeMsg) + await this.relayTransport.publishToUser(sessionId, encoded, { messageType: MESSAGE_TYPES.CLOSE }) + } catch (error) { + logger.error('[WS-CLOSE] Failed to send CLOSE to user via relay after agent exec disconnect', { + sessionId, + error: error.message + }) + } + } else if (currentSession.user && currentSession.user.readyState === WebSocket.OPEN) { + currentSession.user.close(1000, 'Agent closed connection') + } + + if (!currentSession.user) { + await this._detachExecSessionLocal(sessionId) + } + } + + async _handleUserExecPartialDisconnect (sessionId, currentSession, microserviceUuid) { + currentSession.user = null + currentSession.remoteAgentPaired = false + currentSession.lastActivity = Date.now() + + await TransactionDecorator.generateTransaction(async (closeTransaction) => { + await MicroserviceExecSessionManager.update( + { sessionId }, + { userConnected: false }, + closeTransaction + ) + const microservice = await MicroserviceManager.findOne( + { uuid: 
microserviceUuid }, + closeTransaction + ) + if (microservice) { + const fog = await FogManager.findOne({ uuid: microservice.iofogUuid }, closeTransaction) + if (fog) { + await ChangeTrackingService.update( + fog.uuid, + ChangeTrackingService.events.microserviceExecSessions, + closeTransaction + ) + } + } + })() + + if (currentSession.agent) { + if (currentSession.agent.readyState === WebSocket.OPEN) { + currentSession.agent.close(1000, 'User closed connection') + } + await this._cleanupExecSessionInTransaction(sessionId) + return + } + + const relayEnabled = this.relayTransport.shouldUseRelay(sessionId) + if (relayEnabled) { + try { + const closeMsg = { + type: MESSAGE_TYPES.CLOSE, + execId: sessionId, + sessionId, + microserviceUuid: currentSession.microserviceUuid, + timestamp: Date.now(), + data: Buffer.from('User closed connection') + } + const encoded = this.encodeMessage(closeMsg) + await this.relayTransport.publishToAgent(sessionId, encoded, { messageType: MESSAGE_TYPES.CLOSE }) + } catch (error) { + logger.error('[WS-CLOSE] Failed to send CLOSE to agent via relay after user exec disconnect', { + sessionId, + error: error.message }) - // Don't let the error crash the process - }) + } + } - processErrorHandlersRegistered = true + let agentStillConnected = false + try { + agentStillConnected = await transactionRunner.runInTransaction( + (tx) => this._checkExecAgentPairedInDb(sessionId, tx), + { priority: PRIORITY_BACKGROUND, label: 'ws.exec.user-partial-agent-db-check' } + ) + } catch (error) { + logger.warn('Exec user partial disconnect agent DB check failed:' + JSON.stringify({ + sessionId, + error: error.message + })) } - } - getLogConcurrencyLimit () { - return this.sessionConfig.logMaxConcurrentPerResource || 3 + if (!agentStillConnected) { + await this._cleanupExecSessionInTransaction(sessionId) + } else { + await this._detachExecSessionLocal(sessionId) + } } - getExecConcurrencyLimit () { - return this.sessionConfig.execMaxConcurrentPerResource || 3 - 
} + async _handleUserLogPartialDisconnect (sessionId, session, microserviceUuid, fogUuid) { + session.user = null + session.remoteUserPaired = false + session.lastActivity = Date.now() - getLogTailMaxLines () { - return this.sessionConfig.logTailMaxLines || 5000 - } + await TransactionDecorator.generateTransaction(async (closeTransaction) => { + if (microserviceUuid) { + await MicroserviceLogStatusManager.update( + { sessionId }, + { userConnected: false }, + closeTransaction + ) + } else if (fogUuid) { + await FogLogStatusManager.update( + { sessionId }, + { userConnected: false }, + closeTransaction + ) + } - getExecPendingTimeoutMs () { - return this.sessionConfig.execPendingTimeoutMs || 60000 - } + const fogForTracking = await FogManager.findOne({ + uuid: fogUuid || (await MicroserviceManager.findOne({ uuid: microserviceUuid }, closeTransaction)).iofogUuid + }, closeTransaction) + if (fogForTracking) { + await ChangeTrackingService.update( + fogForTracking.uuid, + fogUuid ? ChangeTrackingService.events.fogLogs : ChangeTrackingService.events.microserviceLogs, + closeTransaction + ) + } + })() - getLogPendingTimeoutMs () { - return this.sessionConfig.logPendingTimeoutMs || 120000 - } + if (session.agent) { + if (session.agent.readyState === WebSocket.OPEN) { + session.agent.close(1000, 'User closed connection') + } + await this._cleanupLogSessionInTransaction(sessionId) + return + } - getDrainTimeoutMs () { - return this.sessionConfig.drainTimeoutMs || 30000 - } + let agentStillConnected = false + try { + agentStillConnected = await transactionRunner.runInTransaction( + (tx) => this._checkLogAgentPairedInDb(sessionId, microserviceUuid, fogUuid, tx), + { priority: PRIORITY_BACKGROUND, label: 'ws.log.user-partial-agent-db-check' } + ) + } catch (error) { + logger.warn('Log user partial disconnect agent DB check failed:' + JSON.stringify({ + sessionId, + error: error.message + })) + } - isCrossReplicaSession (session) { - return !!(session && (!session.agent || 
!session.user)) + if (!agentStillConnected) { + await this._cleanupLogSessionInTransaction(sessionId) + } else { + await this._detachLogSessionLocal(sessionId) + } } - async requireRelayForCrossReplica (ws) { - if (this.haConfig.failFastOnRouterUnavailable === false) { - return true - } - const available = await this.relayTransport.isAvailable() - if (!available) { - logger.warn('[RELAY] Relay backend unavailable for cross-replica session', { - transport: this.relayTransport.getTransport() - }) - if (ws && ws.readyState === WebSocket.OPEN) { - ws.close(RELAY_UNAVAILABLE_CLOSE_CODE, RELAY_UNAVAILABLE_CLOSE_REASON) + async _handleAgentLogPartialDisconnect (sessionId, session, { microserviceUuid, iofogUuid, logStatus }) { + session.agent = null + session.remoteAgentPaired = false + session.remoteUserPaired = false + session.lastActivity = Date.now() + + await TransactionDecorator.generateTransaction(async (closeTransaction) => { + if (microserviceUuid) { + await MicroserviceLogStatusManager.update( + { sessionId }, + { agentConnected: false }, + closeTransaction + ) + } else if (iofogUuid) { + await FogLogStatusManager.update( + { sessionId }, + { agentConnected: false }, + closeTransaction + ) } - return false + + let fogUuidForTracking = iofogUuid || logStatus.iofogUuid + if (!fogUuidForTracking && logStatus.microserviceUuid) { + const microservice = await MicroserviceManager.findOne( + { uuid: logStatus.microserviceUuid }, + closeTransaction + ) + fogUuidForTracking = microservice ? microservice.iofogUuid : null + } + + if (fogUuidForTracking) { + await ChangeTrackingService.update( + fogUuidForTracking, + iofogUuid ? 
ChangeTrackingService.events.fogLogs : ChangeTrackingService.events.microserviceLogs, + closeTransaction + ) + } + })() + + const relayEnabled = this.relayTransport.shouldUseRelayForLogs(sessionId) + if (relayEnabled) { + await this._notifyLogUserViaRelay(sessionId, { + microserviceUuid: session.microserviceUuid, + fogUuid: session.fogUuid, + message: LOG_AGENT_DISCONNECTED_NOTICE + }) + } + + if (!session.user) { + await this._detachLogSessionLocal(sessionId) } - return true } async countLogSessionsInDb (microserviceUuid, fogUuid, transaction) { + const activeUserFilter = { + userConnected: true, + status: { [Op.in]: ['PENDING', 'ACTIVE'] } + } if (microserviceUuid) { - const rows = await MicroserviceLogStatusManager.findAll({ microserviceUuid }, transaction) + const rows = await MicroserviceLogStatusManager.findAll({ + microserviceUuid, + ...activeUserFilter + }, transaction) return rows.length } if (fogUuid) { - const rows = await FogLogStatusManager.findAll({ iofogUuid: fogUuid }, transaction) + const rows = await FogLogStatusManager.findAll({ + iofogUuid: fogUuid, + ...activeUserFilter + }, transaction) return rows.length } return 0 @@ -424,7 +1444,11 @@ class WebSocketServer { if (!microserviceUuid) { return 0 } - const rows = await MicroserviceExecSessionManager.findAll({ microserviceUuid }, transaction) + const rows = await MicroserviceExecSessionManager.findAll({ + microserviceUuid, + userConnected: true, + status: { [Op.in]: ['PENDING', 'ACTIVE'] } + }, transaction) return rows.length } @@ -867,6 +1891,8 @@ class WebSocketServer { ) execSession.metricsActive = true recordExecSessionActive(1) + this._startExecPendingPairingMetrics(execSession) + this._startWebSocketHeartbeat(ws, { label: 'user-exec', sessionId }) const activationMsg = { type: MESSAGE_TYPES.ACTIVATION, @@ -899,43 +1925,12 @@ class WebSocketServer { })) } - await this.setupExecMessageForwarding(sessionId, transaction) + this._scheduleRelaySetupAfterCommit( + 'setup exec message forwarding', 
+ () => this.setupExecMessageForwarding(sessionId) + ) - const EXEC_PENDING_TIMEOUT = this.getExecPendingTimeoutMs() - const pendingTimer = setTimeout(async () => { - const session = this.execSessionManager.getExecSession(sessionId) - if (!session || session.agent) { - return - } - logger.warn('Exec session pending timeout:' + JSON.stringify({ - sessionId, - microserviceUuid, - timeout: EXEC_PENDING_TIMEOUT - })) - try { - if (ws.readyState === WebSocket.OPEN) { - const timeoutMsg = { - type: MESSAGE_TYPES.STDERR, - data: Buffer.from('Timeout waiting for agent connection.\n'), - sessionId, - microserviceUuid, - execId: sessionId, - timestamp: Date.now() - } - ws.send(this.encodeMessage(timeoutMsg), { binary: true }) - ws.close(1008, 'Timeout waiting for agent connection') - } - } catch (error) { - logger.warn('Failed to close exec session on pending timeout:' + error.message) - } - try { - await TransactionDecorator.generateTransaction(async (timeoutTransaction) => { - await this.cleanupExecSession(sessionId, timeoutTransaction) - })() - } catch (error) { - logger.error('Failed to remove exec session after pending timeout:' + error.message) - } - }, EXEC_PENDING_TIMEOUT) + this._scheduleExecPendingPairingTimeout(sessionId, ws, microserviceUuid) setImmediate(async () => { try { @@ -957,16 +1952,36 @@ class WebSocketServer { }) ws.on('close', async (code, reason) => { - clearTimeout(pendingTimer) const session = this.execSessionManager.getExecSession(sessionId) if (session) { + this._clearPendingPairingTimer(session) + if (session.pairingMetricsStarted && !session.pairingCompleted) { + this._abortPendingPairingMetrics(session) + } session.user = null session.lastActivity = Date.now() try { - await TransactionDecorator.generateTransaction(async (closeTransaction) => { - await this.cleanupExecSession(sessionId, closeTransaction) - })() + let agentStillConnected = session.agent != null || session.remoteAgentPaired + if (!agentStillConnected) { + try { + 
agentStillConnected = await transactionRunner.runInTransaction( + (tx) => this._checkExecAgentPairedInDb(sessionId, tx), + { priority: PRIORITY_BACKGROUND, label: 'ws.exec.user-disconnect-db-check' } + ) + } catch (error) { + logger.warn('Exec user disconnect DB check failed:' + JSON.stringify({ + sessionId, + error: error.message + })) + } + } + + if (agentStillConnected) { + await this._handleUserExecPartialDisconnect(sessionId, session, microserviceUuid) + } else { + await this._cleanupExecSessionInTransaction(sessionId) + } } catch (err) { logger.error('Failed to cleanup exec session on user disconnect:' + JSON.stringify({ error: err.message, @@ -1054,27 +2069,12 @@ class WebSocketServer { session.lastActivity = Date.now() session.activationSent = false } + this._startWebSocketHeartbeat(ws, { label: 'agent-exec', sessionId }) - await this.setupExecMessageForwarding(sessionId, transaction) - - if (session.user && session.user.readyState === WebSocket.OPEN) { - try { - const readyMsg = { - type: MESSAGE_TYPES.STDERR, - data: Buffer.from('Agent connected. 
Interactive exec is ready.\n'), - sessionId, - microserviceUuid, - execId: sessionId, - timestamp: Date.now() - } - session.user.send(this.encodeMessage(readyMsg), { binary: true }) - } catch (error) { - logger.warn('Failed to notify user that exec agent connected:' + JSON.stringify({ - sessionId, - error: error.message - })) - } - } + this._scheduleRelaySetupAfterCommit( + 'setup exec message forwarding', + () => this.setupExecMessageForwarding(sessionId) + ) this.scheduleAgentExecConnectEvent(req, microserviceUuid) @@ -1112,51 +2112,13 @@ class WebSocketServer { ws.on('close', async (code, reason) => { const currentSession = this.execSessionManager.getExecSession(sessionId) if (currentSession) { - currentSession.agent = null - currentSession.lastActivity = Date.now() - try { - await TransactionDecorator.generateTransaction(async (closeTransaction) => { - await MicroserviceExecSessionManager.update( - { sessionId }, - { agentConnected: false }, - closeTransaction - ) - - const relayEnabled = this.relayTransport.shouldUseRelay(sessionId) - - if (!currentSession.user) { - await this.cleanupExecSession(sessionId, closeTransaction) - } else { - if (relayEnabled) { - try { - const closeMsg = { - type: MESSAGE_TYPES.CLOSE, - execId: sessionId, - sessionId, - microserviceUuid: currentSession.microserviceUuid, - timestamp: Date.now(), - data: Buffer.from('Agent closed connection') - } - const encoded = this.encodeMessage(closeMsg) - await this.relayTransport.publishToUser(sessionId, encoded, { messageType: MESSAGE_TYPES.CLOSE }) - } catch (error) { - logger.error('[WS-CLOSE] Failed to send CLOSE to user via queue after agent exec disconnect', { - sessionId, - error: error.message - }) - } - } else if (currentSession.user.readyState === WebSocket.OPEN) { - currentSession.user.close(1000, 'Agent closed connection') - } - - await ChangeTrackingService.update( - fog.uuid, - ChangeTrackingService.events.microserviceExecSessions, - closeTransaction - ) - } - })() + const 
userStillConnected = await this._isExecUserStillConnected(sessionId, currentSession) + if (userStillConnected) { + await this._handleAgentExecPartialDisconnect(sessionId, currentSession, fog) + } else { + await this._cleanupExecSessionInTransaction(sessionId) + } } catch (err) { logger.error('Failed to handle agent exec disconnect:' + JSON.stringify({ sessionId, @@ -1224,14 +2186,17 @@ class WebSocketServer { // return noisePatterns.some(pattern => pattern.test(output)) // } - async sendExecActivationToExecSession (session, sessionId, transaction) { - if (!session.user || !session.agent) { - return false - } + async sendExecActivationToExecSession (session, sessionId) { if (session.activationSent) { return true } + const hasLocalAgent = session.agent && session.agent.readyState === WebSocket.OPEN + const relayEnabled = this.relayTransport.shouldUseRelay(sessionId) + if (!hasLocalAgent && !relayEnabled) { + return false + } + const activationMsg = { type: MESSAGE_TYPES.ACTIVATION, data: Buffer.from(JSON.stringify({ @@ -1261,7 +2226,7 @@ class WebSocketServer { microserviceUuid: session.microserviceUuid })) if (session.agent) { - await this.cleanupExecSession(sessionId, transaction) + await this._cleanupExecSessionInTransaction(sessionId) } } return success @@ -1271,7 +2236,7 @@ class WebSocketServer { error: error.message })) if (session.agent) { - await this.cleanupExecSession(sessionId, transaction) + await this._cleanupExecSessionInTransaction(sessionId) } return false } @@ -1440,9 +2405,7 @@ class WebSocketServer { for (const sessionId of execSessionIds) { cleanupTasks.push( - TransactionDecorator.generateTransaction(async (tx) => { - await this.cleanupExecSession(sessionId, tx) - })().catch((error) => { + this._cleanupExecSessionInTransaction(sessionId).catch((error) => { logger.warn('[WS-DRAIN] Exec session cleanup failed', { sessionId, error: error.message }) }) ) @@ -1450,9 +2413,7 @@ class WebSocketServer { for (const sessionId of logSessionIds) { 
cleanupTasks.push( - TransactionDecorator.generateTransaction(async (tx) => { - await this.cleanupLogSession(sessionId, tx) - })().catch((error) => { + this._cleanupLogSessionInTransaction(sessionId).catch((error) => { logger.warn('[WS-DRAIN] Log session cleanup failed', { sessionId, error: error.message }) }) ) @@ -1890,6 +2851,8 @@ class WebSocketServer { ) logSession.metricsActive = true recordLogSessionActive(1) + this._startLogPendingPairingMetrics(logSession) + this._startWebSocketHeartbeat(ws, { label: 'user-log', sessionId }) // 7. Send sessionId to user (MessagePack encoded) const sessionInfoMsg = { @@ -1926,46 +2889,13 @@ class WebSocketServer { })) } - // 9. Setup message forwarding (will be activated when agent connects) - await this.setupLogMessageForwarding(sessionId, transaction) + // 9. Relay setup after DB transaction commits (NATS hub lookup uses background writes). + this._scheduleRelaySetupAfterCommit( + 'setup log message forwarding', + () => this.setupLogMessageForwarding(sessionId) + ) - // Pending timeout: close if agent does not connect within logPendingTimeoutMs - const LOG_PENDING_TIMEOUT = this.getLogPendingTimeoutMs() - const pendingTimer = setTimeout(async () => { - const session = this.logSessionManager.getLogSession(sessionId) - if (!session || session.agent) { - return - } - logger.warn('Log session pending timeout:' + JSON.stringify({ - sessionId, - microserviceUuid, - fogUuid, - timeout: LOG_PENDING_TIMEOUT - })) - try { - if (ws.readyState === WebSocket.OPEN) { - const timeoutMsg = { - type: MESSAGE_TYPES.LOG_LINE, - data: Buffer.from('Timeout waiting for agent connection.\n'), - sessionId, - timestamp: Date.now(), - microserviceUuid: microserviceUuid || null, - iofogUuid: fogUuid || null - } - ws.send(this.encodeMessage(timeoutMsg), { binary: true }) - ws.close(1008, 'Timeout waiting for agent connection') - } - } catch (error) { - logger.warn('Failed to close log session on pending timeout:' + error.message) - } - try { - await 
TransactionDecorator.generateTransaction(async (timeoutTransaction) => { - await this.logSessionManager.removeLogSession(sessionId, timeoutTransaction) - })() - } catch (error) { - logger.error('Failed to remove log session after pending timeout:' + error.message) - } - }, LOG_PENDING_TIMEOUT) + this._scheduleLogPendingPairingTimeout(sessionId, ws, microserviceUuid, fogUuid) // 10. Record WebSocket connection event (non-blocking) setImmediate(async () => { @@ -1990,41 +2920,54 @@ class WebSocketServer { // Handle user disconnect ws.on('close', async (code, reason) => { - clearTimeout(pendingTimer) const session = this.logSessionManager.getLogSession(sessionId) if (session) { + this._clearPendingPairingTimer(session) + if (session.pairingMetricsStarted && !session.pairingCompleted) { + this._abortPendingPairingMetrics(session) + } + const agentStillConnected = session.agent != null || session.remoteAgentPaired session.user = null session.lastActivity = Date.now() try { - await TransactionDecorator.generateTransaction(async (closeTransaction) => { - if (microserviceUuid) { - await MicroserviceLogStatusManager.update( - { sessionId }, - { userConnected: false }, - closeTransaction - ) - } else if (fogUuid) { - await FogLogStatusManager.update( - { sessionId }, - { userConnected: false }, - closeTransaction + let agentConnected = agentStillConnected + if (!agentConnected) { + try { + agentConnected = await transactionRunner.runInTransaction( + (tx) => this._checkLogAgentPairedInDb(sessionId, microserviceUuid, fogUuid, tx), + { priority: PRIORITY_BACKGROUND, label: 'ws.log.user-disconnect-db-check' } ) + } catch (error) { + logger.warn('Log user disconnect DB check failed:' + JSON.stringify({ + sessionId, + error: error.message + })) } + } - if (!session.agent) { - await this.logSessionManager.removeLogSession(sessionId, closeTransaction) - } else { - const fogForTracking = await FogManager.findOne({ - uuid: fogUuid || (await MicroserviceManager.findOne({ uuid: 
microserviceUuid }, closeTransaction)).iofogUuid - }, closeTransaction) - await ChangeTrackingService.update( - fogForTracking.uuid, - fogUuid ? ChangeTrackingService.events.fogLogs : ChangeTrackingService.events.microserviceLogs, - closeTransaction - ) - } - })() + if (agentConnected) { // branch on the DB-checked result, not the pre-check snapshot + await this._handleUserLogPartialDisconnect( + sessionId, + session, + microserviceUuid, + fogUuid + ) + logger.info('Log session user disconnected (agent still connected):' + JSON.stringify({ + sessionId, + microserviceUuid: microserviceUuid || null, + fogUuid: fogUuid || null, + closeCode: code + })) + } else { + logger.info('Log session user disconnected (full cleanup):' + JSON.stringify({ + sessionId, + microserviceUuid: microserviceUuid || null, + fogUuid: fogUuid || null, + closeCode: code + })) + await this._cleanupLogSessionInTransaction(sessionId) + } } catch (err) { logger.error('Failed to cleanup log session on user disconnect:' + JSON.stringify({ error: err.message, @@ -2142,6 +3085,7 @@ class WebSocketServer { session.agent = ws session.lastActivity = Date.now() } + this._startWebSocketHeartbeat(ws, { label: 'agent-log', sessionId }) // 5.5. Set up message handler IMMEDIATELY on the agent WebSocket // This ensures messages are captured even if they arrive before setupLogMessageForwarding completes @@ -2176,12 +3120,12 @@ class WebSocketServer { if (msg.type === MESSAGE_TYPES.LOG_LINE) { // Forward to user (one-to-one, like exec sessions) - await this.forwardLogToUser(sessionId, buffer, transaction) + await this.forwardLogToUser(sessionId, buffer) } else if (msg.type === MESSAGE_TYPES.LOG_START || msg.type === MESSAGE_TYPES.LOG_STOP || msg.type === MESSAGE_TYPES.LOG_ERROR) { // Handle control messages - await this.forwardLogToUser(sessionId, buffer, transaction) + await this.forwardLogToUser(sessionId, buffer) } }) @@ -2204,34 +3148,16 @@ class WebSocketServer { } ws.send(this.encodeMessage(configMsg), { binary: true }) - // 7. 
Notify user that agent has connected and streaming has started + // 7. Notify user when agent connects (same-replica or relay in setupLogMessageForwarding) if (session.user && session.user.readyState === WebSocket.OPEN) { - try { - const agentConnectedMsg = { - type: MESSAGE_TYPES.LOG_START, - data: Buffer.from(JSON.stringify({ - sessionId, - message: 'Agent connected. Log streaming started.\n' - })), - sessionId, - timestamp: Date.now() - } - session.user.send(this.encodeMessage(agentConnectedMsg), { binary: true }) - logger.info('Notified user that agent connected for log session:' + JSON.stringify({ - sessionId, - microserviceUuid: logStatus.microserviceUuid, - iofogUuid: logStatus.iofogUuid - })) - } catch (error) { - logger.warn('Failed to notify user that agent connected:' + JSON.stringify({ - error: error.message, - sessionId - })) - } + this._markLogAgentPaired(sessionId, { notifyUser: true, source: 'same-replica' }) } - // 8. Setup message forwarding (unidirectional: agent → user, one-to-one) - await this.setupLogMessageForwarding(sessionId, transaction) + // 8. Relay setup after DB transaction commits (NATS hub lookup uses background writes). + this._scheduleRelaySetupAfterCommit( + 'setup log message forwarding', + () => this.setupLogMessageForwarding(sessionId) + ) // 9. 
Record WebSocket connection event (non-blocking) setImmediate(async () => { @@ -2270,38 +3196,35 @@ class WebSocketServer { ws.on('close', async (code, reason) => { const session = this.logSessionManager.getLogSession(sessionId) if (session) { - session.agent = null - session.lastActivity = Date.now() - try { - await TransactionDecorator.generateTransaction(async (closeTransaction) => { - if (microserviceUuid) { - await MicroserviceLogStatusManager.update( - { sessionId }, - { agentConnected: false }, - closeTransaction - ) - } else if (iofogUuid) { - await FogLogStatusManager.update( - { sessionId }, - { agentConnected: false }, - closeTransaction - ) - } - - if (!session.user) { - await this.logSessionManager.removeLogSession(sessionId, closeTransaction) - } else { - const fog = await FogManager.findOne({ - uuid: iofogUuid || logStatus.iofogUuid || (await MicroserviceManager.findOne({ uuid: logStatus.microserviceUuid }, closeTransaction)).iofogUuid - }, closeTransaction) - await ChangeTrackingService.update( - fog.uuid, - iofogUuid ? 
ChangeTrackingService.events.fogLogs : ChangeTrackingService.events.microserviceLogs, - closeTransaction - ) - } - })() + const userStillConnected = await this._isLogUserStillConnected( + sessionId, + session, + microserviceUuid, + iofogUuid + ) + if (userStillConnected) { + logger.info('Log session agent disconnected (partial detach):' + JSON.stringify({ + sessionId, + microserviceUuid: microserviceUuid || null, + fogUuid: iofogUuid || null, + userConnected: true, + closeCode: code + })) + await this._handleAgentLogPartialDisconnect(sessionId, session, { + microserviceUuid, + iofogUuid, + logStatus + }) + } else { + logger.info('Log session agent disconnected (full cleanup):' + JSON.stringify({ + sessionId, + microserviceUuid: microserviceUuid || null, + fogUuid: iofogUuid || null, + closeCode: code + })) + await this._cleanupLogSessionInTransaction(sessionId) + } } catch (err) { logger.error('Failed to cleanup log session on agent disconnect:' + JSON.stringify({ error: err.message, @@ -2352,7 +3275,7 @@ class WebSocketServer { } } - async setupLogMessageForwarding (sessionId, transaction) { + async setupLogMessageForwarding (sessionId) { const session = this.logSessionManager.getLogSession(sessionId) if (!session) { logger.warn('setupLogMessageForwarding: Session not found:' + JSON.stringify({ sessionId })) @@ -2360,10 +3283,28 @@ class WebSocketServer { } // Enable queue bridge for cross-replica support (one-to-one, like exec sessions) - await this.relayTransport.enableForLogSession(session, (sessionId) => { - this.cleanupLogSession(sessionId, transaction) + await this.relayTransport.enableForLogSession(session, (closedSessionId) => { + this._cleanupLogSessionInTransaction(closedSessionId) }) + const relayEnabled = this.relayTransport.shouldUseRelayForLogs(sessionId) + if (session.user && !session.agent && relayEnabled) { + this._registerLogUserRelayPairingHook(sessionId) + } + if (session.agent && !session.user && relayEnabled) { + const notified = await 
this._notifyLogUserViaRelay(sessionId, { + microserviceUuid: session.microserviceUuid, + fogUuid: session.fogUuid, + message: LOG_AGENT_READY_NOTICE + }) + if (notified) { + this._markLogUserPaired(sessionId, { source: 'relay-notify' }) + } + } + if (session.user && session.agent) { + this._markLogAgentPaired(sessionId, { notifyUser: false, source: 'same-replica-setup' }) + } + // ONLY agent → user forwarding (unidirectional, one-to-one) // All messages from agent are MessagePack encoded (binary) if (session.agent) { @@ -2405,14 +3346,16 @@ class WebSocketServer { dataLength: msg.data ? msg.data.length : 0 })) + session.lastActivity = Date.now() + if (msg.type === MESSAGE_TYPES.LOG_LINE) { // Forward to user (one-to-one, like exec sessions) - await this.forwardLogToUser(sessionId, buffer, transaction) + await this.forwardLogToUser(sessionId, buffer) } else if (msg.type === MESSAGE_TYPES.LOG_START || msg.type === MESSAGE_TYPES.LOG_STOP || msg.type === MESSAGE_TYPES.LOG_ERROR) { // Handle control messages - await this.forwardLogToUser(sessionId, buffer, transaction) + await this.forwardLogToUser(sessionId, buffer) } }) } else { @@ -2456,13 +3399,15 @@ class WebSocketServer { return true } - async forwardLogToUser (sessionId, buffer, transaction) { + async forwardLogToUser (sessionId, buffer) { const session = this.logSessionManager.getLogSession(sessionId) if (!session) { logger.warn('forwardLogToUser: Session not found:' + JSON.stringify({ sessionId })) return } + session.lastActivity = Date.now() + // Buffer is already MessagePack encoded from agent // Following exec session pattern: Use queue for ALL scenarios (single and multi-replica) // One-to-one forwarding (agent → user) via queue @@ -2515,15 +3460,25 @@ class WebSocketServer { async cleanupLogSession (sessionId, transaction) { const session = this.logSessionManager.getLogSession(sessionId) + if (session) { + this._clearPendingPairingTimer(session) + this._stopWebSocketHeartbeat(session.user) + 
this._stopWebSocketHeartbeat(session.agent) + } if (session && session.metricsActive) { recordLogSessionActive(-1) } this.logBackpressureNotified.delete(sessionId) await this.logSessionManager.removeLogSession(sessionId, transaction) await this.relayTransport.cleanupLogSession(sessionId) + logger.info('Log session cleanup complete:' + JSON.stringify({ + sessionId, + microserviceUuid: session ? session.microserviceUuid || null : null, + fogUuid: session ? session.fogUuid || null : null + })) } - async setupExecMessageForwarding (sessionId, transaction) { + async setupExecMessageForwarding (sessionId) { const session = this.execSessionManager.getExecSession(sessionId) if (!session) { logger.warn('setupExecMessageForwarding: Session not found:' + JSON.stringify({ sessionId })) @@ -2541,7 +3496,7 @@ class WebSocketServer { clearTimeout(timeout) this.pendingCloseTimeouts.delete(closeExecId) } - await this.cleanupExecSession(closeExecId, transaction) + await this._cleanupExecSessionInTransaction(closeExecId) }) session.queueBridgeEnabled = true if (!wasQueueBridgeEnabled) { @@ -2567,7 +3522,7 @@ class WebSocketServer { if (session.agent && session.agent.readyState === WebSocket.OPEN) { session.agent.close(RELAY_UNAVAILABLE_CLOSE_CODE, RELAY_UNAVAILABLE_CLOSE_REASON) } - await this.cleanupExecSession(sessionId, transaction) + await this._cleanupExecSessionInTransaction(sessionId) return } logger.warn('[RELAY] Failed to enable relay bridge for exec session', { @@ -2577,8 +3532,30 @@ class WebSocketServer { }) } + const relayEnabled = this.relayTransport.shouldUseRelay(sessionId) + + if (user && !agent && relayEnabled) { + this._registerExecUserRelayPairingHook(sessionId) + } + + if (agent && !user && relayEnabled) { + this._registerExecAgentRelayActivityHook(sessionId) + const activated = await this._sendExecActivationViaRelay(sessionId, session.microserviceUuid) + if (!activated) { + logger.error('[RELAY] Cross-replica exec activation failed on agent pod', { + sessionId, 
+ microserviceUuid: session.microserviceUuid + }) + return + } + const notified = await this._notifyExecUserViaRelay(sessionId, session.microserviceUuid) + if (notified) { + this._markExecUserPaired(sessionId, { source: 'relay-notify' }) + } + } + if (user && agent) { - const activated = await this.sendExecActivationToExecSession(session, sessionId, transaction) + const activated = await this.sendExecActivationToExecSession(session, sessionId) if (!activated) { logger.error('[RELAY] Exec session activation failed; aborting message forwarding setup', { sessionId, @@ -2586,6 +3563,7 @@ class WebSocketServer { }) return } + this._markExecAgentPaired(sessionId, { notifyUser: true, source: 'same-replica' }) } if (user) { @@ -2610,7 +3588,9 @@ class WebSocketServer { const sent = await this.sendMessageToAgent(session.agent, msg, execId, session.microserviceUuid) if (!sent && this.relayTransport.shouldUseRelay(execId)) { logger.error('[RELAY] Exec relay publish failed; closing session', { sessionId: execId }) - await this.cleanupExecSession(execId, transaction) + await this._cleanupExecSessionInTransaction(execId) + } else { + session.lastActivity = Date.now() } return } @@ -2633,7 +3613,7 @@ class WebSocketServer { if (currentSession && currentSession.user && currentSession.user.readyState === WebSocket.OPEN) { try { currentSession.user.close(1000, 'Session closed (timeout)') - await this.cleanupExecSession(execId, transaction) + await this._cleanupExecSessionInTransaction(execId) } catch (error) { logger.error('[RELAY] Failed to close exec user socket on CLOSE timeout', { sessionId: execId, @@ -2650,7 +3630,7 @@ class WebSocketServer { if (user && user.readyState === WebSocket.OPEN) { user.close(1000, 'Session closed') } - await this.cleanupExecSession(execId, transaction) + await this._cleanupExecSessionInTransaction(execId) return } @@ -2673,7 +3653,9 @@ class WebSocketServer { const sent = await this.sendMessageToAgent(session.agent, msg, execId, 
session.microserviceUuid) if (!sent && this.relayTransport.shouldUseRelay(execId)) { logger.error('[RELAY] Exec relay publish failed; closing session', { sessionId: execId }) - await this.cleanupExecSession(execId, transaction) + await this._cleanupExecSessionInTransaction(execId) + } else { + session.lastActivity = Date.now() } } catch (error) { logger.error('[RELAY] Failed to process exec user message:' + JSON.stringify({ @@ -2709,7 +3691,7 @@ class WebSocketServer { } else if (session.user && session.user.readyState === WebSocket.OPEN) { session.user.close(1000, 'Agent closed connection') } - await this.cleanupExecSession(execId, transaction) + await this._cleanupExecSessionInTransaction(execId) return } @@ -2717,12 +3699,13 @@ class WebSocketServer { if (relayEnabled) { try { await this.relayTransport.publishToUser(execId, buffer) + session.lastActivity = Date.now() } catch (error) { logger.error('[RELAY] Exec relay publish to user failed; closing session', { sessionId: execId, error: error.message }) - await this.cleanupExecSession(execId, transaction) + await this._cleanupExecSessionInTransaction(execId) } } else if (session.user && session.user.readyState === WebSocket.OPEN) { if (msg.type === MESSAGE_TYPES.STDOUT || msg.type === MESSAGE_TYPES.STDERR) { @@ -2736,6 +3719,7 @@ class WebSocketServer { timestamp: Date.now() } session.user.send(this.encodeMessage(userMsg), { binary: true }) + session.lastActivity = Date.now() } } else if (msg.type === MESSAGE_TYPES.CONTROL) { session.user.send(data, { binary: true }) @@ -2760,6 +3744,11 @@ class WebSocketServer { async cleanupExecSession (sessionId, transaction) { const session = this.execSessionManager.getExecSession(sessionId) + if (session) { + this._clearPendingPairingTimer(session) + this._stopWebSocketHeartbeat(session.user) + this._stopWebSocketHeartbeat(session.agent) + } if (session && session.metricsActive) { recordExecSessionActive(-1) session.metricsActive = false @@ -2790,6 +3779,8 @@ class 
WebSocketServer { } } + await this._notifyExecRemotePeerClose(sessionId, session, 'Session closed') + await this.execSessionManager.removeExecSession(sessionId, transaction) await this.relayTransport.cleanup(sessionId) .catch(error => { diff --git a/test/load/transaction-safety-load.js b/test/load/transaction-safety-load.js new file mode 100644 index 00000000..095c6e46 --- /dev/null +++ b/test/load/transaction-safety-load.js @@ -0,0 +1,419 @@ +#!/usr/bin/env node +'use strict' + +/** + * Plan 19 transaction-safety load probe (sqlite profile). + * + * Simulates 200 fogs polling config/changes + status, 10 operator API clients, + * background reconcile outbox drainer + task claims, optional WS-style session churn. + * + * Usage: + * nvm use 24 + * node test/load/transaction-safety-load.js + * node test/load/transaction-safety-load.js --fogs 200 --soak-minutes 30 --operators 10 --poll-interval-ms 40000 + * + * Exit 0 when SLO gates pass; exit 1 otherwise. + */ + +const fs = require('fs') +const os = require('os') +const path = require('path') +const Sequelize = require('sequelize') + +function parseArg (name, fallback) { + const eq = process.argv.find((a) => a.startsWith(`--${name}=`)) + if (eq) return eq.split('=')[1] + const idx = process.argv.indexOf(`--${name}`) + if (idx !== -1 && process.argv[idx + 1]) return process.argv[idx + 1] + return fallback +} + +const FOG_COUNT = parseInt(parseArg('fogs', '200'), 10) +const SOAK_MINUTES = parseFloat(parseArg('soak-minutes', '30')) +const OPERATOR_COUNT = parseInt(parseArg('operators', '10'), 10) +const POLL_INTERVAL_MS = parseInt(parseArg('poll-interval-ms', '40000'), 10) +const BUSY_THRESHOLD = parseInt(parseArg('busy-threshold', '0'), 10) +const INVALIDATED_THRESHOLD = parseInt(parseArg('invalidated-threshold', '0'), 10) + +const AGENT_P99_SLO_MS = 200 +const OPERATOR_P99_SLO_MS = 1000 + +const agentLatencies = [] +const operatorLatencies = [] +const counters = { + busyRetries: 0, + connectionInvalidated: 0 +} + +let 
stopping = false +let dbPath +let sequelize +let originalSequelize +let originalNodeEnv + +function percentile (sorted, p) { + if (!sorted.length) return 0 + const idx = Math.ceil((p / 100) * sorted.length) - 1 + return sorted[Math.max(0, idx)] +} + +function recordLatency (bucket, ms) { + bucket.push(ms) +} + +function installMetricCounters () { + const dbMetrics = require('../../src/helpers/db-metrics') + const originalBusy = dbMetrics.recordBusyRetry + const originalInvalidated = dbMetrics.recordConnectionInvalidated + + dbMetrics.recordBusyRetry = (...args) => { + counters.busyRetries += 1 + return originalBusy(...args) + } + dbMetrics.recordConnectionInvalidated = (...args) => { + counters.connectionInvalidated += 1 + return originalInvalidated(...args) + } +} + +async function setupDatabase () { + originalNodeEnv = process.env.NODE_ENV + process.env.NODE_ENV = 'load' + delete process.env.DB_PROVIDER + + dbPath = path.join(os.tmpdir(), `controller-tx-load-${Date.now()}-${Math.random()}.sqlite`) + sequelize = new Sequelize({ + dialect: 'sqlite', + storage: dbPath, + logging: false, + pool: { max: 1, min: 0, idle: 10000 } + }) + + const { registerSqlitePragmas, applySqlitePragmas } = require('../../src/helpers/sqlite-pragmas') + registerSqlitePragmas(sequelize, { + journalMode: 'WAL', + busyTimeoutMs: 10000, + synchronous: 'NORMAL' + }) + await sequelize.authenticate() + await applySqlitePragmas(sequelize, { + journalMode: 'WAL', + busyTimeoutMs: 10000, + synchronous: 'NORMAL' + }) + + const defineFog = require('../../src/data/models/fog') + const defineChangeTracking = require('../../src/data/models/changetracking') + const defineReconcileOutbox = require('../../src/data/models/reconcileOutbox') + const defineFogPlatformReconcileTask = require('../../src/data/models/fogPlatformReconcileTask') + + const Fog = defineFog(sequelize, Sequelize.DataTypes) + const ChangeTracking = defineChangeTracking(sequelize, Sequelize.DataTypes) + const ReconcileOutbox = 
defineReconcileOutbox(sequelize, Sequelize.DataTypes) + const FogPlatformReconcileTask = defineFogPlatformReconcileTask(sequelize, Sequelize.DataTypes) + + await Fog.sync() + const modelBag = { Fog, ChangeTracking, ReconcileOutbox, FogPlatformReconcileTask } + if (typeof ChangeTracking.associate === 'function') { + ChangeTracking.associate(modelBag) + } + await ChangeTracking.sync() + await ReconcileOutbox.sync() + await FogPlatformReconcileTask.sync() + await sequelize.query(` + CREATE TABLE IF NOT EXISTS ws_session_sim ( + session_id TEXT PRIMARY KEY, + fog_uuid TEXT NOT NULL, + opened_at INTEGER NOT NULL + ) + `) + + const models = require('../../src/data/models') + models.Fog = Fog + models.ChangeTracking = ChangeTracking + models.ReconcileOutbox = ReconcileOutbox + models.FogPlatformReconcileTask = FogPlatformReconcileTask + models.sequelize = sequelize + + const databaseProvider = require('../../src/data/providers/database-factory') + originalSequelize = databaseProvider.sequelize + databaseProvider.sequelize = sequelize + + installMetricCounters() + + return { Fog, ChangeTracking } +} + +async function seedFogs (Fog, ChangeTracking) { + const rows = Array.from({ length: FOG_COUNT }, (_, index) => ({ + uuid: `load-fog-${String(index).padStart(4, '0')}`, + name: `Load Fog ${index}`, + daemonStatus: 'RUNNING', + memoryUsage: 10, + cpuUsage: 5 + })) + + await Fog.bulkCreate(rows) + await ChangeTracking.bulkCreate(rows.map((row) => ({ iofogUuid: row.uuid }))) + return rows.map((row) => row.uuid) +} + +async function agentConfigChanges (ChangeTrackingManager, fogUuid, runInTransaction, PRIORITY_INTERACTIVE) { + const start = Date.now() + await runInTransaction(async (transaction) => { + await ChangeTrackingManager.findAll({ iofogUuid: fogUuid }, transaction) + }, { priority: PRIORITY_INTERACTIVE, label: 'agent.configChanges' }) + recordLatency(agentLatencies, Date.now() - start) +} + +async function agentStatusPut (FogManager, fogUuid, runInTransaction, 
PRIORITY_INTERACTIVE) { + const start = Date.now() + await runInTransaction(async (transaction) => { + await FogManager.update({ uuid: fogUuid }, { + memoryUsage: Math.random() * 100, + cpuUsage: Math.random() * 100, + diskUsage: Math.random() * 100, + lastStatusTime: Date.now() + }, transaction) + }, { priority: PRIORITY_INTERACTIVE, label: 'agent.status' }) + recordLatency(agentLatencies, Date.now() - start) +} + +async function operatorRead (FogManager, runInTransaction, PRIORITY_INTERACTIVE) { + const start = Date.now() + await runInTransaction(async (transaction) => { + await FogManager.findAll({}, transaction) + }, { priority: PRIORITY_INTERACTIVE, label: 'operator.listFogs' }) + recordLatency(operatorLatencies, Date.now() - start) +} + +async function operatorMutate (ReconcileOutboxManager, fogUuid, generation, runInTransaction, PRIORITY_INTERACTIVE) { + const start = Date.now() + await runInTransaction(async (transaction) => { + await ReconcileOutboxManager.enqueueFogPlatform({ + fogUuid, + reason: 'spec-changed', + specGeneration: generation + }, transaction) + }, { priority: PRIORITY_INTERACTIVE, label: 'operator.enqueueReconcile' }) + recordLatency(operatorLatencies, Date.now() - start) +} + +async function wsSessionChurn (fogUuid, runInTransaction, PRIORITY_INTERACTIVE) { + const sessionId = `${fogUuid}-${Date.now()}-${Math.random()}` + await runInTransaction(async (transaction) => { + await sequelize.query( + 'INSERT INTO ws_session_sim (session_id, fog_uuid, opened_at) VALUES (:sessionId, :fogUuid, :openedAt)', + { + replacements: { sessionId, fogUuid, openedAt: Date.now() }, + transaction + } + ) + await sequelize.query( + 'DELETE FROM ws_session_sim WHERE session_id = :sessionId', + { replacements: { sessionId }, transaction } + ) + }, { priority: PRIORITY_INTERACTIVE, label: 'ws.sessionChurn' }) +} + +function startAgentSimulators (fogUuids, deps) { + const timers = [] + + fogUuids.forEach((fogUuid, index) => { + const staggerMs = Math.floor((index 
/ fogUuids.length) * POLL_INTERVAL_MS) + const tick = async () => { + if (stopping) return + try { + await agentConfigChanges(deps.ChangeTrackingManager, fogUuid, deps.runInTransaction, deps.PRIORITY_INTERACTIVE) + await agentStatusPut(deps.FogManager, fogUuid, deps.runInTransaction, deps.PRIORITY_INTERACTIVE) + if (Math.random() < 0.02) { + await wsSessionChurn(fogUuid, deps.runInTransaction, deps.PRIORITY_INTERACTIVE) + } + } catch (error) { + console.error(`Agent simulator error (${fogUuid}):`, error.message) + } + if (!stopping) { + timers.push(setTimeout(tick, POLL_INTERVAL_MS)) + } + } + timers.push(setTimeout(tick, staggerMs)) + }) + + return () => timers.forEach(clearTimeout) +} + +function startOperatorSimulators (fogUuids, deps) { + const timers = [] + let generation = 1 + + for (let operator = 0; operator < OPERATOR_COUNT; operator++) { + const tick = async () => { + if (stopping) return + try { + if (Math.random() < 0.12) { + const fogUuid = fogUuids[Math.floor(Math.random() * fogUuids.length)] + generation += 1 + await operatorMutate(deps.ReconcileOutboxManager, fogUuid, generation, deps.runInTransaction, deps.PRIORITY_INTERACTIVE) + } else { + await operatorRead(deps.FogManager, deps.runInTransaction, deps.PRIORITY_INTERACTIVE) + } + } catch (error) { + console.error(`Operator simulator error (${operator}):`, error.message) + } + if (!stopping) { + timers.push(setTimeout(tick, 250 + Math.floor(Math.random() * 500))) + } + } + timers.push(setTimeout(tick, operator * 100)) + } + + return () => timers.forEach(clearTimeout) +} + +function startBackgroundWorkers (deps) { + const { drainOnce } = require('../../src/jobs/reconcile-outbox-drainer-job') + const drainerTimer = setInterval(async () => { + if (stopping) return + try { + await drainOnce() + } catch (error) { + console.error('Outbox drainer error:', error.message) + } + }, 3000) + + const claimTimer = setInterval(async () => { + if (stopping) return + try { + await deps.runInTransaction(async () => 
{ + await deps.FogPlatformReconcileTaskManager.claimNextFogTask('tx-load-worker', 300) + }, { priority: deps.PRIORITY_BACKGROUND, label: 'reconcile.claim' }) + } catch (error) { + console.error('Reconcile claim error:', error.message) + } + }, 5000) + + return () => { + clearInterval(drainerTimer) + clearInterval(claimTimer) + } +} + +function reportResults () { + agentLatencies.sort((a, b) => a - b) + operatorLatencies.sort((a, b) => a - b) + + const agentP50 = percentile(agentLatencies, 50) + const agentP95 = percentile(agentLatencies, 95) + const agentP99 = percentile(agentLatencies, 99) + const operatorP50 = percentile(operatorLatencies, 50) + const operatorP95 = percentile(operatorLatencies, 95) + const operatorP99 = percentile(operatorLatencies, 99) + + const sloPass = agentLatencies.length > 0 && + operatorLatencies.length > 0 && + agentP99 < AGENT_P99_SLO_MS && + operatorP99 < OPERATOR_P99_SLO_MS && + counters.busyRetries <= BUSY_THRESHOLD && + counters.connectionInvalidated <= INVALIDATED_THRESHOLD + + console.log('') + console.log('Transaction safety load probe — results') + console.log(` fogs: ${FOG_COUNT}`) + console.log(` operators: ${OPERATOR_COUNT}`) + console.log(` soak minutes: ${SOAK_MINUTES}`) + console.log(` poll interval ms: ${POLL_INTERVAL_MS}`) + console.log('') + console.log('Agent latencies (config/changes + status):') + console.log(` samples: ${agentLatencies.length}`) + console.log(` p50: ${agentP50} ms`) + console.log(` p95: ${agentP95} ms`) + console.log(` p99: ${agentP99} ms (SLO < ${AGENT_P99_SLO_MS} ms)`) + console.log('') + console.log('Operator latencies (read-heavy + mutations):') + console.log(` samples: ${operatorLatencies.length}`) + console.log(` p50: ${operatorP50} ms`) + console.log(` p95: ${operatorP95} ms`) + console.log(` p99: ${operatorP99} ms (SLO < ${OPERATOR_P99_SLO_MS} ms)`) + console.log('') + console.log('Contention counters:') + console.log(` busy retries: ${counters.busyRetries} (threshold <= ${BUSY_THRESHOLD})`) 
+ console.log(` connection invalidated: ${counters.connectionInvalidated} (threshold <= ${INVALIDATED_THRESHOLD})`) + console.log('') + console.log(` status: ${sloPass ? 'PASS' : 'FAIL'}`) + + return sloPass +} + +async function cleanup () { + const databaseProvider = require('../../src/data/providers/database-factory') + if (originalSequelize) { + databaseProvider.sequelize = originalSequelize + } + if (sequelize) { + await sequelize.close() + } + for (const suffix of ['', '-wal', '-shm']) { + try { + fs.unlinkSync(dbPath + suffix) + } catch (_) { /* ignore */ } + } + if (originalNodeEnv === undefined) { + delete process.env.NODE_ENV + } else { + process.env.NODE_ENV = originalNodeEnv + } +} + +async function main () { + const startedAt = Date.now() + const soakMs = SOAK_MINUTES * 60 * 1000 + + console.log('Transaction safety load probe — starting') + console.log(` profile: sqlite, ${FOG_COUNT} fogs, ${OPERATOR_COUNT} operators, ${SOAK_MINUTES} min soak`) + + const { Fog, ChangeTracking } = await setupDatabase() + const fogUuids = await seedFogs(Fog, ChangeTracking) + + const { + runInTransaction, + PRIORITY_BACKGROUND, + PRIORITY_INTERACTIVE, + _resetQueueForTests + } = require('../../src/helpers/transaction-runner') + + const deps = { + FogManager: require('../../src/data/managers/iofog-manager'), + ChangeTrackingManager: require('../../src/data/managers/change-tracking-manager'), + ReconcileOutboxManager: require('../../src/data/managers/reconcile-outbox-manager'), + FogPlatformReconcileTaskManager: require('../../src/data/managers/fog-platform-reconcile-task-manager'), + runInTransaction, + PRIORITY_BACKGROUND, + PRIORITY_INTERACTIVE + } + + const stopAgents = startAgentSimulators(fogUuids, deps) + const stopOperators = startOperatorSimulators(fogUuids, deps) + const stopBackground = startBackgroundWorkers(deps) + + await new Promise((resolve) => setTimeout(resolve, soakMs)) + + stopping = true + stopAgents() + stopOperators() + stopBackground() + 
_resetQueueForTests() + + const pass = reportResults() + console.log(` runtime: ${((Date.now() - startedAt) / 1000).toFixed(1)} s`) + + await cleanup() + process.exit(pass ? 0 : 1) +} + +main().catch(async (error) => { + console.error(error) + try { + await cleanup() + } catch (_) { /* ignore */ } + process.exit(1) +}) diff --git a/test/load/ws-pairing-load.js b/test/load/ws-pairing-load.js index c883fc00..d98e47e9 100644 --- a/test/load/ws-pairing-load.js +++ b/test/load/ws-pairing-load.js @@ -10,7 +10,7 @@ * node test/load/ws-pairing-load.js --pairs 500 * node test/load/ws-pairing-load.js --multi-ms 100 * - * --multi-ms N: create 3 exec sessions per microservice (Plan 17 quota) for N microservices. + * --multi-ms N: create 5 exec sessions per microservice (concurrency quota) for N microservices. * * Exit 0 when p99 < 5000ms; exit 1 otherwise. */ @@ -29,7 +29,7 @@ function parseArg (name, fallback) { const PAIR_COUNT = parseInt(parseArg('pairs', '500'), 10) const MULTI_MS_COUNT = parseInt(parseArg('multi-ms', '0'), 10) -const SESSIONS_PER_MS = 3 +const SESSIONS_PER_MS = 5 const FAST_CONFIG = { session: { diff --git a/test/src/config/flavor.test.js b/test/src/config/flavor.test.js new file mode 100644 index 00000000..9df1f576 --- /dev/null +++ b/test/src/config/flavor.test.js @@ -0,0 +1,63 @@ +'use strict' + +const { expect } = require('chai') +const sinon = require('sinon') + +const config = require('../../../src/config') +const flavor = require('../../../src/config/flavor') + +describe('flavor', () => { + def('sandbox', () => sinon.createSandbox()) + + afterEach(() => { + delete process.env.CONTROLLER_DISTRIBUTION + delete process.env.COMPONENT_LABEL_DOMAIN + $sandbox.restore() + }) + + function stubFlavorConfig ({ distribution, componentLabelDomain } = {}) { + $sandbox.stub(config, 'get').callsFake((key) => { + if (key === 'flavor.distribution') { + return distribution + } + if (key === 'flavor.componentLabelDomain') { + return componentLabelDomain + } + return 
undefined + }) + } + + describe('.getComponentLabelKey()', () => { + it('returns datasance.com/component when distribution is datasance', () => { + stubFlavorConfig({ distribution: 'datasance' }) + expect(flavor.getComponentLabelKey()).to.equal('datasance.com/component') + }) + + it('returns iofog.org/component when distribution is iofog', () => { + stubFlavorConfig({ distribution: 'iofog' }) + expect(flavor.getComponentLabelKey()).to.equal('iofog.org/component') + }) + + it('returns iofog.org/component when distribution is unset', () => { + stubFlavorConfig() + expect(flavor.getComponentLabelKey()).to.equal('iofog.org/component') + }) + + it('prefers CONTROLLER_DISTRIBUTION env over config', () => { + stubFlavorConfig({ distribution: 'datasance' }) + process.env.CONTROLLER_DISTRIBUTION = 'iofog' + expect(flavor.getComponentLabelKey()).to.equal('iofog.org/component') + }) + + it('prefers COMPONENT_LABEL_DOMAIN env over distribution', () => { + stubFlavorConfig({ distribution: 'datasance' }) + process.env.COMPONENT_LABEL_DOMAIN = 'custom.example/component' + expect(flavor.getComponentLabelKey()).to.equal('custom.example/component') + }) + + it('prefers flavor.componentLabelDomain config over distribution', () => { + stubFlavorConfig({ distribution: 'datasance', componentLabelDomain: 'override.example/component' }) + expect(flavor.getComponentLabelKey()).to.equal('override.example/component') + }) + }) +}) diff --git a/test/src/controllers/network-topology-controller.test.js b/test/src/controllers/network-topology-controller.test.js new file mode 100644 index 00000000..f13701b1 --- /dev/null +++ b/test/src/controllers/network-topology-controller.test.js @@ -0,0 +1,27 @@ +const { expect } = require('chai') +const sinon = require('sinon') + +const NetworkTopologyController = require('../../../src/controllers/network-topology-controller') +const NetworkTopologyService = require('../../../src/services/network-topology-service') + +describe('Network Topology Controller', 
() => { + def('subject', () => NetworkTopologyController) + def('sandbox', () => sinon.createSandbox()) + + afterEach(() => $sandbox.restore()) + + it('getSummaryEndPoint delegates to service', async () => { + const summary = { controlPlane: 'remote' } + $sandbox.stub(NetworkTopologyService, 'getSummary').resolves(summary) + await expect($subject.getSummaryEndPoint({})).to.eventually.eql(summary) + expect(NetworkTopologyService.getSummary).to.have.been.calledOnce + }) + + it('listRouterNodesEndPoint delegates to service', async () => { + const req = { query: { limit: '10' } } + const payload = { nodes: [], total: 0, limit: 10, offset: 0 } + $sandbox.stub(NetworkTopologyService, 'listRouterNodes').resolves(payload) + await expect($subject.listRouterNodesEndPoint(req)).to.eventually.eql(payload) + expect(NetworkTopologyService.listRouterNodes).to.have.been.calledOnceWith(req) + }) +}) diff --git a/test/src/data/fog-platform-managers.test.js b/test/src/data/fog-platform-managers.test.js index d0a221d2..f7e8e4fd 100644 --- a/test/src/data/fog-platform-managers.test.js +++ b/test/src/data/fog-platform-managers.test.js @@ -108,25 +108,60 @@ describe('Fog platform reconcile task enqueue', () => { expect(task).to.eql(created) }) - it('supersedes pending work with delete reason', async () => { + it('preempts in_progress tasks when delete is enqueued', async () => { const existing = { id: 9, fogUuid: 'fog-3', reason: 'spec-changed', status: 'in_progress' } const entity = { findOne: $sandbox.stub().resolves(existing), update: $sandbox.stub().resolves([1]) } + const preempted = { + id: 9, + fogUuid: 'fog-3', + reason: 'delete', + status: 'pending', + leaderUuid: null, + claimedAt: null + } + + $sandbox.stub(FogPlatformReconcileTaskManager, 'getEntity').returns(entity) + $sandbox.stub(FogPlatformReconcileTaskManager, 'findOne').resolves(preempted) + + const task = await FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask({ + fogUuid: 'fog-3', + reason: 'delete' + }, 
transaction) + + expect(entity.update).to.have.been.calledOnceWith({ + reason: 'delete', + specGeneration: null, + status: 'pending', + leaderUuid: null, + claimedAt: null, + nextAttemptAt: null, + attempts: 0, + lastError: null + }, sinon.match.has('where', { id: 9 })) + expect(task).to.eql(preempted) + }) + + it('updates pending tasks to delete without resetting claim state', async () => { + const existing = { id: 10, fogUuid: 'fog-4', reason: 'spec-changed', status: 'pending' } + const entity = { + findOne: $sandbox.stub().resolves(existing), + update: $sandbox.stub().resolves([1]) + } $sandbox.stub(FogPlatformReconcileTaskManager, 'getEntity').returns(entity) $sandbox.stub(FogPlatformReconcileTaskManager, 'findOne').resolves({ ...existing, reason: 'delete' }) await FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask({ - fogUuid: 'fog-3', + fogUuid: 'fog-4', reason: 'delete' }, transaction) - expect(entity.update).to.have.been.calledWithMatch( - { reason: 'delete' }, - sinon.match.has('where', { id: 9 }) - ) + const updateArg = entity.update.getCall(0).args[0] + expect(updateArg.reason).to.equal('delete') + expect(updateArg.status).to.be.undefined }) }) diff --git a/test/src/data/reconcile-outbox.test.js b/test/src/data/reconcile-outbox.test.js new file mode 100644 index 00000000..acfe3839 --- /dev/null +++ b/test/src/data/reconcile-outbox.test.js @@ -0,0 +1,187 @@ +'use strict' + +const { expect } = require('chai') +const fs = require('fs') +const os = require('os') +const path = require('path') +const Sequelize = require('sequelize') +const sinon = require('sinon') + +const databaseProvider = require('../../../src/data/providers/database-factory') +const defineReconcileOutbox = require('../../../src/data/models/reconcileOutbox') +const ReconcileOutboxManager = require('../../../src/data/managers/reconcile-outbox-manager') +const FogPlatformReconcileTaskManager = require('../../../src/data/managers/fog-platform-reconcile-task-manager') +const { 
runInTransaction } = require('../../../src/helpers/transaction-runner') +const { drainOnce } = require('../../../src/jobs/reconcile-outbox-drainer-job') + +describe('reconcile-outbox', () => { + const sandbox = sinon.createSandbox() + let originalDbProvider + let sequelize + let dbPath + let ReconcileOutbox + + beforeEach(async () => { + originalDbProvider = process.env.DB_PROVIDER + delete process.env.DB_PROVIDER + + dbPath = path.join(os.tmpdir(), `controller-outbox-${Date.now()}-${Math.random()}.sqlite`) + sequelize = new Sequelize({ + dialect: 'sqlite', + storage: dbPath, + logging: false, + pool: { max: 1, min: 0, idle: 10000 } + }) + await sequelize.authenticate() + + ReconcileOutbox = defineReconcileOutbox(sequelize, Sequelize.DataTypes) + await ReconcileOutbox.sync() + + const models = require('../../../src/data/models') + models.ReconcileOutbox = ReconcileOutbox + models.sequelize = sequelize + sandbox.stub(databaseProvider, 'sequelize').value(sequelize) + }) + + afterEach(async () => { + sandbox.restore() + if (originalDbProvider === undefined) { + delete process.env.DB_PROVIDER + } else { + process.env.DB_PROVIDER = originalDbProvider + } + if (sequelize) { + await sequelize.close() + } + for (const suffix of ['', '-wal', '-shm']) { + try { + fs.unlinkSync(dbPath + suffix) + } catch (_) { /* ignore */ } + } + }) + + it('inserts outbox row in same commit as business write', async () => { + await runInTransaction(async (transaction) => { + await sequelize.query('CREATE TABLE IF NOT EXISTS business (id INTEGER PRIMARY KEY, label TEXT)', { transaction }) + await sequelize.query('INSERT INTO business (label) VALUES (\'created\')', { transaction }) + await ReconcileOutboxManager.enqueueFogPlatform({ + fogUuid: 'fog-a', + reason: 'spec-changed', + specGeneration: 1 + }, transaction) + }) + + const rows = await ReconcileOutbox.findAll() + expect(rows).to.have.length(1) + expect(rows[0].kind).to.equal('fog_platform') + expect(rows[0].processedAt).to.be.null + }) 
+ + it('rolls back outbox row when transaction fails', async () => { + try { + await runInTransaction(async (transaction) => { + await ReconcileOutboxManager.enqueueFogPlatform({ + fogUuid: 'fog-b', + reason: 'delete' + }, transaction) + throw new Error('forced rollback') + }) + } catch (error) { + expect(error.message).to.equal('forced rollback') + } + + const rows = await ReconcileOutbox.findAll() + expect(rows).to.have.length(0) + }) + + it('deduplicates enqueue by idempotency key', async () => { + await runInTransaction(async (transaction) => { + await ReconcileOutboxManager.enqueueFogPlatform({ + fogUuid: 'fog-c', + reason: 'spec-changed', + specGeneration: 2 + }, transaction) + await ReconcileOutboxManager.enqueueFogPlatform({ + fogUuid: 'fog-c', + reason: 'spec-changed', + specGeneration: 2 + }, transaction) + }) + + const rows = await ReconcileOutbox.findAll() + expect(rows).to.have.length(1) + }) + + it('re-opens processed outbox row when the same idempotency key is enqueued again', async () => { + await runInTransaction(async (transaction) => { + await ReconcileOutboxManager.enqueueNats({ + reason: 'cluster-routes-changed', + fogUuids: ['fog-other'] + }, transaction) + }) + + await runInTransaction(async (transaction) => { + const row = await ReconcileOutbox.findOne({ + where: { idempotencyKey: 'nats:cluster-routes-changed:null:null:null:null:null:null:fog-other' } + }, transaction) + await ReconcileOutboxManager.markProcessed(row.id, transaction) + }) + + await runInTransaction(async (transaction) => { + const row = await ReconcileOutboxManager.enqueueNats({ + reason: 'cluster-routes-changed', + fogUuids: ['fog-other'] + }, transaction) + expect(row.processedAt).to.be.null + await sequelize.query('SELECT 1 AS ok', { transaction, type: sequelize.QueryTypes.SELECT }) + }) + + const rows = await ReconcileOutbox.findAll() + expect(rows).to.have.length(1) + expect(rows[0].processedAt).to.be.null + }) + + it('drains unprocessed row into reconcile task and 
marks processed', async () => { + await runInTransaction(async (transaction) => { + await ReconcileOutboxManager.enqueueFogPlatform({ + fogUuid: 'fog-d', + reason: 'manual-retry', + specGeneration: 5 + }, transaction) + }) + + const enqueueStub = sandbox.stub(FogPlatformReconcileTaskManager, 'enqueueFogPlatformReconcileTask').resolves({ id: 99 }) + + const result = await drainOnce() + expect(result.processed).to.equal(1) + expect(result.failed).to.equal(0) + expect(enqueueStub).to.have.been.calledOnceWith({ + fogUuid: 'fog-d', + reason: 'manual-retry', + specGeneration: 5 + }, sinon.match.object) + + const row = await ReconcileOutbox.findOne({ where: { idempotencyKey: 'fp:fog-d:manual-retry:5' } }) + expect(row.processedAt).to.not.be.null + expect(row.lastError).to.be.null + }) + + it('records lastError when drain fails', async () => { + await runInTransaction(async (transaction) => { + await ReconcileOutboxManager.enqueueFogPlatform({ + fogUuid: 'fog-e', + reason: 'delete' + }, transaction) + }) + + sandbox.stub(FogPlatformReconcileTaskManager, 'enqueueFogPlatformReconcileTask').rejects(new Error('enqueue failed')) + + const result = await drainOnce() + expect(result.processed).to.equal(0) + expect(result.failed).to.equal(1) + + const row = await ReconcileOutbox.findOne({ where: { idempotencyKey: 'fp:fog-e:delete:null' } }) + expect(row.processedAt).to.be.null + expect(row.lastError).to.equal('enqueue failed') + }) +}) diff --git a/test/src/data/reconcile-task-claim-ha.test.js b/test/src/data/reconcile-task-claim-ha.test.js new file mode 100644 index 00000000..60ab88b1 --- /dev/null +++ b/test/src/data/reconcile-task-claim-ha.test.js @@ -0,0 +1,183 @@ +'use strict' + +const { expect } = require('chai') +const fs = require('fs') +const os = require('os') +const path = require('path') +const Sequelize = require('sequelize') +const sinon = require('sinon') + +const databaseProvider = require('../../../src/data/providers/database-factory') +const 
defineFogPlatformReconcileTask = require('../../../src/data/models/fogPlatformReconcileTask') +const FogPlatformReconcileTaskManager = require('../../../src/data/managers/fog-platform-reconcile-task-manager') +const dbDialect = require('../../../src/helpers/db-dialect') + +describe('reconcile-task-claim-ha', () => { + const sandbox = sinon.createSandbox() + + describe('SKIP LOCKED claim path (mysql/postgres dialect)', () => { + let queryStub + let originalDbProvider + + beforeEach(() => { + originalDbProvider = process.env.DB_PROVIDER + process.env.DB_PROVIDER = 'postgres' + + queryStub = sandbox.stub(databaseProvider.sequelize, 'query').resolves([]) + sandbox.stub(databaseProvider.sequelize, 'getDialect').returns('postgres') + sandbox.stub(databaseProvider.sequelize, 'transaction').callsFake(async (fn) => fn({})) + sandbox.stub(FogPlatformReconcileTaskManager, 'getEntity').returns({ + getTableName: () => 'FogPlatformReconcileTasks', + build: (row) => row, + update: sandbox.stub().resolves([0]), + findOne: sandbox.stub().resolves(null) + }) + }) + + afterEach(() => { + if (originalDbProvider === undefined) { + delete process.env.DB_PROVIDER + } else { + process.env.DB_PROVIDER = originalDbProvider + } + sandbox.restore() + }) + + it('uses FOR UPDATE SKIP LOCKED when dialect supports it', async () => { + await FogPlatformReconcileTaskManager.claimNextFogTask('controller-a', 300) + + expect(queryStub).to.have.been.calledOnce + const sql = queryStub.firstCall.args[0] + expect(sql).to.include('FOR UPDATE SKIP LOCKED') + expect(sql).to.include('"FogPlatformReconcileTasks"') + }) + }) + + describe('sqlite claim path', () => { + let sequelize + let dbPath + let FogPlatformReconcileTask + + beforeEach(async () => { + dbPath = path.join(os.tmpdir(), `controller-claim-${Date.now()}-${Math.random()}.sqlite`) + sequelize = new Sequelize({ + dialect: 'sqlite', + storage: dbPath, + logging: false, + pool: { max: 1, min: 0, idle: 10000 } + }) + await sequelize.authenticate() + + 
FogPlatformReconcileTask = defineFogPlatformReconcileTask(sequelize, Sequelize.DataTypes) + await FogPlatformReconcileTask.sync() + + const models = require('../../../src/data/models') + models.FogPlatformReconcileTask = FogPlatformReconcileTask + models.sequelize = sequelize + sandbox.stub(databaseProvider, 'sequelize').value(sequelize) + }) + + afterEach(async () => { + sandbox.restore() + if (sequelize) { + await sequelize.close() + } + for (const suffix of ['', '-wal', '-shm']) { + try { + fs.unlinkSync(dbPath + suffix) + } catch (_) { /* ignore */ } + } + }) + + it('claims each pending task at most once under concurrent claimers', async () => { + await FogPlatformReconcileTask.bulkCreate([ + { fogUuid: 'fog-1', reason: 'spec-changed', status: 'pending' }, + { fogUuid: 'fog-2', reason: 'spec-changed', status: 'pending' }, + { fogUuid: 'fog-3', reason: 'spec-changed', status: 'pending' } + ]) + + const claims = await Promise.all([ + FogPlatformReconcileTaskManager.claimNextFogTask('controller-a', 300), + FogPlatformReconcileTaskManager.claimNextFogTask('controller-b', 300), + FogPlatformReconcileTaskManager.claimNextFogTask('controller-c', 300) + ]) + + const claimedIds = claims.filter(Boolean).map((task) => task.id) + expect(claimedIds).to.have.length(3) + expect(new Set(claimedIds).size).to.equal(3) + + const leaders = await FogPlatformReconcileTask.findAll({ + where: { status: 'in_progress' }, + order: [['id', 'ASC']] + }) + expect(leaders).to.have.length(3) + expect(new Set(leaders.map((row) => row.leaderUuid)).size).to.equal(3) + }) + }) + + describe('concurrent claims on mysql/postgres (integration)', function () { + const haUrl = process.env.RECONCILE_CLAIM_HA_URL + + before(function () { + if (!haUrl) { + this.skip() + } + }) + + it('claims each task at most once with two parallel connections', async function () { + this.timeout(30000) + + const dialect = process.env.RECONCILE_CLAIM_HA_DIALECT || 'postgres' + const sequelizeA = new Sequelize(haUrl, { 
dialect, logging: false }) + const sequelizeB = new Sequelize(haUrl, { dialect, logging: false }) + + const FogTaskA = defineFogPlatformReconcileTask(sequelizeA, Sequelize.DataTypes) + const FogTaskB = defineFogPlatformReconcileTask(sequelizeB, Sequelize.DataTypes) + await FogTaskA.sync({ force: true }) + + await FogTaskA.bulkCreate([ + { fogUuid: 'fog-ha-1', reason: 'spec-changed', status: 'pending' }, + { fogUuid: 'fog-ha-2', reason: 'spec-changed', status: 'pending' } + ]) + + const claimWithConnection = async (sequelize, controllerUuid) => { + return sequelize.transaction(async (transaction) => { + const rows = await sequelize.query( + `SELECT id FROM "FogPlatformReconcileTasks" + WHERE status IN ('pending', 'in_progress') + AND (leader_uuid IS NULL) + ORDER BY id ASC + LIMIT 1 + FOR UPDATE SKIP LOCKED`, + { type: sequelize.QueryTypes.SELECT, transaction } + ) + if (!rows.length) { + return null + } + await sequelize.query( + `UPDATE "FogPlatformReconcileTasks" + SET leader_uuid = :leader, claimed_at = NOW(), status = 'in_progress' + WHERE id = :id AND leader_uuid IS NULL`, + { + replacements: { leader: controllerUuid, id: rows[0].id }, + transaction + } + ) + return rows[0].id + }) + } + + const [idA, idB] = await Promise.all([ + claimWithConnection(sequelizeA, 'replica-a'), + claimWithConnection(sequelizeB, 'replica-b') + ]) + + expect(idA).to.be.a('number') + expect(idB).to.be.a('number') + expect(idA).to.not.equal(idB) + + await sequelizeA.close() + await sequelizeB.close() + }) + }) +}) diff --git a/test/src/decorators/transaction-decorator.test.js b/test/src/decorators/transaction-decorator.test.js new file mode 100644 index 00000000..113338ba --- /dev/null +++ b/test/src/decorators/transaction-decorator.test.js @@ -0,0 +1,75 @@ +'use strict' + +const { expect } = require('chai') +const sinon = require('sinon') +const Transaction = require('sequelize/lib/transaction') + +const TransactionDecorator = require('../../../src/decorators/transaction-decorator') 
+const transactionRunner = require('../../../src/helpers/transaction-runner') + +describe('transaction-decorator', () => { + const sandbox = sinon.createSandbox() + const parentTransaction = Object.assign(Object.create(Transaction.prototype), { + commit: () => {}, + rollback: () => {} + }) + let originalNodeEnv + + beforeEach(() => { + originalNodeEnv = process.env.NODE_ENV + process.env.NODE_ENV = 'production' + }) + + afterEach(() => { + process.env.NODE_ENV = originalNodeEnv + sandbox.restore() + }) + + it('passes through when transaction is not the last argument', async () => { + sandbox.stub(transactionRunner, 'runInTransaction').rejects(new Error('should not enqueue')) + + async function ensureOperator (transaction, options = {}) { + return { transaction, options } + } + + const wrapped = TransactionDecorator.generateTransaction(ensureOperator) + const result = await wrapped(parentTransaction, { triggerReconcile: false }) + + expect(result.transaction).to.equal(parentTransaction) + expect(result.options).to.deep.equal({ triggerReconcile: false }) + expect(transactionRunner.runInTransaction).to.not.have.been.called + }) + + it('nested runInTransaction reuses explicit parent transaction without decorator enqueue', async () => { + const runInTransactionSpy = sandbox.spy(transactionRunner, 'runInTransaction') + + async function innerUtil () { + return transactionRunner.runInTransaction((tx) => tx) + } + + async function outer (data, transaction) { + return innerUtil() + } + + const wrapped = TransactionDecorator.generateTransaction(outer) + const result = await wrapped({}, parentTransaction) + + expect(result).to.equal(parentTransaction) + expect(runInTransactionSpy).to.have.been.calledOnce + }) + + it('registers ALS so getActiveTransactionContext sees explicit parent tx', async () => { + let ctxInsideHandler + + async function outer (data, transaction) { + ctxInsideHandler = transactionRunner.getActiveTransactionContext() + return transaction + } + + const 
wrapped = TransactionDecorator.generateTransaction(outer) + await wrapped({}, parentTransaction) + + expect(ctxInsideHandler).to.not.equal(null) + expect(ctxInsideHandler.transaction).to.equal(parentTransaction) + }) +}) diff --git a/test/src/helpers/db-busy-retry.test.js b/test/src/helpers/db-busy-retry.test.js index d4dc16b9..20bd8921 100644 --- a/test/src/helpers/db-busy-retry.test.js +++ b/test/src/helpers/db-busy-retry.test.js @@ -3,11 +3,24 @@ const fs = require('fs') const os = require('os') const path = require('path') const Sequelize = require('sequelize') +const sinon = require('sinon') -const { isSqliteBusyError, withDbBusyRetry } = require('../../../src/helpers/db-busy-retry') +const { + CONFIG_DEFAULT_BASE_MS, + CONFIG_DEFAULT_MAX_RETRIES, + getRetryDefaults, + isSqliteBusyError, + withDbBusyRetry +} = require('../../../src/helpers/db-busy-retry') const { registerSqlitePragmas, applySqlitePragmas } = require('../../../src/helpers/sqlite-pragmas') describe('db-busy-retry', () => { + const sandbox = sinon.createSandbox() + + afterEach(() => { + sandbox.restore() + }) + it('detects SQLITE_BUSY on nested Sequelize errors', () => { const error = { message: 'SQLITE_BUSY: database is locked', @@ -48,6 +61,35 @@ describe('db-busy-retry', () => { } }) + it('reads retry defaults from config', () => { + const defaults = getRetryDefaults() + expect(defaults.maxRetries).to.equal(CONFIG_DEFAULT_MAX_RETRIES) + expect(defaults.baseMs).to.equal(CONFIG_DEFAULT_BASE_MS) + }) + + it('waits with exponential backoff between busy retries', async () => { + const clock = sinon.useFakeTimers({ shouldAdvanceTime: true }) + let attempts = 0 + + try { + const promise = withDbBusyRetry(async () => { + attempts++ + throw new Error('SQLITE_BUSY: database is locked') + }, { maxRetries: 2, baseMs: 10 }) + + await clock.tickAsync(0) + await clock.tickAsync(10) + await clock.tickAsync(20) + await promise + throw new Error('expected throw') + } catch (error) { + 
expect(error.message).to.contain('SQLITE_BUSY') + expect(attempts).to.equal(3) + } finally { + clock.restore() + } + }) + it('exhausts retries and rethrows the last busy error', async () => { let attempts = 0 try { @@ -61,6 +103,23 @@ describe('db-busy-retry', () => { expect(attempts).to.equal(3) } }) + + it('records busy retry metric on SQLITE_BUSY', async () => { + const dbMetrics = require('../../../src/helpers/db-metrics') + const recordSpy = sandbox.spy(dbMetrics, 'recordBusyRetry') + + let attempts = 0 + await withDbBusyRetry(async () => { + attempts++ + if (attempts < 2) { + throw new Error('SQLITE_BUSY: database is locked') + } + return 'ok' + }, { label: 'test.busy', maxRetries: 2, baseMs: 1 }) + + expect(recordSpy.calledOnce).to.equal(true) + expect(recordSpy.firstCall.args[0]).to.equal('test.busy') + }) }) describe('sqlite lock contention regression', () => { diff --git a/test/src/helpers/db-dialect.test.js b/test/src/helpers/db-dialect.test.js new file mode 100644 index 00000000..0221b3a7 --- /dev/null +++ b/test/src/helpers/db-dialect.test.js @@ -0,0 +1,40 @@ +'use strict' + +const { expect } = require('chai') +const sinon = require('sinon') + +const dbDialect = require('../../../src/helpers/db-dialect') +const databaseProvider = require('../../../src/data/providers/database-factory') + +describe('db-dialect', () => { + const sandbox = sinon.createSandbox() + + afterEach(() => { + sandbox.restore() + }) + + describe('supportsSkipLocked', () => { + it('returns false for sqlite', () => { + sandbox.stub(databaseProvider.sequelize, 'getDialect').returns('sqlite') + expect(dbDialect.supportsSkipLocked()).to.equal(false) + }) + + it('returns true for mysql and postgres', () => { + const getDialect = sandbox.stub(databaseProvider.sequelize, 'getDialect') + getDialect.returns('mysql') + expect(dbDialect.supportsSkipLocked()).to.equal(true) + + getDialect.returns('postgres') + expect(dbDialect.supportsSkipLocked()).to.equal(true) + }) + }) + + 
describe('quoteTableName', () => { + it('quotes for postgres and mysql', () => { + expect(dbDialect.quoteTableName('FogPlatformReconcileTasks', 'postgres')) + .to.equal('"FogPlatformReconcileTasks"') + expect(dbDialect.quoteTableName('FogPlatformReconcileTasks', 'mysql')) + .to.equal('`FogPlatformReconcileTasks`') + }) + }) +}) diff --git a/test/src/helpers/db-metrics.test.js b/test/src/helpers/db-metrics.test.js new file mode 100644 index 00000000..99b2cfc3 --- /dev/null +++ b/test/src/helpers/db-metrics.test.js @@ -0,0 +1,38 @@ +const { expect } = require('chai') + +const { + isConnectionInvalidatedError, + maybeRecordConnectionInvalidated, + recordBusyRetry, + recordSqliteFogCountWarning, + recordTransactionDuration, + recordWriteQueueWaitMs +} = require('../../../src/helpers/db-metrics') + +describe('db-metrics', () => { + it('detects connection invalidation error patterns', () => { + expect(isConnectionInvalidatedError(new Error('cannot rollback - no transaction is active'))).to.equal(true) + expect(isConnectionInvalidatedError(new Error('Connection terminated unexpectedly'))).to.equal(true) + expect(isConnectionInvalidatedError(new Error('SQLITE_BUSY: database is locked'))).to.equal(false) + }) + + it('records transaction duration without throwing when OTEL is not initialized', () => { + expect(() => recordTransactionDuration({ label: 'test', priority: 'interactive', provider: 'sqlite' }, 12)).to.not.throw() + }) + + it('records queue wait without throwing when OTEL is not initialized', () => { + expect(() => recordWriteQueueWaitMs('background', 5)).to.not.throw() + }) + + it('records busy retry without throwing when OTEL is not initialized', () => { + expect(() => recordBusyRetry('agent.updateStatus')).to.not.throw() + }) + + it('records fog count warning without throwing when OTEL is not initialized', () => { + expect(() => recordSqliteFogCountWarning()).to.not.throw() + }) + + it('maybeRecordConnectionInvalidated is a no-op for unrelated errors', () => { + 
expect(() => maybeRecordConnectionInvalidated(new Error('constraint violation'), 'sqlite')).to.not.throw() + }) +}) diff --git a/test/src/helpers/reconcile-outbox-keys.test.js b/test/src/helpers/reconcile-outbox-keys.test.js new file mode 100644 index 00000000..ac7cfa79 --- /dev/null +++ b/test/src/helpers/reconcile-outbox-keys.test.js @@ -0,0 +1,72 @@ +const { expect } = require('chai') +const crypto = require('crypto') +const { + buildFogPlatformIdempotencyKey, + buildServicePlatformIdempotencyKey, + buildNatsIdempotencyKey, + buildIdempotencyKey +} = require('../../../src/helpers/reconcile-outbox-keys') + +describe('reconcile-outbox-keys', () => { + it('builds stable fog platform keys', () => { + const key = buildFogPlatformIdempotencyKey({ + fogUuid: 'fog-1', + reason: 'spec-changed', + specGeneration: 3 + }) + expect(key).to.equal('fp:fog-1:spec-changed:3') + }) + + it('builds service platform keys with snapshot hash', () => { + const snapshot = { name: 'svc-a', resource: '10m' } + const key = buildServicePlatformIdempotencyKey({ + serviceName: 'svc-a', + reason: 'spec-changed', + specSnapshot: snapshot + }) + const expectedHash = crypto.createHash('sha256').update(JSON.stringify(snapshot)).digest('hex').slice(0, 16) + expect(key).to.equal(`sp:svc-a:spec-changed:${expectedHash}`) + }) + + it('builds nats keys with sorted fog uuids', () => { + const key = buildNatsIdempotencyKey({ + reason: 'cluster-routes-changed', + applicationId: null, + accountRuleId: null, + userRuleId: null, + fogUuids: ['b', 'a'] + }) + expect(key).to.equal('nats:cluster-routes-changed:null:null:null:null:null:null:a,b') + }) + + it('builds nats keys without fog uuids from scope fields', () => { + const key = buildNatsIdempotencyKey({ + reason: 'account-created', + applicationId: 42, + accountRuleId: null, + userRuleId: null + }) + expect(key).to.equal('nats:account-created:42:null:null:null:null:null') + }) + + it('distinguishes distinct mutations via microserviceUuid and mutationKind', 
() => { + const enableKey = buildNatsIdempotencyKey({ + reason: 'account-created', + applicationId: 42, + microserviceUuid: 'ms-a', + mutationKind: 'access-enable' + }) + const ruleKey = buildNatsIdempotencyKey({ + reason: 'account-created', + applicationId: 42, + microserviceUuid: 'ms-a', + mutationKind: 'rule-change' + }) + expect(enableKey).to.not.equal(ruleKey) + }) + + it('routes buildIdempotencyKey by kind', () => { + expect(buildIdempotencyKey('fog_platform', { fogUuid: 'x', reason: 'delete' })) + .to.equal('fp:x:delete:null') + }) +}) diff --git a/test/src/helpers/sqlite-fog-warning.test.js b/test/src/helpers/sqlite-fog-warning.test.js new file mode 100644 index 00000000..d0d631d3 --- /dev/null +++ b/test/src/helpers/sqlite-fog-warning.test.js @@ -0,0 +1,69 @@ +const { expect } = require('chai') +const sinon = require('sinon') + +describe('sqlite-fog-warning', () => { + const sandbox = sinon.createSandbox() + let models + let config + let logger + let dbMetrics + let transactionRunner + + beforeEach(() => { + models = require('../../../src/data/models') + models.Fog = { count: sandbox.stub() } + config = require('../../../src/config') + logger = require('../../../src/logger') + dbMetrics = require('../../../src/helpers/db-metrics') + transactionRunner = require('../../../src/helpers/transaction-runner') + + sandbox.stub(config, 'get').callsFake((key, defaultValue) => { + if (key === 'settings.sqliteEnterpriseFogWarningThreshold') { + return 50 + } + if (key === 'database.provider') { + return 'sqlite' + } + return defaultValue + }) + sandbox.stub(transactionRunner, 'isSqliteProvider').returns(true) + sandbox.stub(logger, 'warn') + sandbox.stub(dbMetrics, 'recordSqliteFogCountWarning') + }) + + afterEach(() => { + sandbox.restore() + }) + + it('logs warning and records metric when fog count exceeds threshold', async () => { + models.Fog.count.resolves(75) + const { checkSqliteFogCountWarning } = require('../../../src/helpers/sqlite-fog-warning') + + await 
checkSqliteFogCountWarning() + + expect(dbMetrics.recordSqliteFogCountWarning.calledOnce).to.equal(true) + expect(logger.warn.calledOnce).to.equal(true) + expect(logger.warn.firstCall.args[0]).to.contain('75 fogs') + expect(logger.warn.firstCall.args[0]).to.contain('mysql or postgres') + }) + + it('does nothing when fog count is at or below threshold', async () => { + models.Fog.count.resolves(50) + const { checkSqliteFogCountWarning } = require('../../../src/helpers/sqlite-fog-warning') + + await checkSqliteFogCountWarning() + + expect(dbMetrics.recordSqliteFogCountWarning.called).to.equal(false) + expect(logger.warn.called).to.equal(false) + }) + + it('skips check for non-sqlite providers', async () => { + transactionRunner.isSqliteProvider.returns(false) + models.Fog.count.resolves(100) + const { checkSqliteFogCountWarning } = require('../../../src/helpers/sqlite-fog-warning') + + await checkSqliteFogCountWarning() + + expect(models.Fog.count.called).to.equal(false) + }) +}) diff --git a/test/src/helpers/transaction-chaos.test.js b/test/src/helpers/transaction-chaos.test.js new file mode 100644 index 00000000..f31ad377 --- /dev/null +++ b/test/src/helpers/transaction-chaos.test.js @@ -0,0 +1,209 @@ +'use strict' + +const { expect } = require('chai') +const fs = require('fs') +const os = require('os') +const path = require('path') +const Sequelize = require('sequelize') +const sinon = require('sinon') + +const databaseProvider = require('../../../src/data/providers/database-factory') +const defineReconcileOutbox = require('../../../src/data/models/reconcileOutbox') +const defineFogPlatformReconcileTask = require('../../../src/data/models/fogPlatformReconcileTask') +const ReconcileOutboxManager = require('../../../src/data/managers/reconcile-outbox-manager') +const { runInTransaction } = require('../../../src/helpers/transaction-runner') +const { drainOnce } = require('../../../src/jobs/reconcile-outbox-drainer-job') + +describe('transaction chaos', () => { + 
const sandbox = sinon.createSandbox() + + describe('connection kill mid-transaction', () => { + let sequelize + let dbPath + + beforeEach(async () => { + dbPath = path.join(os.tmpdir(), `controller-chaos-kill-${Date.now()}-${Math.random()}.sqlite`) + sequelize = new Sequelize({ + dialect: 'sqlite', + storage: dbPath, + logging: false, + pool: { max: 1, min: 0, idle: 10000 } + }) + await sequelize.authenticate() + await sequelize.query(` + CREATE TABLE chaos_multi ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + step INTEGER NOT NULL + ) + `) + }) + + afterEach(async () => { + if (sequelize) { + try { + await sequelize.close() + } catch (_) { /* ignore */ } + } + for (const suffix of ['', '-wal', '-shm']) { + try { + fs.unlinkSync(dbPath + suffix) + } catch (_) { /* ignore */ } + } + }) + + it('does not leave partial multi-row state when the connection is killed mid-transaction', async function () { + this.timeout(15000) + + try { + await sequelize.transaction(async (transaction) => { + await sequelize.query('INSERT INTO chaos_multi (step) VALUES (1)', { transaction }) + await sequelize.connectionManager.close() + await sequelize.query('INSERT INTO chaos_multi (step) VALUES (2)', { transaction }) + }) + } catch (_) { + // Expected: connection teardown aborts the open transaction. 
+ } + + sequelize = new Sequelize({ + dialect: 'sqlite', + storage: dbPath, + logging: false, + pool: { max: 1, min: 0, idle: 10000 } + }) + await sequelize.authenticate() + + const [rows] = await sequelize.query('SELECT step FROM chaos_multi ORDER BY step ASC') + expect(rows).to.deep.equal([]) + }) + }) + + describe('duplicate outbox drainer', () => { + let sequelize + let dbPath + let ReconcileOutbox + let FogPlatformReconcileTask + + beforeEach(async () => { + dbPath = path.join(os.tmpdir(), `controller-chaos-drainer-${Date.now()}-${Math.random()}.sqlite`) + sequelize = new Sequelize({ + dialect: 'sqlite', + storage: dbPath, + logging: false, + pool: { max: 1, min: 0, idle: 10000 } + }) + await sequelize.authenticate() + + ReconcileOutbox = defineReconcileOutbox(sequelize, Sequelize.DataTypes) + FogPlatformReconcileTask = defineFogPlatformReconcileTask(sequelize, Sequelize.DataTypes) + await ReconcileOutbox.sync() + await FogPlatformReconcileTask.sync() + + const models = require('../../../src/data/models') + models.ReconcileOutbox = ReconcileOutbox + models.FogPlatformReconcileTask = FogPlatformReconcileTask + models.sequelize = sequelize + sandbox.stub(databaseProvider, 'sequelize').value(sequelize) + }) + + afterEach(async () => { + sandbox.restore() + if (sequelize) { + await sequelize.close() + } + for (const suffix of ['', '-wal', '-shm']) { + try { + fs.unlinkSync(dbPath + suffix) + } catch (_) { /* ignore */ } + } + }) + + it('creates a single reconcile task when two drain ticks run in parallel', async () => { + await runInTransaction(async (transaction) => { + await ReconcileOutboxManager.enqueueFogPlatform({ + fogUuid: 'fog-chaos-1', + reason: 'manual-retry', + specGeneration: 9 + }, transaction) + }) + + const [resultA, resultB] = await Promise.all([drainOnce(), drainOnce()]) + const processed = (resultA.processed || 0) + (resultB.processed || 0) + expect(processed).to.be.at.least(1) + + const tasks = await FogPlatformReconcileTask.findAll({ + where: 
{ fogUuid: 'fog-chaos-1' } + }) + expect(tasks).to.have.length(1) + + const outboxRow = await ReconcileOutbox.findOne({ + where: { idempotencyKey: 'fp:fog-chaos-1:manual-retry:9' } + }) + expect(outboxRow.processedAt).to.not.be.null + }) + }) + + describe('mysql/postgres HA claim (integration)', function () { + const haUrl = process.env.RECONCILE_CLAIM_HA_URL + + before(function () { + if (!haUrl) { + this.skip() + } + }) + + it('claims each task at most once with two parallel connections', async function () { + this.timeout(30000) + + const dialect = process.env.RECONCILE_CLAIM_HA_DIALECT || 'postgres' + const sequelizeA = new Sequelize(haUrl, { dialect, logging: false }) + const sequelizeB = new Sequelize(haUrl, { dialect, logging: false }) + + const FogTaskA = defineFogPlatformReconcileTask(sequelizeA, Sequelize.DataTypes) + await FogTaskA.sync({ force: true }) + + await FogTaskA.bulkCreate([ + { fogUuid: 'fog-chaos-ha-1', reason: 'spec-changed', status: 'pending' }, + { fogUuid: 'fog-chaos-ha-2', reason: 'spec-changed', status: 'pending' } + ]) + + const claimWithConnection = async (conn, controllerUuid) => { + return conn.transaction(async (transaction) => { + const rows = await conn.query( + `SELECT id FROM "FogPlatformReconcileTasks" + WHERE status IN ('pending', 'in_progress') + AND (leader_uuid IS NULL) + ORDER BY id ASC + LIMIT 1 + FOR UPDATE SKIP LOCKED`, + { type: conn.QueryTypes.SELECT, transaction } + ) + if (!rows.length) { + return null + } + await conn.query( + `UPDATE "FogPlatformReconcileTasks" + SET leader_uuid = :leader, claimed_at = NOW(), status = 'in_progress' + WHERE id = :id AND leader_uuid IS NULL`, + { + replacements: { leader: controllerUuid, id: rows[0].id }, + transaction + } + ) + return rows[0].id + }) + } + + const [idA, idB] = await Promise.all([ + claimWithConnection(sequelizeA, 'replica-a'), + claimWithConnection(sequelizeB, 'replica-b') + ]) + + expect(idA).to.be.a('number') + expect(idB).to.be.a('number') + 
expect(idA).to.not.equal(idB) + + await sequelizeA.close() + await sequelizeB.close() + }) + }) +}) diff --git a/test/src/helpers/transaction-grep-gates.test.js b/test/src/helpers/transaction-grep-gates.test.js new file mode 100644 index 00000000..c2786301 --- /dev/null +++ b/test/src/helpers/transaction-grep-gates.test.js @@ -0,0 +1,248 @@ +'use strict' + +const { expect } = require('chai') +const { execFileSync } = require('child_process') +const fs = require('fs') +const path = require('path') + +const REPO_ROOT = path.resolve(__dirname, '../../..') + +function grepSrc (pattern, extraArgs = [], searchPath = 'src/') { + try { + return execFileSync('grep', [ + '-R', + '-n', + '--include=*.js', + ...extraArgs, + pattern, + searchPath + ], { + cwd: REPO_ROOT, + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'pipe'] + }).trim() + } catch (error) { + if (error.status === 1) { + return '' + } + throw error + } +} + +/** + * Plan 19-I-C: K8s I/O must run outside runInTransaction callback bodies. 
+ * Allowed external helpers (called after tx commit / between phases): + * nats-service: _applyEnsureNatsK8sExternal, _patchK8sHubConfigMapClusterRoutesExternal, + * _patchK8sJwtBundleExternal + * service-platform-service: applyK8sHubRouterPlan, reconcileK8sServiceExternal, + * watchLoadBalancerWithTimeout, ServicesService._syncK8sServiceResource + */ +function assertNoK8sClientInLabeledTxBlocks (source, labels) { + for (const label of labels) { + const labelToken = `label: '${label}'` + const labelIdx = source.indexOf(labelToken) + expect(labelIdx, `expected runInTransaction block with label ${label}`).to.be.greaterThan(-1) + const txStart = source.lastIndexOf('runInTransaction', labelIdx) + expect(txStart, `expected runInTransaction before label ${label}`).to.be.greaterThan(-1) + const block = source.slice(txStart, labelIdx + labelToken.length) + expect(block).to.not.match(/K8sClient\./, `K8sClient must not appear inside tx block ${label}`) + } +} + +describe('grep gates', () => { + it('has zero fakeTransaction hits in src/', () => { + expect(grepSrc('fakeTransaction')).to.equal('') + }) + + it('has zero bypassQueue hits in src/', () => { + expect(grepSrc('bypassQueue')).to.equal('') + }) + + it('has zero runInTransaction hits in src/data/managers/', () => { + expect(grepSrc('runInTransaction', [], 'src/data/managers/')).to.equal('') + }) + + it('allows sequelize.transaction only in transaction-runner.js', () => { + const hits = grepSrc('sequelize\\.transaction', ['--exclude=transaction-runner.js']) + expect(hits).to.equal('') + }) + + it('passes transaction to SecretService reads inside certificate-service.js', () => { + const hits = grepSrc('SecretService\\.getSecretEndpoint\\([^,)]+\\)', [ + '--include=certificate-service.js' + ], 'src/services') + expect(hits).to.equal('') + }) + + it('routes fog-token cleanup through runInTransaction', () => { + const jobSource = fs.readFileSync( + path.join(REPO_ROOT, 'src/jobs/fog-token-cleanup-job.js'), + 'utf8' + ) + 
expect(jobSource).to.include('runInTransaction') + expect(jobSource).to.not.match(/FogUsedTokenManager\.cleanupExpiredJtis\(\)/) + }) + + it('threads transaction through cert.js CA load paths without bare nested enqueue', () => { + const certSource = fs.readFileSync( + path.join(REPO_ROOT, 'src/utils/cert.js'), + 'utf8' + ) + expect(certSource).to.match(/async function loadCA \(name, transaction\)/) + expect(certSource).to.match(/async function getCAFromK8sSecret \(secretName, transaction\)/) + expect(certSource).to.match(/async function getCAFromInput \(ca, transaction\)/) + expect(certSource).to.match(/getCAFromInput\(ca, transaction\)/) + expect(certSource).to.match(/loadCA\(ca\.secretName, transaction\)/) + expect(certSource).to.match(/getCAFromK8sSecret\(ca\.secretName, transaction\)/) + expect(certSource).to.match(/await storeCA\(\{ cert, key \}, secretName, transaction\)/) + expect(certSource).to.not.match(/await loadCA\(ca\.secretName\)\s/) + expect(certSource).to.not.match(/await getCAFromK8sSecret\(ca\.secretName\)\s/) + // Branch on caller transaction before enqueueing nested runInTransaction (R126–R128) + expect(certSource).to.match(/async function loadCA[\s\S]*?const secret = transaction[\s\S]*?\? await SecretManager\.getSecret\(name, transaction\)[\s\S]*?: await runInTransaction/) + expect(certSource).to.match(/async function getCAFromK8sSecret[\s\S]*?const localSecret = transaction[\s\S]*?\? 
await SecretManager\.findOne\(\{ name: secretName \}, transaction\)[\s\S]*?: await runInTransaction/) + expect(certSource).to.match(/async function getCAFromK8sSecret[\s\S]*?if \(transaction\) \{[\s\S]*?await CertificateManager\.createCertificateRecord\(caRecord, transaction\)[\s\S]*?\} else \{[\s\S]*?await runInTransaction/) + }) + + it('passes transaction to cert util calls inside certificate-service createCAEndpoint', () => { + const serviceSource = fs.readFileSync( + path.join(REPO_ROOT, 'src/services/certificate-service.js'), + 'utf8' + ) + expect(serviceSource).to.match(/getCAFromK8sSecret\(caData\.secretName, transaction\)/) + expect(serviceSource).to.match(/loadCA\(caData\.secretName, transaction\)/) + }) + + it('keeps K8sClient calls out of nats-service DB transaction bodies', () => { + const natsSource = fs.readFileSync( + path.join(REPO_ROOT, 'src/services/nats-service.js'), + 'utf8' + ) + expect(natsSource).to.match(/async function ensureNatsForFogDb/) + expect(natsSource).to.match(/async function ensureNatsForFogAuthPrepDb/) + expect(natsSource).to.match(/async function ensureNatsForFogTopologyDb/) + expect(natsSource).to.match(/async function ensureNatsForFogDbMutation/) + expect(natsSource).to.match(/label: 'nats\.ensure\.certPrep'/) + expect(natsSource).to.match(/label: 'nats\.ensure\.authPrep'/) + expect(natsSource).to.match(/label: 'nats\.ensure\.topology'/) + expect(natsSource).to.match(/async function cleanupNatsForFogDb/) + expect(natsSource).to.match(/async function _reconcileResolverArtifactsOnceDb/) + expect(natsSource).to.match(/_patchK8sHubConfigMapClusterRoutesExternal/) + expect(natsSource).to.match(/_patchK8sJwtBundleExternal/) + expect(natsSource).to.not.match(/ensureNatsForFogAuthPrepDb[\s\S]*?K8sClient\./) + expect(natsSource).to.not.match(/ensureNatsForFogTopologyDb[\s\S]*?K8sClient\./) + expect(natsSource).to.not.match(/ensureNatsForFogDbMutation[\s\S]*?K8sClient\./) + 
expect(natsSource).to.not.match(/ensureNatsForFogDb[\s\S]*?K8sClient\./) + expect(natsSource).to.not.match(/cleanupNatsForFogDb[\s\S]*?K8sClient\./) + expect(natsSource).to.not.match(/_reconcileResolverArtifactsOnceDb[\s\S]*?K8sClient\./) + }) + + it('keeps K8sClient calls out of service-platform-service DB transaction bodies', () => { + const platformSource = fs.readFileSync( + path.join(REPO_ROOT, 'src/services/service-platform-service.js'), + 'utf8' + ) + expect(platformSource).to.match(/async function reconcileK8sServiceExternal/) + expect(platformSource).to.match(/async function applyK8sHubRouterPlan/) + assertNoK8sClientInLabeledTxBlocks(platformSource, [ + 'servicePlatform.hubLockAcquire', + 'servicePlatform.hubLockRelease', + 'servicePlatform.k8sLoadBalancerEndpoint', + 'servicePlatform.prepare', + 'servicePlatform.hubReconcile', + 'servicePlatform.hubDb', + 'servicePlatform.finalize' + ]) + }) + + it('passes Sequelize transaction inside options for volume-mount association calls', () => { + const volumeMountSource = fs.readFileSync( + path.join(REPO_ROOT, 'src/services/volume-mount-service.js'), + 'utf8' + ) + expect(volumeMountSource).to.match(/getFogs\(\{ transaction \}\)/) + expect(volumeMountSource).to.match(/addVolumeMount\(volumeMount, \{ transaction \}\)/) + expect(volumeMountSource).to.match(/removeVolumeMount\(volumeMount, \{ transaction \}\)/) + expect(volumeMountSource).to.not.match(/getFogs\(\{\}, transaction\)/) + expect(volumeMountSource).to.not.match(/addVolumeMount\(volumeMount\.uuid, transaction\)/) + expect(volumeMountSource).to.not.match(/removeVolumeMount\(volumeMount\.uuid, transaction\)/) + }) + + it('keeps vault HTTP out of secret/configmap/registry transaction bodies', () => { + const secretSource = fs.readFileSync( + path.join(REPO_ROOT, 'src/services/secret-service.js'), + 'utf8' + ) + expect(secretSource).to.include('scheduleVaultDeleteAfterCommit') + 
expect(secretSource).to.not.match(/deleteSecretEndpoint[\s\S]*?SecretHelper\.deleteSecret/) + + const configMapManagerSource = fs.readFileSync( + path.join(REPO_ROOT, 'src/data/managers/config-map-manager.js'), + 'utf8' + ) + expect(configMapManagerSource).to.include('scheduleVaultDeleteAfterCommit') + expect(configMapManagerSource).to.not.match(/deleteConfigMap[\s\S]*?SecretHelper\.deleteSecret/) + + const registryServiceSource = fs.readFileSync( + path.join(REPO_ROOT, 'src/services/registry-service.js'), + 'utf8' + ) + expect(registryServiceSource).to.include('scheduleVaultPromoteAfterCommit') + expect(registryServiceSource).to.include('scheduleVaultDeleteAfterCommit') + expect(registryServiceSource).to.not.match(/createRegistry[\s\S]*?SecretHelper\.encryptSecret\(/) + }) + + it('splits fog platform reconcile into phased runInTransaction labels', () => { + const fogSource = fs.readFileSync( + path.join(REPO_ROOT, 'src/services/fog-platform-service.js'), + 'utf8' + ) + expect(fogSource).to.match(/label: 'fogPlatform\.prepare'/) + expect(fogSource).to.match(/label: 'fogPlatform\.certPrep'/) + expect(fogSource).to.match(/ensureNatsForFogPhased/) + expect(fogSource).to.match(/label: 'fogPlatform\.platform'/) + expect(fogSource).to.match(/label: 'fogPlatform\.finalize'/) + expect(fogSource).to.not.match(/label: 'fogPlatform\.natsEnsure'/) + expect(fogSource).to.not.match(/reconcileFog: TransactionDecorator\.generateTransaction/) + }) + + it('passes transaction inside Sequelize options in nats-instance-manager.js', () => { + const source = fs.readFileSync( + path.join(REPO_ROOT, 'src/data/managers/nats-instance-manager.js'), + 'utf8' + ) + expect(source).to.not.match(/findOne\(\{ where: \{ iofogUuid \} \}, \{ transaction \}\)/) + expect(source).to.match(/transaction\s*\n\s*\}\)/) + }) + + it('does not export dead K8s-in-tx TCP bridge helpers from services-service.js', () => { + const servicesSource = fs.readFileSync( + path.join(REPO_ROOT, 
'src/services/services-service.js'), + 'utf8' + ) + expect(servicesSource).to.not.match(/function _addTcpConnector/) + expect(servicesSource).to.not.match(/function _addTcpListener/) + expect(servicesSource).to.not.match(/function _updateTcpConnector/) + expect(servicesSource).to.not.match(/function _deleteTcpConnector/) + expect(servicesSource).to.not.match(/function _deleteTcpListener/) + expect(servicesSource).to.not.match(/_addTcpConnector,/) + }) + + it('routes OIDC provider adapter through runInTransaction', () => { + const adapterSource = fs.readFileSync( + path.join(REPO_ROOT, 'src/data/adapters/oidc-provider-adapter.js'), + 'utf8' + ) + expect(adapterSource).to.include('runInTransaction') + expect(adapterSource).to.match(/label: 'oidc\.adapter\.upsert'/) + expect(adapterSource).to.match(/\{\s*transaction,/) + }) + + it('passes transaction inside Sequelize options in volume-mounting-manager.js', () => { + const source = fs.readFileSync( + path.join(REPO_ROOT, 'src/data/managers/volume-mounting-manager.js'), + 'utf8' + ) + expect(source).to.not.match(/findOne\(\{[\s\S]*?\}, \{ transaction \}\)/) + expect(source).to.not.match(/findAll\(\{[\s\S]*?\}, \{ transaction \}\)/) + }) +}) diff --git a/test/src/helpers/transaction-runner.test.js b/test/src/helpers/transaction-runner.test.js new file mode 100644 index 00000000..7006f9c5 --- /dev/null +++ b/test/src/helpers/transaction-runner.test.js @@ -0,0 +1,288 @@ +const { expect } = require('chai') +const fs = require('fs') +const os = require('os') +const path = require('path') +const Sequelize = require('sequelize') +const Transaction = require('sequelize/lib/transaction') +const sinon = require('sinon') + +const databaseProvider = require('../../../src/data/providers/database-factory') +const { + PRIORITY_BACKGROUND, + PRIORITY_INTERACTIVE, + _resetQueueForTests, + getActiveTransactionContext, + getWriteQueueDepth, + runInTransaction, + runWithTransactionContext +} = 
require('../../../src/helpers/transaction-runner') +const { registerSqlitePragmas, applySqlitePragmas } = require('../../../src/helpers/sqlite-pragmas') + +describe('transaction-runner', () => { + const sandbox = sinon.createSandbox() + let originalDbProvider + let sequelize + let dbPath + + beforeEach(async () => { + originalDbProvider = process.env.DB_PROVIDER + delete process.env.DB_PROVIDER + _resetQueueForTests() + + dbPath = path.join(os.tmpdir(), `controller-tx-runner-${Date.now()}-${Math.random()}.sqlite`) + sequelize = new Sequelize({ + dialect: 'sqlite', + storage: dbPath, + logging: false, + pool: { max: 1, min: 0, idle: 10000 } + }) + registerSqlitePragmas(sequelize, { + journalMode: 'WAL', + busyTimeoutMs: 10000, + synchronous: 'NORMAL' + }) + await sequelize.authenticate() + await applySqlitePragmas(sequelize, { + journalMode: 'WAL', + busyTimeoutMs: 10000, + synchronous: 'NORMAL' + }) + await sequelize.query('CREATE TABLE tx_runner_test (id INTEGER PRIMARY KEY AUTOINCREMENT, label TEXT NOT NULL)') + + sandbox.stub(databaseProvider, 'sequelize').value(sequelize) + }) + + afterEach(async () => { + sandbox.restore() + _resetQueueForTests() + if (originalDbProvider === undefined) { + delete process.env.DB_PROVIDER + } else { + process.env.DB_PROVIDER = originalDbProvider + } + if (sequelize) { + await sequelize.close() + } + for (const suffix of ['', '-wal', '-shm']) { + try { + fs.unlinkSync(dbPath + suffix) + } catch (_) { /* ignore missing sidecar files */ } + } + }) + + it('exports priority constants', () => { + expect(PRIORITY_INTERACTIVE).to.equal('interactive') + expect(PRIORITY_BACKGROUND).to.equal('background') + }) + + it('passes a real Sequelize transaction to the callback on sqlite', async () => { + let seenTransaction + await runInTransaction(async (transaction) => { + seenTransaction = transaction + await sequelize.query('INSERT INTO tx_runner_test (label) VALUES (\'interactive\')', { transaction }) + }) + + 
expect(seenTransaction).to.be.instanceOf(Transaction) + const [rows] = await sequelize.query('SELECT label FROM tx_runner_test') + expect(rows).to.deep.equal([{ label: 'interactive' }]) + }) + + it('runs interactive tasks before queued background tasks on sqlite', async () => { + const order = [] + let releaseBg1 + + const bg1Gate = new Promise((resolve) => { + releaseBg1 = resolve + }) + + const bg1 = runInTransaction(async () => { + order.push('bg1-start') + await bg1Gate + order.push('bg1-end') + }, { priority: PRIORITY_BACKGROUND, label: 'bg1' }) + + await new Promise((resolve) => setTimeout(resolve, 20)) + + const bg2 = runInTransaction(async () => { + order.push('bg2') + }, { priority: PRIORITY_BACKGROUND, label: 'bg2' }) + + const interactive = runInTransaction(async () => { + order.push('interactive') + }, { priority: PRIORITY_INTERACTIVE, label: 'interactive' }) + + releaseBg1() + await Promise.all([bg1, interactive, bg2]) + + expect(order).to.deep.equal(['bg1-start', 'bg1-end', 'interactive', 'bg2']) + }) + + it('tracks sqlite queue depth by priority lane', async () => { + let release + const gate = new Promise((resolve) => { + release = resolve + }) + + const first = runInTransaction(async () => { + await gate + }, { priority: PRIORITY_BACKGROUND }) + + await new Promise((resolve) => setTimeout(resolve, 20)) + + runInTransaction(async () => {}, { priority: PRIORITY_INTERACTIVE }) + runInTransaction(async () => {}, { priority: PRIORITY_BACKGROUND }) + + expect(getWriteQueueDepth()).to.deep.equal({ interactive: 1, background: 1 }) + + release() + await first + }) + + it('retries on SQLITE_BUSY inside the sqlite queue worker', async () => { + const originalTransaction = sequelize.transaction.bind(sequelize) + let attempts = 0 + + sequelize.transaction = async (fn) => { + attempts++ + if (attempts === 1) { + throw new Error('SQLITE_BUSY: database is locked') + } + return originalTransaction(fn) + } + + await runInTransaction(async (transaction) => { + await 
sequelize.query('INSERT INTO tx_runner_test (label) VALUES (\'retried\')', { transaction }) + }) + + expect(attempts).to.be.at.least(2) + const [rows] = await sequelize.query('SELECT label FROM tx_runner_test') + expect(rows).to.deep.equal([{ label: 'retried' }]) + }) + + it('skips the global sqlite queue for mysql provider', async () => { + process.env.DB_PROVIDER = 'mysql' + + let tx1Running = false + let tx2StartedWhileTx1Running = false + const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms)) + + sequelize.transaction = sandbox.stub().callsFake(async (fn) => fn({})) + + await Promise.all([ + runInTransaction(async () => { + tx1Running = true + await delay(50) + tx1Running = false + }, { priority: PRIORITY_BACKGROUND }), + runInTransaction(async () => { + if (tx1Running) { + tx2StartedWhileTx1Running = true + } + }, { priority: PRIORITY_BACKGROUND }) + ]) + + expect(tx2StartedWhileTx1Running).to.equal(true) + }) + + it('reuses active sqlite transaction for nested interactive runInTransaction calls', async () => { + const order = [] + await runInTransaction(async (outerTx) => { + order.push('outer-start') + await runInTransaction(async (innerTx) => { + order.push('inner') + expect(innerTx).to.equal(outerTx) + }, { priority: PRIORITY_INTERACTIVE, label: 'nested-interactive' }) + order.push('outer-end') + }, { priority: PRIORITY_BACKGROUND, label: 'outer-background' }) + + expect(order).to.deep.equal(['outer-start', 'inner', 'outer-end']) + }) + + it('enqueues a fresh sqlite transaction for background runInTransaction after parent releases', async () => { + let outerTx + let innerTxPromise + await runInTransaction(async (transaction) => { + outerTx = transaction + innerTxPromise = runInTransaction(async (tx) => tx, { + priority: PRIORITY_BACKGROUND, + label: 'event.audit' + }) + }, { priority: PRIORITY_INTERACTIVE, label: 'handler' }) + + const innerTx = await innerTxPromise + expect(innerTx).to.be.instanceOf(Transaction) + 
expect(innerTx).to.not.equal(outerTx) + }) + + it('background runInTransaction ignores stale committed parent in ALS', async () => { + let committedTx + await runInTransaction(async (transaction) => { + committedTx = transaction + }, { priority: PRIORITY_INTERACTIVE, label: 'handler' }) + + await runWithTransactionContext(committedTx, PRIORITY_INTERACTIVE, async () => { + let auditTx + await runInTransaction(async (tx) => { + auditTx = tx + await sequelize.query('INSERT INTO tx_runner_test (label) VALUES (\'audit\')', { transaction: tx }) + }, { priority: PRIORITY_BACKGROUND, label: 'event.audit' }) + expect(auditTx).to.be.instanceOf(Transaction) + expect(auditTx).to.not.equal(committedTx) + }) + + const [rows] = await sequelize.query('SELECT label FROM tx_runner_test') + expect(rows).to.deep.equal([{ label: 'audit' }]) + }) + + it('skips the global sqlite queue for postgres provider', async () => { + process.env.DB_PROVIDER = 'postgres' + + let tx1Running = false + let tx2StartedWhileTx1Running = false + const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms)) + + sequelize.transaction = sandbox.stub().callsFake(async (fn) => fn({})) + + await Promise.all([ + runInTransaction(async () => { + tx1Running = true + await delay(50) + tx1Running = false + }), + runInTransaction(async () => { + if (tx1Running) { + tx2StartedWhileTx1Running = true + } + }) + ]) + + expect(tx2StartedWhileTx1Running).to.equal(true) + }) + + it('runWithTransactionContext skips duplicate ALS frame when same tx active', async () => { + const tx = { commit: () => {}, rollback: () => {} } + let nestedCtx + + await runWithTransactionContext(tx, PRIORITY_INTERACTIVE, async () => { + await runWithTransactionContext(tx, null, async () => { + nestedCtx = getActiveTransactionContext() + }) + }) + + expect(nestedCtx.transaction).to.equal(tx) + expect(nestedCtx.priority).to.equal(PRIORITY_INTERACTIVE) + }) + + it('runWithTransactionContext inherits priority from parent when omitted', 
async () => { + const tx = { commit: () => {}, rollback: () => {} } + let nestedCtx + + await runWithTransactionContext(tx, PRIORITY_BACKGROUND, async () => { + await runWithTransactionContext(tx, null, async () => { + nestedCtx = getActiveTransactionContext() + }) + }) + + expect(nestedCtx.priority).to.equal(PRIORITY_BACKGROUND) + }) +}) diff --git a/test/src/integration/first-fog-reconcile-sqlite.test.js b/test/src/integration/first-fog-reconcile-sqlite.test.js new file mode 100644 index 00000000..96fba581 --- /dev/null +++ b/test/src/integration/first-fog-reconcile-sqlite.test.js @@ -0,0 +1,100 @@ +'use strict' + +/** + * Plan 19-I-D — first-fog sqlite integration gate (R133). + * + * Skipped in default `npm test` unless RUN_INTEGRATION=1 (full DB migrate + reconcile). + * + * RUN_INTEGRATION=1 npm run test:integration:first-fog + */ + +describe('first-fog reconcile sqlite (R133)', function () { + this.timeout(30000) + + let harness + let busyRetries = 0 + let restoreBusyCounter + + before(function () { + if (process.env.RUN_INTEGRATION !== '1') { + this.skip() + } + }) + + before(async function () { + const { + createFirstFogSqliteHarness, + installBusyRetryCounter + } = require('../../support/first-fog-sqlite-harness') + + busyRetries = 0 + restoreBusyCounter = installBusyRetryCounter(() => { + busyRetries += 1 + }) + harness = await createFirstFogSqliteHarness() + }) + + after(async function () { + if (restoreBusyCounter) { + restoreBusyCounter() + } + if (harness) { + await harness.teardown() + } + }) + + it('reconciles first fog to Ready while concurrent operator API completes under 2s', async function () { + const { expect } = require('chai') + const { runInTransaction, PRIORITY_INTERACTIVE } = require('../../../src/helpers/transaction-runner') + const IofogService = require('../../../src/services/iofog-service') + const UserService = require('../../../src/services/user-service') + const FogPlatformStatusManager = 
require('../../../src/data/managers/fog-platform-status-manager') + const { drainOnce } = require('../../../src/jobs/reconcile-outbox-drainer-job') + const PlatformReconcileWorkerJob = require('../../../src/jobs/platform-reconcile-worker-job') + const { driveReconcileUntilReady } = require('../../support/first-fog-sqlite-harness') + + const fogPayload = { + name: 'hub-edge', + host: '127.0.0.1', + archId: 1, + containerEngine: 'edgelet', + bluetoothEnabled: false, + abstractedHardwareEnabled: false + } + + let fogUuid + let concurrentElapsedMs + + const createFogPromise = runInTransaction( + (transaction) => IofogService.createFogEndPoint(fogPayload, false, transaction), + { priority: PRIORITY_INTERACTIVE, label: 'integration.createFog' } + ).then((result) => { + fogUuid = result.uuid + }) + + const concurrentStart = Date.now() + const concurrentPromise = Promise.all([ + UserService.login({ email: 'admin', password: harness.bootstrapPassword }, false), + runInTransaction( + (transaction) => IofogService.getFogListEndPoint([], false, transaction), + { priority: PRIORITY_INTERACTIVE, label: 'integration.iofogList' } + ) + ]).then(() => { + concurrentElapsedMs = Date.now() - concurrentStart + }) + + await Promise.all([createFogPromise, concurrentPromise]) + + expect(concurrentElapsedMs).to.be.lessThan(2000) + + const status = await driveReconcileUntilReady(fogUuid, { + drainOnce, + processNextFogTask: PlatformReconcileWorkerJob.processNextFogTask, + getStatus: (uuid) => FogPlatformStatusManager.getParsedStatus(uuid) + }) + + expect(status.phase).to.equal('Ready') + expect(status.lastError).to.satisfy((value) => value == null || value === '') + expect(busyRetries).to.equal(0) + }) +}) diff --git a/test/src/jobs/platform-reconcile-worker-job.test.js b/test/src/jobs/platform-reconcile-worker-job.test.js index 2b4b199b..8086f9a7 100644 --- a/test/src/jobs/platform-reconcile-worker-job.test.js +++ b/test/src/jobs/platform-reconcile-worker-job.test.js @@ -5,12 +5,17 @@ 
const ClusterControllerService = require('../../../src/services/cluster-controll const FogPlatformService = require('../../../src/services/fog-platform-service') const ServicePlatformService = require('../../../src/services/service-platform-service') const FogPlatformReconcileTaskManager = require('../../../src/data/managers/fog-platform-reconcile-task-manager') +const FogPlatformStatusManager = require('../../../src/data/managers/fog-platform-status-manager') const ServicePlatformReconcileTaskManager = require('../../../src/data/managers/service-platform-reconcile-task-manager') const ServiceManager = require('../../../src/data/managers/service-manager') const FogManager = require('../../../src/data/managers/iofog-manager') -const databaseProvider = require('../../../src/data/providers/database-factory') +const transactionRunner = require('../../../src/helpers/transaction-runner') const PlatformReconcileWorkerJob = require('../../../src/jobs/platform-reconcile-worker-job') +function stubRunInTransaction (sandbox, transaction = {}) { + sandbox.stub(transactionRunner, 'runInTransaction').callsFake(async (fn) => fn(transaction)) +} + describe('platform-reconcile-worker-job', () => { def('sandbox', () => sinon.createSandbox()) @@ -23,10 +28,11 @@ describe('platform-reconcile-worker-job', () => { $sandbox.stub(ClusterControllerService, 'getCurrentControllerUuid').returns('controller-1') $sandbox.stub(FogPlatformReconcileTaskManager, 'claimNextFogTask').resolves(task) + $sandbox.stub(FogPlatformStatusManager, 'getParsedStatus').resolves({ phase: 'Ready' }) $sandbox.stub(FogPlatformService, 'reconcileFog').resolves({ fogUuid: 'fog-1', phase: 'Ready' }) $sandbox.stub(FogPlatformService, 'reconcileFogDelete') $sandbox.stub(FogPlatformReconcileTaskManager, 'getEntity').returns(entity) - $sandbox.stub(databaseProvider.sequelize, 'transaction').callsFake(async (fn) => fn(transaction)) + stubRunInTransaction($sandbox, transaction) await 
PlatformReconcileWorkerJob.processNextFogTask() @@ -34,36 +40,31 @@ describe('platform-reconcile-worker-job', () => { expect(FogPlatformService.reconcileFogDelete).to.not.have.been.called expect(entity.destroy).to.have.been.calledOnceWith({ where: { id: 11 }, - transaction + transaction: sinon.match.any }) }) - it('passes fakeTransaction into reconcileFog DB layer from worker (no reconcileFog stub)', async () => { + it('passes transaction into reconcileFog prepare phase from worker (no reconcileFog stub)', async () => { const task = { id: 14, fogUuid: 'fog-1', reason: 'spec-changed', attempts: 0 } - const appHelperPath = require.resolve('../../../src/helpers/app-helper') - const decoratorPath = require.resolve('../../../src/decorators/transaction-decorator') - const fogPlatformServicePath = require.resolve('../../../src/services/fog-platform-service') - const workerPath = require.resolve('../../../src/jobs/platform-reconcile-worker-job') - - $sandbox.stub(require(appHelperPath), 'isTest').returns(false) - delete require.cache[decoratorPath] - delete require.cache[fogPlatformServicePath] - delete require.cache[workerPath] - const WorkerJob = require('../../../src/jobs/platform-reconcile-worker-job') + const labels = [] $sandbox.stub(ClusterControllerService, 'getCurrentControllerUuid').returns('controller-1') $sandbox.stub(FogPlatformReconcileTaskManager, 'claimNextFogTask').resolves(task) + $sandbox.stub(FogPlatformStatusManager, 'getParsedStatus').resolves({ phase: 'Progressing' }) $sandbox.stub(FogManager, 'findOneWithTags').resolves(null) $sandbox.stub(FogPlatformReconcileTaskManager, 'recordFogTaskFailure').resolves(task) - const markFailedPath = require.resolve('../../../src/services/fog-platform-service') - $sandbox.stub(require(markFailedPath), 'markReconcileFailed').resolves() - $sandbox.stub(databaseProvider.sequelize, 'transaction').callsFake(async (fn) => fn({})) + $sandbox.stub(FogPlatformService, 'markReconcileFailed').resolves() + 
$sandbox.stub(transactionRunner, 'runInTransaction').callsFake(async (fn, options = {}) => { + labels.push(options.label) + return fn({ id: 'worker-tx' }) + }) - await WorkerJob.processNextFogTask() + await PlatformReconcileWorkerJob.processNextFogTask() + expect(labels).to.include('fogPlatform.prepare') expect(FogManager.findOneWithTags).to.have.been.calledOnceWith( { uuid: 'fog-1' }, - sinon.match({ fakeTransaction: true }) + { id: 'worker-tx' } ) }) @@ -76,7 +77,7 @@ describe('platform-reconcile-worker-job', () => { $sandbox.stub(FogPlatformService, 'reconcileFogDelete').resolves({ fogUuid: 'fog-2', deleted: true }) $sandbox.stub(FogPlatformService, 'reconcileFog') $sandbox.stub(FogPlatformReconcileTaskManager, 'getEntity').returns(entity) - $sandbox.stub(databaseProvider.sequelize, 'transaction').callsFake(async (fn) => fn({})) + stubRunInTransaction($sandbox) await PlatformReconcileWorkerJob.processNextFogTask() @@ -84,16 +85,76 @@ describe('platform-reconcile-worker-job', () => { expect(FogPlatformService.reconcileFog).to.not.have.been.called }) + it('runs delete reconcile when platform phase is Deleting even if task reason is spec-changed', async () => { + const task = { id: 15, fogUuid: 'fog-4', reason: 'spec-changed', attempts: 0 } + const entity = { destroy: $sandbox.stub().resolves(1) } + + $sandbox.stub(ClusterControllerService, 'getCurrentControllerUuid').returns('controller-1') + $sandbox.stub(FogPlatformReconcileTaskManager, 'claimNextFogTask').resolves(task) + $sandbox.stub(FogPlatformStatusManager, 'getParsedStatus').resolves({ phase: 'Deleting' }) + $sandbox.stub(FogPlatformService, 'reconcileFogDelete').resolves({ fogUuid: 'fog-4', deleted: true }) + $sandbox.stub(FogPlatformService, 'reconcileFog') + $sandbox.stub(FogPlatformReconcileTaskManager, 'getEntity').returns(entity) + stubRunInTransaction($sandbox) + + await PlatformReconcileWorkerJob.processNextFogTask() + + 
expect(FogPlatformService.reconcileFogDelete).to.have.been.calledOnceWith('fog-4') + expect(FogPlatformService.reconcileFog).to.not.have.been.called + }) + + it('runs delete reconcile when reconcileFog skips because fog is deleting', async () => { + const task = { id: 16, fogUuid: 'fog-5', reason: 'manual-retry', attempts: 0 } + const entity = { destroy: $sandbox.stub().resolves(1) } + + $sandbox.stub(ClusterControllerService, 'getCurrentControllerUuid').returns('controller-1') + $sandbox.stub(FogPlatformReconcileTaskManager, 'claimNextFogTask').resolves(task) + $sandbox.stub(FogPlatformStatusManager, 'getParsedStatus').resolves({ phase: 'Progressing' }) + $sandbox.stub(FogPlatformService, 'reconcileFog').resolves({ skipped: true, reason: 'deleting' }) + $sandbox.stub(FogPlatformService, 'reconcileFogDelete').resolves({ fogUuid: 'fog-5', deleted: true }) + $sandbox.stub(FogPlatformReconcileTaskManager, 'getEntity').returns(entity) + stubRunInTransaction($sandbox) + + await PlatformReconcileWorkerJob.processNextFogTask() + + expect(FogPlatformService.reconcileFog).to.have.been.calledOnceWith('fog-5') + expect(FogPlatformService.reconcileFogDelete).to.have.been.calledOnceWith('fog-5') + }) + + it('keeps Deleting phase when delete reconcile fails', async () => { + const task = { id: 17, fogUuid: 'fog-6', reason: 'delete', attempts: 1 } + const error = new Error('nats cleanup failed') + + $sandbox.stub(ClusterControllerService, 'getCurrentControllerUuid').returns('controller-1') + $sandbox.stub(FogPlatformReconcileTaskManager, 'claimNextFogTask').resolves(task) + $sandbox.stub(FogPlatformService, 'reconcileFogDelete').rejects(error) + $sandbox.stub(FogPlatformReconcileTaskManager, 'recordFogTaskFailure').resolves(task) + $sandbox.stub(FogPlatformStatusManager, 'setPhase').resolves() + $sandbox.stub(FogPlatformService, 'markReconcileFailed') + stubRunInTransaction($sandbox) + + await PlatformReconcileWorkerJob.processNextFogTask() + + 
expect(FogPlatformStatusManager.setPhase).to.have.been.calledOnceWith( + 'fog-6', + 'Deleting', + { lastError: 'nats cleanup failed' }, + sinon.match.any + ) + expect(FogPlatformService.markReconcileFailed).to.not.have.been.called + }) + it('records failure and updates fog status when reconcile throws', async () => { const task = { id: 13, fogUuid: 'fog-3', reason: 'spec-changed', attempts: 2 } const error = new Error('router create failed') $sandbox.stub(ClusterControllerService, 'getCurrentControllerUuid').returns('controller-1') $sandbox.stub(FogPlatformReconcileTaskManager, 'claimNextFogTask').resolves(task) + $sandbox.stub(FogPlatformStatusManager, 'getParsedStatus').resolves({ phase: 'Progressing' }) $sandbox.stub(FogPlatformService, 'reconcileFog').rejects(error) $sandbox.stub(FogPlatformReconcileTaskManager, 'recordFogTaskFailure').resolves(task) $sandbox.stub(FogPlatformService, 'markReconcileFailed').resolves() - $sandbox.stub(databaseProvider.sequelize, 'transaction').callsFake(async (fn) => fn({})) + stubRunInTransaction($sandbox) await PlatformReconcileWorkerJob.processNextFogTask() @@ -155,14 +216,14 @@ describe('platform-reconcile-worker-job', () => { provisioningStatus: 'ready' }) $sandbox.stub(ServicePlatformReconcileTaskManager, 'getEntity').returns(entity) - $sandbox.stub(databaseProvider.sequelize, 'transaction').callsFake(async (fn) => fn(transaction)) + stubRunInTransaction($sandbox, transaction) await PlatformReconcileWorkerJob.processNextServiceTask() expect(ServicePlatformService.reconcileService).to.have.been.calledOnceWith('api-gateway', task) expect(entity.destroy).to.have.been.calledOnceWith({ where: { id: 21 }, - transaction + transaction: sinon.match.any }) }) @@ -188,7 +249,7 @@ describe('platform-reconcile-worker-job', () => { $sandbox.stub(ServicePlatformService, 'reconcileService').rejects(error) $sandbox.stub(ServicePlatformReconcileTaskManager, 'recordServiceTaskFailure').resolves(task) $sandbox.stub(ServiceManager, 
'update').resolves() - $sandbox.stub(databaseProvider.sequelize, 'transaction').callsFake(async (fn) => fn({})) + stubRunInTransaction($sandbox) await PlatformReconcileWorkerJob.processNextServiceTask() @@ -214,7 +275,7 @@ describe('platform-reconcile-worker-job', () => { $sandbox.stub(ServicePlatformService, 'reconcileService').rejects(error) $sandbox.stub(ServicePlatformReconcileTaskManager, 'recordServiceTaskFailure').resolves(task) $sandbox.stub(ServiceManager, 'update').resolves() - $sandbox.stub(databaseProvider.sequelize, 'transaction').callsFake(async (fn) => fn({})) + stubRunInTransaction($sandbox) await PlatformReconcileWorkerJob.processNextServiceTask() diff --git a/test/src/middlewares/rvaluesVarSubstitionMiddleware.test.js b/test/src/middlewares/rvaluesVarSubstitionMiddleware.test.js index b0263051..c1d097f5 100755 --- a/test/src/middlewares/rvaluesVarSubstitionMiddleware.test.js +++ b/test/src/middlewares/rvaluesVarSubstitionMiddleware.test.js @@ -144,13 +144,21 @@ describe('rvaluesVarSubstitionMiddleware', () => { host: 'myhost01', })) + beforeEach(() => { + const Transaction = require('sequelize/lib/transaction') + $sandbox.matchTransaction = sinon.match.instanceOf(Transaction) + }) + it('performs variable substitutions and applies filter', async () => { await $subject expect($nextfct).to.have.been.called expect(FogService.getFogEndPoint).to.have.been.called expect(FogService.getFogEndPoint).to.have.been.calledWith({ uuid: 'TkLh8wzcxb86CRnHQyJkx6VF468JFd4f' }, false) expect(ApplicationManager.findOnePopulated).to.have.been.calledOnce - expect(ApplicationManager.findOnePopulated).to.have.been.calledWith({ exclude: ['created_at', 'updated_at'] }, { fakeTransaction: true }) + expect(ApplicationManager.findOnePopulated).to.have.been.calledWith( + { exclude: ['created_at', 'updated_at'] }, + $sandbox.matchTransaction + ) expect(MicroservicesService.listMicroservicesEndPoint).to.have.been.called 
expect(MicroservicesService.listMicroservicesEndPoint).to.have.been.calledWith({ applicationName: $redisAppName }, false) diff --git a/test/src/schemas/utils.test.js b/test/src/schemas/utils.test.js new file mode 100644 index 00000000..69c451b1 --- /dev/null +++ b/test/src/schemas/utils.test.js @@ -0,0 +1,31 @@ +'use strict' + +const { expect } = require('chai') +const { versionRegex } = require('../../../src/schemas/utils/utils') + +describe('schemas/utils versionRegex', () => { + const re = new RegExp(versionRegex) + + for (const version of [ + '1.0.0', + 'v1.0.0', + '2.0.0-rc.2', + 'v2.0.0-rc.2', + '1.0.0-rc.10', + 'v1.0.0-rc.10', + '1.0.0-beta.11', + '1.0.0-alpha+001', + '1.0.0+20130313144700', + '1.0.0-x.7.z.92' + ]) { + it(`accepts ${version}`, () => { + expect(re.test(version)).to.equal(true) + }) + } + + for (const version of ['01.0.0', '1.0.0-rc.01', '1.0', 'not-a-version']) { + it(`rejects ${version}`, () => { + expect(re.test(version)).to.equal(false) + }) + } +}) diff --git a/test/src/services/auth-bootstrap-service.test.js b/test/src/services/auth-bootstrap-service.test.js index 2917f5b7..a06a702e 100644 --- a/test/src/services/auth-bootstrap-service.test.js +++ b/test/src/services/auth-bootstrap-service.test.js @@ -82,10 +82,9 @@ function restoreDbModels (snapshot) { function installBootstrapDb (sandbox, state) { const db = require('../../../src/data/models') + const transactionRunner = require('../../../src/helpers/transaction-runner') - db.sequelize = { - transaction: sandbox.stub().callsFake(async () => createNoopTransaction()) - } + sandbox.stub(transactionRunner, 'runInTransaction').callsFake(async (fn) => fn(createNoopTransaction())) db.AuthGroup = { findOrCreate: sandbox.stub().callsFake(async ({ where, defaults }) => { diff --git a/test/src/services/auth-interaction-service.test.js b/test/src/services/auth-interaction-service.test.js index 45c855dd..335a1da7 100644 --- a/test/src/services/auth-interaction-service.test.js +++ 
b/test/src/services/auth-interaction-service.test.js @@ -149,7 +149,7 @@ describe('Auth interaction service', () => { const req = { headers: {} } const res = {} - const completeResult = await AuthInteractionService.complete($interactionUid, req, res, false) + const completeResult = await AuthInteractionService.complete($interactionUid, req, res) expect(completeResult.step).to.equal('complete') }) @@ -167,7 +167,7 @@ describe('Auth interaction service', () => { }, false) try { - await AuthInteractionService.complete($interactionUid, { headers: {} }, {}, false) + await AuthInteractionService.complete($interactionUid, { headers: {} }, {}) expect.fail('expected completion to fail') } catch (error) { expect(error).to.be.instanceOf(Errors.ValidationError) @@ -266,7 +266,7 @@ describe('Auth interaction service', () => { const req = { headers: {} } const res = {} - const completeResult = await AuthInteractionService.complete($interactionUid, req, res, false) + const completeResult = await AuthInteractionService.complete($interactionUid, req, res) expect(completeResult.step).to.equal('complete') expect(completeResult.redirectTo).to.equal('https://controller.test/oidc/auth/test/resume') diff --git a/test/src/services/cluster-controller-service.test.js b/test/src/services/cluster-controller-service.test.js new file mode 100644 index 00000000..3eb7e8ef --- /dev/null +++ b/test/src/services/cluster-controller-service.test.js @@ -0,0 +1,59 @@ +const { expect } = require('chai') +const sinon = require('sinon') + +const ClusterControllerService = require('../../../src/services/cluster-controller-service') +const ClusterControllerManager = require('../../../src/data/managers/cluster-controller-manager') + +describe('Cluster Controller Service', () => { + def('sandbox', () => sinon.createSandbox()) + afterEach(() => $sandbox.restore()) + + describe('.listClusterControllers()', () => { + const transaction = {} + const activeRow = { + uuid: 'active-uuid', + host: 'controller-pod-a', + 
processId: 1, + lastHeartbeat: new Date(), + isActive: true, + createdAt: new Date(), + updatedAt: new Date() + } + const inactiveRow = { + uuid: 'inactive-uuid', + host: 'controller-pod-b', + processId: 1, + lastHeartbeat: new Date(), + isActive: false, + createdAt: new Date(), + updatedAt: new Date() + } + + beforeEach(() => { + $sandbox.stub(ClusterControllerManager, 'findAll').callsFake((where) => { + const rows = [activeRow, inactiveRow] + if (where && where.isActive === true) { + return Promise.resolve(rows.filter((row) => row.isActive)) + } + return Promise.resolve(rows) + }) + }) + + it('returns only active controllers by default', async () => { + const result = await ClusterControllerService.listClusterControllers(false, transaction) + + expect(ClusterControllerManager.findAll).to.have.been.calledWith({ isActive: true }, transaction) + expect(result).to.have.length(1) + expect(result[0].uuid).to.equal('active-uuid') + expect(result[0].isActive).to.equal(true) + }) + + it('returns all controllers when includeInactive is true', async () => { + const result = await ClusterControllerService.listClusterControllers(true, transaction) + + expect(ClusterControllerManager.findAll).to.have.been.calledWith({}, transaction) + expect(result).to.have.length(2) + expect(result.map((row) => row.uuid)).to.deep.equal(['active-uuid', 'inactive-uuid']) + }) + }) +}) diff --git a/test/src/services/controller-ms-service.test.js b/test/src/services/controller-ms-service.test.js index dfa2f2f9..c5841bc7 100644 --- a/test/src/services/controller-ms-service.test.js +++ b/test/src/services/controller-ms-service.test.js @@ -281,6 +281,17 @@ describe('Controller MS Service', () => { expect(MicroservicesService.updateChangeTracking).to.have.been.calledWith(true, fogUuid, transaction) }) + it('validates ports after clearing existing mappings on update', async () => { + await $subject + expect(MicroservicePortService.deletePortMappings).to.have.been.calledBefore( + 
MicroservicePortService.validatePortMappings + ) + expect(MicroservicePortService.validatePortMappings).to.have.been.calledWith( + { ports: registerData.ports, iofogUuid: fogUuid }, + transaction + ) + }) + context('when container workload fields are sent on update', () => { def('body', () => ({ ...registerData, diff --git a/test/src/services/fog-platform-service.test.js b/test/src/services/fog-platform-service.test.js index 5e87642e..87385414 100644 --- a/test/src/services/fog-platform-service.test.js +++ b/test/src/services/fog-platform-service.test.js @@ -2,6 +2,7 @@ const { expect } = require('chai') const sinon = require('sinon') const FogPlatformService = require('../../../src/services/fog-platform-service') +const Constants = require('../../../src/helpers/constants') const FogManager = require('../../../src/data/managers/iofog-manager') const FogPlatformSpecManager = require('../../../src/data/managers/fog-platform-spec-manager') const FogPlatformStatusManager = require('../../../src/data/managers/fog-platform-status-manager') @@ -11,6 +12,7 @@ const NatsInstanceManager = require('../../../src/data/managers/nats-instance-ma const NatsConnectionManager = require('../../../src/data/managers/nats-connection-manager') const IofogService = require('../../../src/services/iofog-service') const NatsService = require('../../../src/services/nats-service') +const ReconcileOutboxManager = require('../../../src/data/managers/reconcile-outbox-manager') const RouterService = require('../../../src/services/router-service') const ServiceBridgeConfig = require('../../../src/services/service-bridge-config') const ChangeTrackingService = require('../../../src/services/change-tracking-service') @@ -19,6 +21,7 @@ const MicroserviceService = require('../../../src/services/microservices-service const ApplicationManager = require('../../../src/data/managers/application-manager') const SecretManager = require('../../../src/data/managers/secret-manager') const FogPublicKeyManager = 
require('../../../src/data/managers/iofog-public-key-manager') +const transactionRunner = require('../../../src/helpers/transaction-runner') describe('Fog platform service', () => { def('sandbox', () => sinon.createSandbox()) @@ -27,6 +30,17 @@ describe('Fog platform service', () => { afterEach(() => $sandbox.restore()) + function stubPhasedRunInTransaction (sandbox, options = {}) { + const labels = options.labels || null + sandbox.stub(transactionRunner, 'runInTransaction').callsFake(async (fn, runOptions = {}) => { + if (labels && runOptions.label) { + labels.push(runOptions.label) + } + return fn(transaction) + }) + return labels + } + describe('.validateSystemFogInvariants()', () => { it('rejects non-interior router mode for system fog', () => { try { @@ -73,6 +87,7 @@ describe('Fog platform service', () => { } beforeEach(() => { + stubPhasedRunInTransaction($sandbox) $sandbox.stub(FogManager, 'findOneWithTags').resolves({ ...fog }) $sandbox.stub(FogManager, 'findOne').resolves({ ...fog }) $sandbox.stub(FogManager, 'update').resolves() @@ -94,12 +109,19 @@ describe('Fog platform service', () => { $sandbox.stub(NatsInstanceManager, 'findByFog').resolves({ id: 5, isLeaf: true }) $sandbox.stub(NatsConnectionManager, 'findAllWithNats').resolves([]) $sandbox.stub(IofogService, '_handleRouterCertificates').resolves() - $sandbox.stub(NatsService, 'ensureNatsForFog').resolves() - $sandbox.stub(NatsService, 'cleanupNatsForFog').resolves() - $sandbox.stub(NatsService, 'enqueueReconcileTask').resolves() + $sandbox.stub(NatsService, 'ensureNatsForFogPhased').resolves({}) + $sandbox.stub(NatsService, 'cleanupNatsForFogPhased').resolves() + $sandbox.stub(ReconcileOutboxManager, 'enqueueNats').resolves() + $sandbox.stub(ReconcileOutboxManager, 'enqueueFogPlatform').resolves() $sandbox.stub(RouterService, 'validateAndReturnUpstreamRouters').resolves([]) - $sandbox.stub(RouterService, 'updateRouter').resolves(router) - $sandbox.stub(IofogService, 
'_getRouterMicroserviceConfig').resolves({ bridges: { tcpListeners: {}, tcpConnectors: {} } }) + $sandbox.stub(RouterService, 'updateRouter').resolves({ + host: 'localhost', + messagingPort: router.messagingPort + }) + $sandbox.stub(RouterService, 'buildFreshRouterMicroserviceConfig').resolves({ + connectors: { 'default-router': { name: 'default-router', host: '10.0.0.1', port: '55671', role: 'edge' } }, + bridges: { tcpListeners: {}, tcpConnectors: {} } + }) $sandbox.stub(ServiceBridgeConfig, 'recomputeServiceBridgeConfig').resolves({ bridges: { tcpListeners: {}, tcpConnectors: {} } }) $sandbox.stub(ChangeTrackingService, 'create').resolves() $sandbox.stub(ChangeTrackingService, 'update').resolves() @@ -108,7 +130,7 @@ describe('Fog platform service', () => { it('skips reconcile when platform phase is Deleting', async () => { FogPlatformStatusManager.getParsedStatus.resolves({ fogUuid, phase: 'Deleting', observedGeneration: 1 }) - const result = await FogPlatformService.reconcileFog(fogUuid, transaction) + const result = await FogPlatformService.reconcileFog(fogUuid) expect(result).to.eql({ skipped: true, reason: 'deleting' }) expect(FogPlatformStatusManager.setPhase).to.not.have.been.called @@ -116,11 +138,16 @@ describe('Fog platform service', () => { }) it('runs ordered reconcile steps and marks platform Ready', async () => { - const result = await FogPlatformService.reconcileFog(fogUuid, transaction) + const result = await FogPlatformService.reconcileFog(fogUuid) expect(IofogService._handleRouterCertificates).to.have.been.calledOnce - expect(NatsService.ensureNatsForFog).to.have.been.calledOnce + expect(NatsService.ensureNatsForFogPhased).to.have.been.calledOnce expect(RouterService.updateRouter).to.have.been.calledOnce + expect(RouterService.buildFreshRouterMicroserviceConfig).to.have.been.calledOnceWith( + router.id, + spec.containerEngine, + transaction + ) expect(ServiceBridgeConfig.recomputeServiceBridgeConfig).to.have.been.calledOnce 
expect(FogPlatformStatusManager.setPhase).to.have.been.calledWith( fogUuid, @@ -137,31 +164,52 @@ describe('Fog platform service', () => { }) it('is safe to reconcile the same generation twice', async () => { - await FogPlatformService.reconcileFog(fogUuid, transaction) - await FogPlatformService.reconcileFog(fogUuid, transaction) + await FogPlatformService.reconcileFog(fogUuid) + await FogPlatformService.reconcileFog(fogUuid) expect(RouterService.updateRouter).to.have.been.calledTwice + expect(RouterService.buildFreshRouterMicroserviceConfig).to.have.been.calledTwice expect(ServiceBridgeConfig.recomputeServiceBridgeConfig).to.have.been.calledTwice }) - it('accepts worker call shape (fogUuid only) with decorator fakeTransaction outside test mode', async () => { - const appHelperPath = require.resolve('../../../src/helpers/app-helper') - const decoratorPath = require.resolve('../../../src/decorators/transaction-decorator') - const fogPlatformServicePath = require.resolve('../../../src/services/fog-platform-service') - - $sandbox.stub(require(appHelperPath), 'isTest').returns(false) - delete require.cache[decoratorPath] - delete require.cache[fogPlatformServicePath] - const FreshFogPlatformService = require('../../../src/services/fog-platform-service') + it('uses phased runInTransaction labels from worker call shape (fogUuid only)', async () => { + const labels = [] + transactionRunner.runInTransaction.callsFake(async (fn, runOptions = {}) => { + if (runOptions.label) { + labels.push(runOptions.label) + } + return fn(transaction) + }) FogPlatformStatusManager.getParsedStatus.resolves({ fogUuid, phase: 'Deleting', observedGeneration: 1 }) - const result = await FreshFogPlatformService.reconcileFog(fogUuid) + const result = await FogPlatformService.reconcileFog(fogUuid) expect(result).to.eql({ skipped: true, reason: 'deleting' }) - expect(FogManager.findOneWithTags).to.have.been.calledWith( - { uuid: fogUuid }, - sinon.match({ fakeTransaction: true }) + 
expect(labels).to.deep.equal(['fogPlatform.prepare']) + }) + + it('runs cert, nats, platform, and finalize in separate transaction phases', async () => { + const labels = [] + transactionRunner.runInTransaction.callsFake(async (fn, runOptions = {}) => { + if (runOptions.label) { + labels.push(runOptions.label) + } + return fn(transaction) + }) + + await FogPlatformService.reconcileFog(fogUuid) + + expect(labels).to.deep.equal([ + 'fogPlatform.prepare', + 'fogPlatform.certPrep', + 'fogPlatform.platform', + 'fogPlatform.finalize' + ]) + expect(NatsService.ensureNatsForFogPhased).to.have.been.calledOnce + expect(NatsService.ensureNatsForFogPhased).to.have.been.calledWith( + sinon.match.any, + sinon.match.any ) }) @@ -170,13 +218,202 @@ describe('Fog platform service', () => { .onCall(0).resolves(null) .onCall(1).resolves({ id: 5, isLeaf: true }) - await FogPlatformService.reconcileFog(fogUuid, transaction) + await FogPlatformService.reconcileFog(fogUuid) - expect(NatsService.enqueueReconcileTask).to.have.been.calledWithMatch({ + expect(ReconcileOutboxManager.enqueueNats).to.have.been.calledWithMatch({ reason: 'cluster-routes-changed', fogUuids: [fogUuid] }, transaction) }) + + context('when persisted router endpoints change during reconcile', () => { + const mutableRouter = { + id: 11, + iofogUuid: fogUuid, + isEdge: true, + host: '10.0.0.1', + messagingPort: 5671, + interRouterPort: null, + edgeRouterPort: null + } + + beforeEach(() => { + const hostChangedSpec = { + ...spec, + host: '10.0.0.2' + } + FogPlatformSpecManager.getParsedSpec.resolves({ + fogUuid, + generation: 2, + spec: hostChangedSpec + }) + FogManager.findOneWithTags.resolves({ ...fog, host: '10.0.0.2' }) + RouterManager.findOne.callsFake((query) => { + if (query && query.isDefault) { + return Promise.resolve({ id: 1, iofogUuid: 'default', isDefault: true }) + } + return Promise.resolve(mutableRouter) + }) + RouterService.updateRouter.callsFake(async (_router, updates) => { + 
Object.assign(mutableRouter, updates) + return mutableRouter + }) + RouterConnectionManager.findAllWithRouters.callsFake((query) => { + if (query && query.destRouter === mutableRouter.id) { + return Promise.resolve([{ source: { iofogUuid: 'edge-downstream' } }]) + } + if (query && query.sourceRouter === mutableRouter.id) { + return Promise.resolve([]) + } + return Promise.resolve([]) + }) + }) + + it('enqueues downstream fog platform reconcile after upstream finalize', async () => { + await FogPlatformService.reconcileFog(fogUuid) + + expect(ReconcileOutboxManager.enqueueFogPlatform).to.have.been.calledWith({ + fogUuid: 'edge-downstream', + reason: 'spec-changed' + }, transaction) + }) + }) + + context('when upstreamRouters is omitted from spec', () => { + it('passes undefined to validateAndReturnUpstreamRouters on first create', async () => { + RouterManager.findOne.callsFake((query) => { + if (query && query.isDefault) { + return Promise.resolve({ id: 1, iofogUuid: 'default', isDefault: true }) + } + if (query && query.iofogUuid === fogUuid) { + return Promise.resolve(null) + } + return Promise.resolve(null) + }) + RouterConnectionManager.findAllWithRouters.resolves([]) + RouterService.validateAndReturnUpstreamRouters.resolves([{ id: 1, iofogUuid: 'default' }]) + $sandbox.stub(RouterService, 'createRouterForFog').resolves({ + id: 99, + iofogUuid: fogUuid, + isEdge: true + }) + + await FogPlatformService.reconcileFog(fogUuid) + + expect(RouterService.validateAndReturnUpstreamRouters).to.have.been.calledWith( + undefined, + false, + sinon.match({ id: 1, isDefault: true }), + transaction + ) + expect(RouterService.createRouterForFog).to.have.been.calledOnce + expect(RouterService.updateRouter).to.not.have.been.called + }) + + it('preserves existing upstream connections when spec omits upstreamRouters', async () => { + const upstreamConnection = { + dest: { id: 1, iofogUuid: 'default', isDefault: true } + } + 
RouterConnectionManager.findAllWithRouters.resolves([upstreamConnection]) + RouterService.validateAndReturnUpstreamRouters.resolves([{ id: 1, iofogUuid: 'default' }]) + + await FogPlatformService.reconcileFog(fogUuid) + + expect(RouterService.validateAndReturnUpstreamRouters).to.have.been.calledWith( + [Constants.DEFAULT_ROUTER_NAME], + false, + sinon.match({ id: 1, isDefault: true }), + transaction + ) + }) + + it('passes explicit empty upstreamRouters without applying defaults', async () => { + FogPlatformSpecManager.getParsedSpec.resolves({ + fogUuid, + generation: 2, + spec: { ...spec, upstreamRouters: [] } + }) + RouterConnectionManager.findAllWithRouters.resolves([]) + + await FogPlatformService.reconcileFog(fogUuid) + + expect(RouterService.validateAndReturnUpstreamRouters).to.have.been.calledWith( + [], + false, + sinon.match({ id: 1, isDefault: true }), + transaction + ) + }) + }) + + context('when upstreamNatsServers is omitted from spec', () => { + it('preserves existing NATS upstream connections during reconcile prepare', async () => { + NatsInstanceManager.findOne.resolves({ id: 1, isHub: true, iofogUuid: 'hub' }) + NatsInstanceManager.findByFog.resolves({ id: 5, isLeaf: true, iofogUuid: fogUuid }) + NatsConnectionManager.findAllWithNats.resolves([ + { dest: { id: 1, isHub: true, iofogUuid: 'hub' } } + ]) + + await FogPlatformService.reconcileFog(fogUuid) + + expect(NatsService.ensureNatsForFogPhased).to.have.been.calledWith( + sinon.match.any, + sinon.match.has('upstreamNatsServers', ['default-nats-hub']) + ) + }) + }) + }) + + describe('.getDownstreamFogUuidsForUpstream()', () => { + it('returns downstream fogs connected via router and NATS upstream', async () => { + $sandbox.stub(RouterManager, 'findOne').resolves({ id: 10, iofogUuid: fogUuid }) + $sandbox.stub(RouterConnectionManager, 'findAllWithRouters').resolves([ + { source: { iofogUuid: 'edge-downstream' } } + ]) + $sandbox.stub(NatsInstanceManager, 'findByFog').resolves({ id: 20, iofogUuid: 
fogUuid }) + $sandbox.stub(NatsConnectionManager, 'findAllWithNats').resolves([ + { source: { iofogUuid: 'leaf-downstream' } } + ]) + + const result = await FogPlatformService.getDownstreamFogUuidsForUpstream(fogUuid, transaction) + + expect(result).to.have.members(['edge-downstream', 'leaf-downstream']) + }) + }) + + describe('.endpointsChanged()', () => { + it('detects host and port drift', () => { + expect(FogPlatformService.endpointsChanged( + { host: '10.0.0.1', routerHost: '10.0.0.1', messagingPort: '5671' }, + { host: '10.0.0.2', routerHost: '10.0.0.2', messagingPort: '5671' } + )).to.equal(true) + expect(FogPlatformService.endpointsChanged( + { host: '10.0.0.1', routerHost: '10.0.0.1', messagingPort: '5671' }, + { host: '10.0.0.2', routerHost: '10.0.0.1', messagingPort: '5671' } + )).to.equal(true) + expect(FogPlatformService.endpointsChanged( + { host: '10.0.0.1', routerHost: '10.0.0.1', messagingPort: '5671' }, + { host: '10.0.0.1', routerHost: '10.0.0.1', messagingPort: '5671' } + )).to.equal(false) + }) + }) + + describe('.resolveNatsConfigFromSpec()', () => { + it('preserves existing NATS upstream connections when spec omits upstreamNatsServers', async () => { + $sandbox.stub(NatsInstanceManager, 'findOne').resolves({ id: 1, isHub: true, iofogUuid: 'hub' }) + $sandbox.stub(NatsInstanceManager, 'findByFog').resolves({ id: 5, iofogUuid: fogUuid, isLeaf: true }) + $sandbox.stub(NatsConnectionManager, 'findAllWithNats').resolves([ + { dest: { id: 1, iofogUuid: 'hub', isHub: true } } + ]) + + const result = await FogPlatformService.resolveNatsConfigFromSpec( + fogUuid, + { natsMode: 'leaf' }, + transaction + ) + + expect(result.upstreamNatsServers).to.eql(['default-nats-hub']) + }) }) describe('.markReconcileFailed()', () => { diff --git a/test/src/services/iofog-service.test.js b/test/src/services/iofog-service.test.js index 9f8199d7..ff4660a4 100644 --- a/test/src/services/iofog-service.test.js +++ b/test/src/services/iofog-service.test.js @@ -27,7 +27,7 
@@ const Errors = require('../../../src/helpers/errors') const config = require('../../../src/config') const FogPlatformSpecManager = require('../../../src/data/managers/fog-platform-spec-manager') const FogPlatformStatusManager = require('../../../src/data/managers/fog-platform-status-manager') -const FogPlatformReconcileTaskManager = require('../../../src/data/managers/fog-platform-reconcile-task-manager') +const ReconcileOutboxManager = require('../../../src/data/managers/reconcile-outbox-manager') const isCLI = false const transaction = {} @@ -89,7 +89,7 @@ function stubCreateFogDeps (sandbox, { uuid = 'testUuid', existingFogs = [{ uuid sandbox.stub(ioFogService, '_handleRouterCertificates').resolves() sandbox.stub(FogPlatformSpecManager, 'upsertSpec').resolves({ fogUuid: uuid, generation: 1 }) sandbox.stub(FogPlatformStatusManager, 'ensurePending').resolves() - sandbox.stub(FogPlatformReconcileTaskManager, 'enqueueFogPlatformReconcileTask').resolves() + sandbox.stub(ReconcileOutboxManager, 'enqueueFogPlatform').resolves() } function stubUpdateFogDeps (sandbox, oldFog) { @@ -113,7 +113,9 @@ function stubUpdateFogDeps (sandbox, oldFog) { sandbox.stub(FogPlatformSpecManager, 'getParsedSpec').resolves(null) sandbox.stub(FogPlatformSpecManager, 'upsertSpec').resolves({ fogUuid: oldFog.uuid, generation: 2 }) sandbox.stub(FogPlatformStatusManager, 'ensurePending').resolves() - sandbox.stub(FogPlatformReconcileTaskManager, 'enqueueFogPlatformReconcileTask').resolves() + sandbox.stub(ReconcileOutboxManager, 'enqueueFogPlatform').resolves() + sandbox.stub(NatsInstanceManager, 'findByFog').resolves(null) + sandbox.stub(NatsConnectionManager, 'findAllWithNats').resolves([]) } describe('ioFog Service', () => { @@ -172,7 +174,7 @@ describe('ioFog Service', () => { natsMode: 'leaf' }) expect(FogPlatformStatusManager.ensurePending).to.have.been.calledWith(uuid, transaction) - expect(FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask).to.have.been.calledWith({ + 
expect(ReconcileOutboxManager.enqueueFogPlatform).to.have.been.calledWith({ fogUuid: uuid, reason: 'spec-changed', specGeneration: 1 @@ -295,7 +297,7 @@ describe('ioFog Service', () => { await $subject expect(FogPlatformSpecManager.upsertSpec).to.have.been.calledOnce expect(FogPlatformStatusManager.ensurePending).to.have.been.calledWith(uuid, transaction) - expect(FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask).to.have.been.calledWith({ + expect(ReconcileOutboxManager.enqueueFogPlatform).to.have.been.calledWith({ fogUuid: uuid, reason: 'spec-changed', specGeneration: 2 @@ -315,6 +317,44 @@ describe('ioFog Service', () => { .to.be.rejectedWith('Agent Resource Name is immutable') }) + context('when upstream endpoint changes', () => { + beforeEach(() => { + RouterManager.findOne.callsFake((query) => { + if (query && query.iofogUuid === uuid) { + return Promise.resolve({ id: 10, iofogUuid: uuid, messagingPort: 5671, host: '1.2.3.4' }) + } + return Promise.resolve({ id: 1, isDefault: true }) + }) + RouterConnectionManager.findAllWithRouters.callsFake((query) => { + if (query && query.destRouter === 10) { + return Promise.resolve([{ source: { iofogUuid: 'edge-downstream' } }]) + } + return Promise.resolve([]) + }) + NatsInstanceManager.findByFog.resolves({ id: 20, iofogUuid: uuid, serverPort: 4222 }) + NatsConnectionManager.findAllWithNats.resolves([]) + FogPlatformSpecManager.getParsedSpec.resolves({ + fogUuid: uuid, + generation: 1, + spec: { + routerMode: 'edge', + natsMode: 'leaf', + host: '1.2.3.4', + messagingPort: 5671 + } + }) + }) + + it('does not enqueue downstream platform reconcile on PATCH', async () => { + await $subject + + expect(ReconcileOutboxManager.enqueueFogPlatform).to.not.have.been.calledWith({ + fogUuid: 'edge-downstream', + reason: 'spec-changed' + }, transaction) + }) + }) + context('when fog is not found', () => { beforeEach(() => { ioFogManager.findOne.resolves(null) @@ -416,7 +456,7 @@ describe('ioFog Service', () => { 
$sandbox.stub(ioFogManager, 'delete').resolves() $sandbox.stub(NatsService, 'cleanupNatsForFog').resolves() $sandbox.stub(FogPlatformStatusManager, 'setPhase').resolves() - $sandbox.stub(FogPlatformReconcileTaskManager, 'enqueueFogPlatformReconcileTask').resolves() + $sandbox.stub(ReconcileOutboxManager, 'enqueueFogPlatform').resolves() }) it('marks fog deleting and enqueues async teardown', async () => { @@ -424,7 +464,7 @@ describe('ioFog Service', () => { expect(Validator.validate).to.have.been.calledWith(fogData, Validator.schemas.iofogDelete) expect(result).to.eql({ uuid }) expect(FogPlatformStatusManager.setPhase).to.have.been.calledWith(uuid, 'Deleting', {}, transaction) - expect(FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask).to.have.been.calledWith({ + expect(ReconcileOutboxManager.enqueueFogPlatform).to.have.been.calledWith({ fogUuid: uuid, reason: 'delete' }, transaction) @@ -459,7 +499,7 @@ describe('ioFog Service', () => { generation: 4, spec: { routerMode: 'edge', natsMode: 'leaf' } }) - $sandbox.stub(FogPlatformReconcileTaskManager, 'enqueueFogPlatformReconcileTask').resolves() + $sandbox.stub(ReconcileOutboxManager, 'enqueueFogPlatform').resolves() }) it('resets failed platform status and enqueues manual retry', async () => { @@ -471,7 +511,7 @@ describe('ioFog Service', () => { { lastError: null }, transaction ) - expect(FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask).to.have.been.calledWith({ + expect(ReconcileOutboxManager.enqueueFogPlatform).to.have.been.calledWith({ fogUuid: uuid, reason: 'manual-retry', specGeneration: 4 diff --git a/test/src/services/microservice-port.test.js b/test/src/services/microservice-port.test.js index 4527861c..d706ecaa 100644 --- a/test/src/services/microservice-port.test.js +++ b/test/src/services/microservice-port.test.js @@ -22,7 +22,7 @@ describe('Microservice Port Service', () => { const iofogUuid = 'fog-uuid' const agent = { uuid: iofogUuid, - getMicroservice: () => 
Promise.resolve([]) + getMicroservice: sinon.stub().resolves([]) } def('microserviceData', () => ({ @@ -42,4 +42,47 @@ describe('Microservice Port Service', () => { ) }) }) + + describe('.validatePortMappings() duplicate check', () => { + const transaction = { id: 'tx-1' } + const iofogUuid = 'fog-uuid' + const occupiedPort = 8080 + let agent + let microserviceOnAgent + + def('microserviceData', () => ({ + iofogUuid, + ports: [{ internal: 80, external: occupiedPort }] + })) + def('subject', () => MicroservicePortService.validatePortMappings($microserviceData, transaction)) + + beforeEach(() => { + microserviceOnAgent = { + uuid: 'other-ms-uuid', + getPorts: sinon.stub().resolves([{ portExternal: occupiedPort }]) + } + agent = { + uuid: iofogUuid, + getMicroservice: sinon.stub().resolves([microserviceOnAgent]) + } + $sandbox.stub(ioFogManager, 'findOne').resolves(agent) + }) + + it('passes the caller transaction to association reads', async () => { + try { + await $subject + } catch (error) { + // expected when port is taken + } + expect(agent.getMicroservice).to.have.been.calledWith({ transaction }) + expect(microserviceOnAgent.getPorts).to.have.been.calledWith({ transaction }) + }) + + it('rejects when external port is already allocated on the agent', () => { + return expect($subject).to.be.rejectedWith( + Errors.ValidationError, + /Port '8080' is not available/ + ) + }) + }) }) diff --git a/test/src/services/microservices-service.test.js b/test/src/services/microservices-service.test.js index ae547fe1..920b1d0b 100644 --- a/test/src/services/microservices-service.test.js +++ b/test/src/services/microservices-service.test.js @@ -22,6 +22,7 @@ const MicroserviceCdiDevManager = require('../../../src/data/managers/microservi const MicroserviceCapAddManager = require('../../../src/data/managers/microservice-cap-add-manager') const MicroserviceCapDropManager = require('../../../src/data/managers/microservice-cap-drop-manager') const MicroserviceHealthCheckManager = 
require('../../../src/data/managers/microservice-healthcheck-manager') +const VolumeMountService = require('../../../src/services/volume-mount-service') const RbacRoleManager = require('../../../src/data/managers/rbac-role-manager') const RbacServiceAccountManager = require('../../../src/data/managers/rbac-service-account-manager') const NatsAuthService = require('../../../src/services/nats-auth-service') @@ -343,6 +344,45 @@ describe('Microservices Service', () => { it('rejects updates', () => expect($subject).to.be.rejectedWith(Errors.ValidationError)) }) + context('when disabling natsAccess via natsConfig', () => { + const natsEnabled = buildMicroserviceRecord({ + uuid: msvcUuid, + name: 'immutable-name', + catalogItem: null, + natsAccess: true, + natsCredsSecretName: 'nats-creds-msvc' + }) + + def('updateData', () => ({ natsConfig: { natsAccess: false } })) + + beforeEach(() => { + MicroserviceManager.findOne.resolves(natsEnabled) + MicroserviceManager.findOneWithCategory.resolves({ + ...natsEnabled, + catalogItem: null, + getPorts: () => Promise.resolve([]), + getImages: () => Promise.resolve([]) + }) + MicroserviceManager.updateAndFind.resolves({ ...natsEnabled, natsAccess: false }) + $sandbox.stub(NatsAuthService, 'revokeMicroserviceUser').resolves() + $sandbox.stub(NatsAuthService, 'reissueUserForMicroservice').resolves() + $sandbox.stub(NatsAuthService, 'ensureUserForMicroservice').resolves() + $sandbox.stub(MicroserviceEnvManager, 'delete').resolves() + $sandbox.stub(VolumeMountService, 'unlinkVolumeMountEndpoint').resolves() + $sandbox.stub(MicroserviceManager, 'update').resolves() + }) + + it('revokes credentials and does not reissue when disabling', async () => { + await $subject + + expect(NatsAuthService.revokeMicroserviceUser).to.have.been.calledOnceWith(msvcUuid, transaction) + expect(NatsAuthService.reissueUserForMicroservice).to.not.have.been.called + expect(NatsAuthService.ensureUserForMicroservice).to.not.have.been.called + 
expect(MicroserviceEnvManager.delete).to.have.been.calledOnce + expect(VolumeMappingManager.delete).to.have.been.calledOnce + }) + }) + context('when volumeMappings include a system serviceAccount volume', () => { const userVolume = { hostDestination: 'nats-creds-data', @@ -521,7 +561,7 @@ describe('Microservices Service', () => { const result = await $subject expect(Validator.validate).to.have.been.calledWith(portMappingData, Validator.schemas.portsCreate) expect(MicroserviceManager.findMicroserviceOnGet).to.have.been.calledWith({ uuid: msvcUuid }, transaction) - expect(MicroservicePortService.validatePortMapping).to.have.been.calledWith(agent, portMappingData, {}, transaction) + expect(MicroservicePortService.validatePortMapping).to.have.been.calledWith(agent, portMappingData, transaction) expect(MicroservicePortService.createPortMapping).to.have.been.calledWith(microservice, portMappingData, transaction) expect(result).to.equal(createdMapping) }) diff --git a/test/src/services/nats-auth-orchestration.test.js b/test/src/services/nats-auth-orchestration.test.js new file mode 100644 index 00000000..ae0970f1 --- /dev/null +++ b/test/src/services/nats-auth-orchestration.test.js @@ -0,0 +1,33 @@ +const { expect } = require('chai') +const sinon = require('sinon') + +const ReconcileOutboxManager = require('../../../src/data/managers/reconcile-outbox-manager') +const NatsAuthService = require('../../../src/services/nats-auth-service') + +describe('NATS auth orchestration', () => { + def('sandbox', () => sinon.createSandbox()) + + afterEach(() => { + $sandbox.restore() + }) + + describe('scheduleReissueForAccountRule', () => { + it('does not enqueue reconcile outbox synchronously when scheduling reissue', () => { + const enqueueSpy = $sandbox.spy(ReconcileOutboxManager, 'enqueueNats') + + NatsAuthService.scheduleReissueForAccountRule(7) + + expect(enqueueSpy).to.not.have.been.called + }) + }) + + describe('scheduleReissueForUserRule', () => { + it('does not enqueue 
reconcile outbox synchronously when scheduling reissue', () => { + const enqueueSpy = $sandbox.spy(ReconcileOutboxManager, 'enqueueNats') + + NatsAuthService.scheduleReissueForUserRule(9) + + expect(enqueueSpy).to.not.have.been.called + }) + }) +}) diff --git a/test/src/services/nats-auth-service.test.js b/test/src/services/nats-auth-service.test.js index a665254f..576c165f 100644 --- a/test/src/services/nats-auth-service.test.js +++ b/test/src/services/nats-auth-service.test.js @@ -10,6 +10,7 @@ const ApplicationManager = require('../../../src/data/managers/application-manag const NatsOperatorManager = require('../../../src/data/managers/nats-operator-manager') const SecretService = require('../../../src/services/secret-service') const NatsService = require('../../../src/services/nats-service') +const ReconcileOutboxManager = require('../../../src/data/managers/reconcile-outbox-manager') const NatsAuthService = require('../../../src/services/nats-auth-service') const NatsSystemRules = require('../../../src/config/nats-system-rules') const { createOperator, createAccount } = require('@nats-io/nkeys') @@ -73,7 +74,7 @@ describe('NATS Auth Service', () => { return Promise.resolve(null) }) $sandbox.stub(NatsUserRuleManager, 'findOne').resolves(defaultUserRule) - $sandbox.stub(NatsService, 'enqueueReconcileTask').callsFake(() => Promise.resolve()) + $sandbox.stub(ReconcileOutboxManager, 'enqueueNats').callsFake(() => Promise.resolve()) }) context('when existing user has same account and same rule (ensure-only)', () => { @@ -183,7 +184,7 @@ describe('NATS Auth Service', () => { createdUser = null $sandbox.stub(NatsAccountRuleManager, 'updateOrCreate').resolves() $sandbox.stub(NatsUserRuleManager, 'updateOrCreate').resolves() - $sandbox.stub(NatsService, 'enqueueReconcileTask').resolves() + $sandbox.stub(ReconcileOutboxManager, 'enqueueNats').resolves() $sandbox.stub(NatsOperatorManager, 'findOne').resolves(operator) $sandbox.stub(SecretService, 
'getSecretEndpoint').callsFake((secretName) => { if (secretName === operator.seedSecretName) { diff --git a/test/src/services/nats-relay-transport.test.js b/test/src/services/nats-relay-transport.test.js index 2c872ad1..638c5c3d 100644 --- a/test/src/services/nats-relay-transport.test.js +++ b/test/src/services/nats-relay-transport.test.js @@ -2,6 +2,7 @@ const { expect } = require('chai') const sinon = require('sinon') const msgpack = require('@msgpack/msgpack') const WebSocket = require('ws') +const { headers: natsHeaders } = require('@nats-io/transport-node') const { NatsRelayTransportImpl, @@ -214,4 +215,56 @@ describe('NatsRelayTransportImpl', () => { await expect(slowTransport.publishToAgent(execId, Buffer.from('x'))) .to.be.rejectedWith(/not flushed within 20ms/) }) + + it('CLOSE relay closes open socket without invoking cleanupCallback', async () => { + const userWs = createMockWebSocket() + const cleanup = sinon.stub().resolves() + + await transport.enableForSession({ execId, user: userWs }, cleanup) + + const hdrs = natsHeaders() + hdrs.set('messageType', '4') + nc.publish(execUserSubject(execId), Buffer.from(''), { headers: hdrs }) + + await new Promise((resolve) => setImmediate(resolve)) + await new Promise((resolve) => setImmediate(resolve)) + + expect(userWs.close).to.have.been.calledOnce + expect(cleanup).to.not.have.been.called + }) + + it('CLOSE ack relay does not invoke cleanupCallback', async () => { + const userWs = createMockWebSocket() + const cleanup = sinon.stub().resolves() + + await transport.enableForSession({ execId, user: userWs }, cleanup) + + const hdrs = natsHeaders() + hdrs.set('messageType', '4') + hdrs.set('closeAck', 'true') + nc.publish(execUserSubject(execId), Buffer.from(''), { headers: hdrs }) + + await new Promise((resolve) => setImmediate(resolve)) + await new Promise((resolve) => setImmediate(resolve)) + + expect(cleanup).to.not.have.been.called + expect(userWs.close).to.not.have.been.called + }) + + it('CLOSE relay 
invokes cleanupCallback when socket is already closed', async () => { + const userWs = createMockWebSocket() + userWs.readyState = WebSocket.CLOSED + const cleanup = sinon.stub().resolves() + + await transport.enableForSession({ execId, user: userWs }, cleanup) + + const hdrs = natsHeaders() + hdrs.set('messageType', '4') + nc.publish(execUserSubject(execId), Buffer.from(''), { headers: hdrs }) + + await new Promise((resolve) => setImmediate(resolve)) + await new Promise((resolve) => setImmediate(resolve)) + + expect(cleanup).to.have.been.calledOnceWith(execId) + }) }) diff --git a/test/src/services/nats-resolver-bundle.test.js b/test/src/services/nats-resolver-bundle.test.js new file mode 100644 index 00000000..0c7a1a53 --- /dev/null +++ b/test/src/services/nats-resolver-bundle.test.js @@ -0,0 +1,100 @@ +const { expect } = require('chai') +const sinon = require('sinon') +const { Op } = require('sequelize') + +const NatsAccountManager = require('../../../src/data/managers/nats-account-manager') +const NatsInstanceManager = require('../../../src/data/managers/nats-instance-manager') +const NatsAccountRuleManager = require('../../../src/data/managers/nats-account-rule-manager') +const NatsConnectionManager = require('../../../src/data/managers/nats-connection-manager') +const ConfigMapManager = require('../../../src/data/managers/config-map-manager') +const FogManager = require('../../../src/data/managers/iofog-manager') +const ApplicationManager = require('../../../src/data/managers/application-manager') +const MicroserviceManager = require('../../../src/data/managers/microservice-manager') +const ConfigMapService = require('../../../src/services/config-map-service') +const NatsAuthService = require('../../../src/services/nats-auth-service') +const config = require('../../../src/config') +const transactionRunner = require('../../../src/helpers/transaction-runner') + +describe('NATS resolver bundle freshness', () => { + def('sandbox', () => sinon.createSandbox()) + 
const transaction = {} + + afterEach(() => { + $sandbox.restore() + }) + + it('uses refreshed account JWT in leaf bundle after in-reconcile reissue', async () => { + const fog = { uuid: 'fog-leaf-1', name: 'leaf-fog' } + const app = { id: 10, natsAccess: true, isSystem: false, natsRuleId: null } + const microservice = { + uuid: 'ms-1', + applicationId: 10, + iofogUuid: fog.uuid, + natsAccess: true + } + const staleAccount = { + id: 100, + applicationId: 10, + publicKey: 'STALEPK', + jwt: 'stale.jwt.token' + } + const freshAccount = { + id: 100, + applicationId: 10, + publicKey: 'STALEPK', + jwt: 'fresh.jwt.token' + } + + $sandbox.stub(config, 'getBoolean').returns(false) + $sandbox.stub(config, 'get').callsFake((key, defaultValue) => defaultValue) + $sandbox.stub(transactionRunner, 'isSqliteProvider').returns(false) + $sandbox.stub(transactionRunner, 'runInTransaction').callsFake(async (fn) => fn(transaction)) + $sandbox.stub(FogManager, 'findAll').resolves([fog]) + $sandbox.stub(ApplicationManager, 'findAll').resolves([app]) + $sandbox.stub(NatsInstanceManager, 'findAll').resolves([{ iofogUuid: fog.uuid, isLeaf: true, isHub: false }]) + $sandbox.stub(NatsConnectionManager, 'findAllWithNats').resolves([]) + $sandbox.stub(MicroserviceManager, 'findAll').callsFake((query) => { + if (query.name) { + return Promise.resolve([{ uuid: 'nats-ms', iofogUuid: fog.uuid, name: 'nats' }]) + } + return Promise.resolve([microservice]) + }) + $sandbox.stub(NatsAccountRuleManager, 'findOne').resolves({ id: 1, name: 'default-account' }) + $sandbox.stub(NatsAccountManager, 'findAll').callsFake((query) => { + if (query.isSystem) { + return Promise.resolve([]) + } + if (query.applicationId && query.applicationId[Op.in]) { + return Promise.resolve([freshAccount]) + } + return Promise.resolve([staleAccount]) + }) + $sandbox.stub(NatsAccountManager, 'findOne').resolves(null) + $sandbox.stub(NatsAuthService, 'ensureSystemAccount').resolves() + $sandbox.stub(NatsAuthService, 
'ensureLeafSystemAccount').resolves({ + publicKey: 'LEAFSYS', + jwt: 'leaf-sys.jwt' + }) + $sandbox.stub(NatsAuthService, 'reissueUserForMicroservice').callsFake(async () => { + staleAccount.jwt = freshAccount.jwt + }) + $sandbox.stub(ConfigMapManager, 'getConfigMap').resolves(null) + + const capturedBundles = [] + $sandbox.stub(ConfigMapService, 'createConfigMapEndpoint').callsFake(async (payload) => { + capturedBundles.push(payload.data) + return payload + }) + + const NatsService = require('../../../src/services/nats-service') + await NatsService.reconcileResolverArtifacts({ + reason: 'account-created', + applicationId: 10, + fogUuids: [fog.uuid] + }) + + const leafBundle = capturedBundles.find((bundle) => bundle && bundle['STALEPK.jwt']) + expect(leafBundle).to.not.equal(undefined) + expect(leafBundle['STALEPK.jwt']).to.equal('fresh.jwt.token') + }) +}) diff --git a/test/src/services/nats-service.test.js b/test/src/services/nats-service.test.js index 36b0a93b..4dcb11ae 100644 --- a/test/src/services/nats-service.test.js +++ b/test/src/services/nats-service.test.js @@ -5,6 +5,7 @@ const NatsService = require('../../../src/services/nats-service') const NatsInstanceManager = require('../../../src/data/managers/nats-instance-manager') const NatsConnectionManager = require('../../../src/data/managers/nats-connection-manager') const NatsAccountManager = require('../../../src/data/managers/nats-account-manager') +const NatsAccountRuleManager = require('../../../src/data/managers/nats-account-rule-manager') const NatsUserManager = require('../../../src/data/managers/nats-user-manager') const MicroserviceManager = require('../../../src/data/managers/microservice-manager') const VolumeMappingManager = require('../../../src/data/managers/volume-mapping-manager') @@ -26,7 +27,7 @@ describe('NATS Service', () => { const natsInstance = { id: 77, isLeaf: true, isHub: false } const microservices = [{ uuid: 'ms-1' }] - def('subject', () => NatsService.cleanupNatsForFog(fog, 
transaction)) + def('subject', () => NatsService.cleanupNatsForFogDb(fog, transaction)) beforeEach(() => { $sandbox.stub(NatsInstanceManager, 'findByFog').returns(Promise.resolve(natsInstance)) @@ -221,4 +222,180 @@ describe('NATS Service', () => { expect(payload.nats.operator_service_urls).to.eql(['https://hub:4222']) }) }) + + describe('K8s I/O outside transactions (R-04–R-06)', () => { + const k8sClient = require('../../../src/utils/k8s-client') + const config = require('../../../src/config') + + function loadNatsServiceWithTxStub (runInTransactionImpl) { + const txRunnerPath = require.resolve('../../../src/helpers/transaction-runner') + const natsPath = require.resolve('../../../src/services/nats-service') + delete require.cache[natsPath] + delete require.cache[txRunnerPath] + const transactionRunner = require('../../../src/helpers/transaction-runner') + $sandbox.stub(transactionRunner, 'runInTransaction').callsFake(runInTransactionImpl) + return require('../../../src/services/nats-service') + } + + function stubKubernetesControlPlane () { + $sandbox.stub(config, 'get').callsFake((key, defaultValue) => { + if (key === 'app.ControlPlane') return 'kubernetes' + if (key === 'nats.enabled') return false + return defaultValue + }) + } + + function stubCleanupDb (NatsServiceFresh, natsInstance) { + $sandbox.stub(NatsInstanceManager, 'findByFog').resolves(natsInstance) + $sandbox.stub(NatsInstanceManager, 'findAll').resolves([]) + $sandbox.stub(NatsAccountManager, 'findOne').resolves({ id: 1, isSystem: true }) + $sandbox.stub(NatsUserManager, 'findOne').resolves({ credsSecretName: 'nats-creds-sys-admin' }) + $sandbox.stub(NatsConnectionManager, 'delete').resolves() + $sandbox.stub(NatsInstanceManager, 'delete').resolves() + $sandbox.stub(NatsAuthService, 'deleteServerSysUserForFog').resolves() + $sandbox.stub(MicroserviceManager, 'findAll').resolves([]) + $sandbox.stub(VolumeMappingManager, 'delete').resolves() + $sandbox.stub(VolumeMountService, 
'unlinkVolumeMountEndpoint').resolves() + $sandbox.stub(VolumeMountService, 'findVolumeMountedFogNodes').resolves([]) + $sandbox.stub(VolumeMountService, 'deleteVolumeMountEndpoint').resolves() + $sandbox.stub(ConfigMapService, 'deleteConfigMapEndpoint').resolves() + $sandbox.stub(SecretService, 'deleteSecretEndpoint').resolves() + return NatsServiceFresh + } + + it('cleanupNatsForFog applies K8s patch and rollout after runInTransaction', async () => { + stubKubernetesControlPlane() + const fog = { uuid: 'fog-1', name: 'local-agent' } + const natsInstance = { id: 77, isLeaf: false, isHub: false } + const callOrder = [] + const txLabels = [] + + const NatsServiceFresh = loadNatsServiceWithTxStub(async (fn, runOptions = {}) => { + if (runOptions.label) { + txLabels.push(runOptions.label) + } + callOrder.push('tx-start') + const result = await fn({}) + callOrder.push('tx-end') + return result + }) + stubCleanupDb(NatsServiceFresh, natsInstance) + $sandbox.stub(k8sClient, 'getConfigMap').callsFake(async () => { + callOrder.push('k8s-get') + return { data: { 'server.conf': 'routes: []' } } + }) + $sandbox.stub(k8sClient, 'patchConfigMap').callsFake(async () => { + callOrder.push('k8s-patch') + }) + $sandbox.stub(k8sClient, 'rolloutStatefulSet').callsFake(async () => { + callOrder.push('k8s-rollout') + }) + + await NatsServiceFresh.cleanupNatsForFog(fog) + + expect(txLabels).to.deep.equal(['nats.cleanupForFog']) + expect(callOrder).to.deep.equal(['tx-start', 'tx-end', 'k8s-get', 'k8s-patch', 'k8s-rollout']) + }) + + it('cleanupNatsForFog reuses parent transaction when provided', async () => { + const fog = { uuid: 'fog-1', name: 'local-agent' } + const parentTx = { + commit: $sandbox.stub(), + rollback: $sandbox.stub(), + afterCommit: $sandbox.stub() + } + const txLabels = [] + const natsInstance = { id: 77, isLeaf: false, isHub: false } + + const NatsServiceFresh = loadNatsServiceWithTxStub(async (fn, runOptions = {}) => { + if (runOptions.label) { + 
txLabels.push(runOptions.label) + } + return fn({}) + }) + stubCleanupDb(NatsServiceFresh, natsInstance) + + await NatsServiceFresh.cleanupNatsForFog(fog, parentTx) + + expect(txLabels).to.deep.equal([]) + expect(parentTx.afterCommit).to.have.been.calledOnce + }) + + it('ensureNatsForFog uses phased cert-prep, auth-prep, and topology transaction labels', async () => { + const txLabels = [] + + const NatsServiceFresh = loadNatsServiceWithTxStub(async (fn, runOptions = {}) => { + if (runOptions.label) { + txLabels.push(runOptions.label) + } + if (runOptions.label === 'nats.ensure.certPrep') { + return { + serverCertName: 'nats-server-local-agent', + mqttCertName: 'nats-mqtt-server-local-agent', + jetstreamKey: { secretName: 'jsk', jsk: 'key' } + } + } + if (runOptions.label === 'nats.ensure.authPrep') { + return { + mode: 'leaf', + isHub: false, + isLeaf: true, + serverPort: 4222, + leafPort: 7422, + clusterPort: 6222, + mqttPort: 1883, + httpPort: 8222, + configMapName: 'nats-server-conf-local-agent', + configKey: 'server.conf', + template: 'leaf', + jwtBundleConfigMapName: 'nats-jwt-bundle-local-agent', + sysCredsSecretName: null + } + } + if (runOptions.label === 'nats.ensure.topology') { + return { microservice: { uuid: 'ms-1' }, k8sHubPatch: null } + } + return fn({}) + }) + + await NatsServiceFresh.ensureNatsForFog( + { uuid: 'fog-1', name: 'local-agent' }, + { mode: 'leaf' } + ) + + expect(txLabels).to.deep.equal(['nats.ensure.certPrep', 'nats.ensure.authPrep', 'nats.ensure.topology']) + }) + + it('reconcileResolverArtifacts applies JWT bundle K8s patch after runInTransaction', async () => { + stubKubernetesControlPlane() + const callOrder = [] + + const NatsServiceFresh = loadNatsServiceWithTxStub(async (fn) => { + callOrder.push('tx-start') + const result = await fn({}) + callOrder.push('tx-end') + return result + }) + + $sandbox.stub(require('../../../src/data/managers/iofog-manager'), 'findAll').resolves([]) + 
$sandbox.stub(require('../../../src/data/managers/application-manager'), 'findAll').resolves([]) + $sandbox.stub(NatsInstanceManager, 'findAll').resolves([]) + $sandbox.stub(NatsAccountRuleManager, 'findOne').resolves({ id: 1, name: 'default-account' }) + $sandbox.stub(NatsAccountManager, 'findOne').resolves({ id: 1, isSystem: true }) + $sandbox.stub(require('../../../src/services/nats-auth-service'), 'ensureSystemAccount').resolves() + $sandbox.stub(ConfigMapManager, 'getConfigMap').resolves(null) + $sandbox.stub(ConfigMapService, 'createConfigMapEndpoint').resolves({ name: 'iofog-nats-jwt-bundle' }) + $sandbox.stub(k8sClient, 'getConfigMap').callsFake(async () => { + callOrder.push('k8s-get') + return null + }) + $sandbox.stub(k8sClient, 'patchConfigMap').callsFake(async () => { + callOrder.push('k8s-patch') + }) + + await NatsServiceFresh.reconcileResolverArtifacts({ fogUuids: [] }) + + expect(callOrder).to.deep.equal(['tx-start', 'tx-end', 'k8s-get', 'k8s-patch']) + }) + }) }) diff --git a/test/src/services/network-topology-service.test.js b/test/src/services/network-topology-service.test.js new file mode 100644 index 00000000..e23d3b18 --- /dev/null +++ b/test/src/services/network-topology-service.test.js @@ -0,0 +1,293 @@ +const { expect } = require('chai') +const sinon = require('sinon') + +const constants = require('../../../src/helpers/constants') +const Errors = require('../../../src/helpers/errors') +const RouterManager = require('../../../src/data/managers/router-manager') +const RouterConnectionManager = require('../../../src/data/managers/router-connection-manager') +const NatsInstanceManager = require('../../../src/data/managers/nats-instance-manager') +const NatsConnectionManager = require('../../../src/data/managers/nats-connection-manager') +const FogManager = require('../../../src/data/managers/iofog-manager') +const NetworkTopologyService = require('../../../src/services/network-topology-service') + +describe('Network Topology Service', () => { 
+ const transaction = {} + const originalControlPlane = process.env.CONTROL_PLANE + + def('sandbox', () => sinon.createSandbox()) + + afterEach(() => { + $sandbox.restore() + if (originalControlPlane === undefined) { + delete process.env.CONTROL_PLANE + } else { + process.env.CONTROL_PLANE = originalControlPlane + } + }) + + function stubFogFindAll (rows = []) { + $sandbox.stub(FogManager, 'findAll').resolves(rows) + } + + function makeRouter (overrides = {}) { + return { + id: 1, + isEdge: true, + isDefault: false, + iofogUuid: 'edge-uuid', + host: '10.0.0.1', + messagingPort: 5671, + edgeRouterPort: null, + interRouterPort: null, + ...overrides + } + } + + function makeDefaultRouter () { + return makeRouter({ + id: 99, + isEdge: false, + isDefault: true, + iofogUuid: null, + host: 'router.local', + edgeRouterPort: 45671, + interRouterPort: 55671 + }) + } + + function makeNats (overrides = {}) { + return { + id: 2, + isLeaf: true, + isHub: false, + iofogUuid: 'edge-uuid', + host: '10.0.0.1', + serverPort: 4222, + leafPort: 7422, + clusterPort: 6222, + mqttPort: 8883, + httpPort: 8222, + jsStorageSize: null, + jsMemoryStoreSize: null, + ...overrides + } + } + + describe('getSummary()', () => { + beforeEach(() => { + process.env.CONTROL_PLANE = 'remote' + $sandbox.stub(RouterManager, 'getEntity').returns({ + count: $sandbox.stub().resolves(3) + }) + $sandbox.stub(RouterConnectionManager, 'getEntity').returns({ + count: $sandbox.stub().resolves(2) + }) + $sandbox.stub(NatsInstanceManager, 'getEntity').returns({ + count: $sandbox.stub().resolves(3) + }) + $sandbox.stub(NatsConnectionManager, 'getEntity').returns({ + count: $sandbox.stub().resolves(2) + }) + }) + + it('returns control plane and counts', async () => { + const result = await NetworkTopologyService.getSummary({}, transaction) + expect(result.controlPlane).to.equal('remote') + expect(result.router.totalNodes).to.equal(3) + expect(result.router.totalConnections).to.equal(2) + 
expect(result.nats.totalNodes).to.equal(3) + expect(result.nats.totalConnections).to.equal(2) + }) + }) + + describe('listRouterNodes()', () => { + beforeEach(() => { + process.env.CONTROL_PLANE = 'remote' + $sandbox.stub(RouterManager, 'findOne').resolves(makeDefaultRouter()) + stubFogFindAll([{ uuid: 'edge-uuid', name: 'edge-1', host: '10.0.0.1' }]) + $sandbox.stub(RouterManager, 'getEntity').returns({ + findAndCountAll: $sandbox.stub().resolves({ + count: 1, + rows: [makeRouter()] + }) + }) + }) + + it('returns paginated router nodes with fog metadata', async () => { + const result = await NetworkTopologyService.listRouterNodes({ query: { limit: '10', offset: '0' } }, transaction) + expect(result.total).to.equal(1) + expect(result.nodes).to.have.length(1) + expect(result.nodes[0]).to.include({ + id: 'edge-uuid', + iofogUuid: 'edge-uuid', + fogName: 'edge-1', + host: '10.0.0.1', + deploymentTarget: 'edgelet', + displayName: 'edge-1', + role: 'edge', + mode: 'edge' + }) + }) + }) + + describe('getRouterNode()', () => { + beforeEach(() => { + process.env.CONTROL_PLANE = 'kubernetes' + $sandbox.stub(RouterManager, 'findOne') + .onFirstCall().resolves(makeDefaultRouter()) + stubFogFindAll([]) + }) + + it('returns default router detail with kubernetes deployment target', async () => { + const result = await NetworkTopologyService.getRouterNode({ + params: { id: constants.DEFAULT_ROUTER_NAME } + }, transaction) + + expect(result.id).to.equal(constants.DEFAULT_ROUTER_NAME) + expect(result.deploymentTarget).to.equal('kubernetes') + expect(result.displayName).to.equal('Kubernetes Router') + expect(result.isDefault).to.equal(true) + }) + + it('throws when router node is missing', async () => { + RouterManager.findOne.reset() + RouterManager.findOne.resolves(null) + await expect(NetworkTopologyService.getRouterNode({ + params: { id: constants.DEFAULT_ROUTER_NAME } + }, transaction)).to.be.rejectedWith(Errors.NotFoundError) + }) + }) + + 
describe('getRouterNodeConnections()', () => { + beforeEach(() => { + const defaultRouter = makeDefaultRouter() + const edgeRouter = makeRouter() + $sandbox.stub(RouterManager, 'findOne') + .onFirstCall().resolves(defaultRouter) + .onSecondCall().resolves(edgeRouter) + $sandbox.stub(RouterConnectionManager, 'findAllWithRouters') + .onFirstCall().resolves([{ + id: 7, + source: edgeRouter, + dest: defaultRouter + }]) + .onSecondCall().resolves([]) + }) + + it('returns upstream and downstream connections', async () => { + const result = await NetworkTopologyService.getRouterNodeConnections({ + params: { id: 'edge-uuid' } + }, transaction) + + expect(result.upstream).to.eql([{ + id: 7, + source: 'edge-uuid', + dest: constants.DEFAULT_ROUTER_NAME + }]) + expect(result.downstream).to.eql([]) + }) + }) + + describe('listRouterConnections()', () => { + beforeEach(() => { + const defaultRouter = makeDefaultRouter() + const edgeRouter = makeRouter() + $sandbox.stub(RouterManager, 'findOne').resolves(defaultRouter) + $sandbox.stub(RouterManager, 'getEntity').returns({}) + $sandbox.stub(RouterConnectionManager, 'getEntity').returns({ + findAndCountAll: $sandbox.stub().resolves({ + count: 1, + rows: [{ + id: 7, + source: edgeRouter, + dest: defaultRouter + }] + }) + }) + }) + + it('returns paginated formatted connections', async () => { + const result = await NetworkTopologyService.listRouterConnections({ query: {} }, transaction) + expect(result.connections).to.eql([{ + id: 7, + source: 'edge-uuid', + dest: constants.DEFAULT_ROUTER_NAME + }]) + }) + }) + + describe('getRouterOverview()', () => { + beforeEach(() => { + process.env.CONTROL_PLANE = 'remote' + const defaultRouter = makeDefaultRouter() + const edgeRouter = makeRouter() + $sandbox.stub(RouterManager, 'findOne').resolves(defaultRouter) + $sandbox.stub(RouterManager, 'findAll').resolves([]) + $sandbox.stub(RouterConnectionManager, 'findAllWithRouters').resolves([{ + id: 7, + source: edgeRouter, + dest: defaultRouter + 
}]) + stubFogFindAll([]) + }) + + it('returns default node and spoke groups', async () => { + const result = await NetworkTopologyService.getRouterOverview({}, transaction) + expect(result.defaultNode.id).to.equal(constants.DEFAULT_ROUTER_NAME) + expect(result.spokeGroups).to.eql([{ + upstreamOf: constants.DEFAULT_ROUTER_NAME, + role: 'edge', + count: 1 + }]) + }) + }) + + describe('getRouterSubgraph()', () => { + beforeEach(() => { + const defaultRouter = makeDefaultRouter() + const edgeRouter = makeRouter() + $sandbox.stub(RouterManager, 'findOne') + .onFirstCall().resolves(defaultRouter) + .onSecondCall().resolves(edgeRouter) + $sandbox.stub(RouterConnectionManager, 'findAllWithRouters') + .onFirstCall().resolves([{ id: 7, source: edgeRouter, dest: defaultRouter }]) + .onSecondCall().resolves([]) + stubFogFindAll([{ uuid: 'edge-uuid', name: 'edge-1', host: '10.0.0.1' }]) + }) + + it('requires center query parameter', async () => { + await expect(NetworkTopologyService.getRouterSubgraph({ query: {} }, transaction)) + .to.be.rejectedWith(Errors.ValidationError) + }) + + it('returns nodes and connections around center', async () => { + const result = await NetworkTopologyService.getRouterSubgraph({ + query: { center: constants.DEFAULT_ROUTER_NAME, depth: '1' } + }, transaction) + + expect(result.nodes.map((node) => node.id)).to.include(constants.DEFAULT_ROUTER_NAME) + expect(result.connections).to.have.length(1) + }) + }) + + describe('listNatsNodes()', () => { + beforeEach(() => { + process.env.CONTROL_PLANE = 'remote' + const defaultHub = makeNats({ id: 50, isLeaf: false, isHub: true, iofogUuid: null, host: 'nats.local' }) + $sandbox.stub(NatsInstanceManager, 'findOne').resolves(defaultHub) + stubFogFindAll([]) + $sandbox.stub(NatsInstanceManager, 'getEntity').returns({ + findAndCountAll: $sandbox.stub().resolves({ + count: 1, + rows: [defaultHub] + }) + }) + }) + + it('returns default hub node id', async () => { + const result = await 
NetworkTopologyService.listNatsNodes({ query: {} }, transaction) + expect(result.nodes[0].id).to.equal(constants.DEFAULT_NATS_HUB_NAME) + expect(result.nodes[0].deploymentTarget).to.equal('remote') + }) + }) +}) diff --git a/test/src/services/rbac-service.test.js b/test/src/services/rbac-service.test.js index 1d12d9a5..8e6f015a 100644 --- a/test/src/services/rbac-service.test.js +++ b/test/src/services/rbac-service.test.js @@ -68,6 +68,7 @@ describe('Rbac Service', () => { describe('.updateRoleEndpoint()', () => { const roleName = 'custom-role' const roleId = 99 + const roleRef = { kind: 'Role', name: roleName } const roleData = { rules: [{ apiGroups: ['edgelet.iofog.org/v1'], @@ -87,11 +88,14 @@ describe('Rbac Service', () => { .onFirstCall().resolves({ id: roleId, name: roleName }) .onSecondCall().resolves({ id: roleId, name: roleName }) $sandbox.stub(RbacRoleManager, 'updateRole').resolves({ name: roleName }) - $sandbox.stub(RbacRoleBindingManager, 'findAll').resolves([]) + $sandbox.stub(RbacRoleBindingManager, 'findAll').resolves([ + { name: 'binding1', roleRef: roleRef } + ]) + $sandbox.stub(RbacRoleBindingManager, 'updateRoleBinding').resolves({}) $sandbox.stub(RbacServiceAccountManager, 'findAll').resolves([ - { name: 'sa1', microserviceUuid: 'msvc-1', applicationId: 1, roleRef: roleName }, - { name: 'sa2', microserviceUuid: 'msvc-2', applicationId: 1, roleRef: roleName }, - { name: 'sa3', microserviceUuid: 'msvc-3', applicationId: 2, roleRef: roleName } + { name: 'sa1', microserviceUuid: 'msvc-1', applicationId: 1, roleRef }, + { name: 'sa2', microserviceUuid: 'msvc-2', applicationId: 1, roleRef }, + { name: 'sa3', microserviceUuid: 'msvc-3', applicationId: 2, roleRef } ]) $sandbox.stub(ApplicationManager, 'findOne') .withArgs({ id: 1 }).resolves({ name: 'app1' }) @@ -118,6 +122,50 @@ describe('Rbac Service', () => { transaction ) }) + + it('refreshes roleRef.name on linked bindings and service accounts', async () => { + await $subject + 
expect(RbacRoleBindingManager.updateRoleBinding).to.have.been.calledOnceWith( + 'binding1', + { roleRef: { kind: 'Role', name: roleName } }, + transaction + ) + expect(RbacServiceAccountManager.updateServiceAccount).to.have.been.calledWith( + 'app1', + 'sa1', + { roleRef: { kind: 'Role', name: roleName } }, + transaction + ) + }) + + context('when the role is renamed', () => { + const renamedRole = 'custom-role-v2' + const renameRoleData = { name: renamedRole } + + def('subject', () => RbacService.updateRoleEndpoint(roleName, renameRoleData, transaction)) + + beforeEach(() => { + RbacRoleManager.findOne.restore() + $sandbox.stub(RbacRoleManager, 'findOne') + .onFirstCall().resolves({ id: roleId, name: roleName }) + .onSecondCall().resolves({ id: roleId, name: renamedRole }) + }) + + it('rewrites roleRef.name on linked bindings and service accounts', async () => { + await $subject + expect(RbacRoleBindingManager.updateRoleBinding).to.have.been.calledOnceWith( + 'binding1', + { roleRef: { kind: 'Role', name: renamedRole } }, + transaction + ) + expect(RbacServiceAccountManager.updateServiceAccount).to.have.been.calledWith( + 'app2', + 'sa3', + { roleRef: { kind: 'Role', name: renamedRole } }, + transaction + ) + }) + }) }) describe('.updateRoleBindingEndpoint()', () => { diff --git a/test/src/services/registry-service.test.js b/test/src/services/registry-service.test.js index 95bc9f28..c2e6e23a 100644 --- a/test/src/services/registry-service.test.js +++ b/test/src/services/registry-service.test.js @@ -9,6 +9,7 @@ const FogManager = require('../../../src/data/managers/iofog-manager') const ChangeTrackingService = require('../../../src/services/change-tracking-service') const MicroserviceManager = require('../../../src/data/managers/microservice-manager') const SecretHelper = require('../../../src/helpers/secret-helper') +const vaultManager = require('../../../src/vault/vault-manager') const ErrorMessages = require('../../../src/helpers/error-messages') const Errors = 
require('../../../src/helpers/errors') @@ -54,12 +55,13 @@ describe('Registry Service', () => { $sandbox.stub(Validator, 'validate').resolves(true) $sandbox.stub(AppHelper, 'deleteUndefinedFields').callsFake((value) => value) $sandbox.stub(RegistryManager, 'create').resolves(created) - $sandbox.stub(SecretHelper, 'encryptSecret').resolves('encrypted-password') + $sandbox.stub(SecretHelper, 'encryptSecretInternal').resolves('encrypted-password') + $sandbox.stub(SecretHelper, 'encryptSecret').resolves('vault-ref') $sandbox.stub(RegistryManager, 'update').resolves() stubChangeTrackingDeps($sandbox) }) - it('validates input, encrypts password, and returns registry id', async () => { + it('validates input, encrypts password internally in tx, and returns registry id', async () => { const result = await $subject expect(Validator.validate).to.have.been.calledWith(registryData, Validator.schemas.registryCreate) expect(RegistryManager.create).to.have.been.calledWithMatch({ @@ -67,11 +69,11 @@ describe('Registry Service', () => { username: registryData.username, userEmail: registryData.email }, transaction) - expect(SecretHelper.encryptSecret).to.have.been.calledWith( + expect(SecretHelper.encryptSecretInternal).to.have.been.calledWith( { value: registryData.password }, - 'registry-16', - 'registry' + 'registry-16' ) + expect(SecretHelper.encryptSecret).to.not.have.been.called expect(ChangeTrackingService.update).to.have.been.calledWith( 'fog-uuid', ChangeTrackingService.events.registries, @@ -95,6 +97,7 @@ describe('Registry Service', () => { it('skips password encryption', async () => { await $subject + expect(SecretHelper.encryptSecretInternal).to.not.have.been.called expect(SecretHelper.encryptSecret).to.not.have.been.called expect(RegistryManager.update).to.not.have.been.called }) @@ -231,6 +234,7 @@ describe('Registry Service', () => { context('when password is cleared and vault reference exists', () => { beforeEach(() => { + $sandbox.stub(vaultManager, 
'isEnabled').returns(true) RegistryManager.findOne.resolves({ ...existing, password: 'vault:ref' }) $sandbox.stub(SecretHelper, 'isVaultReference').returns(true) $sandbox.stub(SecretHelper, 'deleteSecret').resolves() diff --git a/test/src/services/router-service.test.js b/test/src/services/router-service.test.js index a385075a..48a3483a 100644 --- a/test/src/services/router-service.test.js +++ b/test/src/services/router-service.test.js @@ -340,6 +340,65 @@ describe('Router Service', () => { return expect(true).to.eql(false) }) }) + + it('preserves existing bridges when regenerating router config', async () => { + const preservedBridges = { + tcpConnectors: { + 'api-connector': { + name: 'api-connector', + host: '127.0.0.1', + port: '8080', + address: 'api' + } + }, + tcpListeners: { + 'api-listener': { + name: 'api-listener', + port: '9001', + address: 'api' + } + } + } + MicroserviceManager.findOne.resolves({ + id: 1, + uuid: 'routerMsvcUuid', + iofogUuid: router.iofogUuid, + catalogItemId: routerCatalogItem.id, + config: JSON.stringify({ bridges: preservedBridges }) + }) + + await RouterService.updateConfig(routerID, containerEngine, transaction) + + expect(MicroserviceManager.update).to.have.been.called + const updatedConfig = JSON.parse(MicroserviceManager.update.firstCall.args[1].config) + expect(updatedConfig.bridges).to.eql(preservedBridges) + }) + + it('persists router config when upstream connector fingerprint changes', async () => { + MicroserviceManager.findOne.resolves({ + id: 1, + uuid: 'routerMsvcUuid', + iofogUuid: router.iofogUuid, + catalogItemId: routerCatalogItem.id, + config: JSON.stringify({ + connectors: { + 'old-upstream': { + name: 'old-upstream', + host: '10.0.0.9', + port: '55671', + role: 'edge', + sslProfile: 'router-site-server-test-fog' + } + } + }) + }) + + await RouterService.updateConfig(routerID, containerEngine, transaction) + + expect(MicroserviceManager.update).to.have.been.called + const updatedConfig = 
JSON.parse(MicroserviceManager.update.firstCall.args[1].config) + expect(updatedConfig.connectors).to.not.have.property('old-upstream') + }) }) describe('.updateRouter', () => { diff --git a/test/src/services/service-bridge-config.test.js b/test/src/services/service-bridge-config.test.js index 949ebdfe..810d3f12 100644 --- a/test/src/services/service-bridge-config.test.js +++ b/test/src/services/service-bridge-config.test.js @@ -15,7 +15,7 @@ describe('Service bridge config', () => { afterEach(() => $sandbox.restore()) describe('.stripServiceDerivedBridges()', () => { - it('removes service-derived listeners and connectors while preserving router bridges', () => { + it('removes service-derived listeners while preserving connectors and router bridges', () => { const baseConfig = { bridges: { tcpListeners: { @@ -35,6 +35,7 @@ describe('Service bridge config', () => { 'fog-amqp': { name: 'fog-amqp', port: '5672', address: 'amqp' } }) expect(stripped.bridges.tcpConnectors).to.eql({ + 'api-connector': { name: 'api-connector', host: 'hub', port: '8080' }, 'upstream-router': { name: 'upstream-router', host: '10.0.0.2', port: '55671' } }) }) @@ -106,5 +107,40 @@ describe('Service bridge config', () => { expect(result.bridges.tcpListeners).to.eql({}) expect(MicroserviceManager.update).to.have.been.calledOnce }) + + it('preserves hub-managed tcpConnectors while rebuilding listeners', async () => { + const baseConfig = { + bridges: { + tcpListeners: { + 'stale-listener': { name: 'stale-listener', port: '8000', address: 'stale' } + }, + tcpConnectors: { + 'api-connector': { + name: 'api-connector', + host: '127.0.0.1', + port: '8080', + address: 'api', + processId: 'ms-uuid' + } + } + } + } + + const result = await ServiceBridgeConfig.recomputeServiceBridgeConfig(fogUuid, baseConfig, transaction) + + expect(result.bridges.tcpListeners).to.eql({ + 'api-listener': { name: 'api-listener', port: '9001', address: 'api' }, + 'mqtt-listener': { name: 'mqtt-listener', port: '9002', 
address: 'mqtt' } + }) + expect(result.bridges.tcpConnectors).to.eql({ + 'api-connector': { + name: 'api-connector', + host: '127.0.0.1', + port: '8080', + address: 'api', + processId: 'ms-uuid' + } + }) + }) }) }) diff --git a/test/src/services/service-platform-service.test.js b/test/src/services/service-platform-service.test.js index 2ab1c874..fd4c3561 100644 --- a/test/src/services/service-platform-service.test.js +++ b/test/src/services/service-platform-service.test.js @@ -4,12 +4,13 @@ const sinon = require('sinon') const ServicePlatformService = require('../../../src/services/service-platform-service') const ServiceManager = require('../../../src/data/managers/service-manager') const ServicePlatformReconcileTaskManager = require('../../../src/data/managers/service-platform-reconcile-task-manager') -const FogPlatformReconcileTaskManager = require('../../../src/data/managers/fog-platform-reconcile-task-manager') +const ReconcileOutboxManager = require('../../../src/data/managers/reconcile-outbox-manager') const HubRouterConfigLockManager = require('../../../src/data/managers/hub-router-config-lock-manager') const RouterManager = require('../../../src/data/managers/router-manager') const ServicesService = require('../../../src/services/services-service') const K8sClient = require('../../../src/utils/k8s-client') const config = require('../../../src/config') +const transactionRunner = require('../../../src/helpers/transaction-runner') describe('Service platform service', () => { def('sandbox', () => sinon.createSandbox()) @@ -58,6 +59,14 @@ describe('Service platform service', () => { } beforeEach(() => { + $sandbox.stub(transactionRunner, 'runInTransaction').callsFake(async (fn, options = {}) => { + const result = await fn(transaction) + if (options.label === 'servicePlatform.hubReconcile') { + expect(K8sClient.getConfigMap).to.not.have.been.called + expect(K8sClient.patchConfigMap).to.not.have.been.called + } + return result + }) $sandbox.stub(config, 
'get').callsFake((key, defaultValue) => { if (key === 'app.uuid') { return 'controller-uuid-1' @@ -92,31 +101,30 @@ describe('Service platform service', () => { } }) $sandbox.stub(K8sClient, 'patchConfigMap').resolves() - $sandbox.stub(ServicesService, '_updateK8sService').resolves() - $sandbox.stub(K8sClient, 'watchLoadBalancerIP').resolves('203.0.113.10') + $sandbox.stub(ServicesService, '_syncK8sServiceResource').resolves('203.0.113.10') $sandbox.stub(ServicesService, 'handleServiceDistribution').resolves(['fog-a']) - $sandbox.stub(FogPlatformReconcileTaskManager, 'enqueueFogPlatformReconcileTask').resolves({ id: 1 }) + $sandbox.stub(ReconcileOutboxManager, 'enqueueFogPlatform').resolves({ id: 1 }) $sandbox.stub(ServiceManager, 'findOneWithTags').resolves({ ...service, tags: [...service.tags] }) $sandbox.stub(ServiceManager, 'update').resolves() $sandbox.stub(ServicePlatformReconcileTaskManager, 'delete').resolves() }) it('runs hub reconcile, fan-out, and marks provisioning ready', async () => { - const result = await ServicePlatformService.reconcileService(serviceName, task, transaction) + const result = await ServicePlatformService.reconcileService(serviceName, task) expect(HubRouterConfigLockManager.tryAcquire).to.have.been.calledOnce - expect(K8sClient.patchConfigMap).to.have.been.called - expect(ServicesService._updateK8sService).to.have.been.calledOnce - expect(K8sClient.watchLoadBalancerIP).to.have.been.calledOnce + expect(K8sClient.getConfigMap).to.have.been.calledOnce + expect(K8sClient.patchConfigMap).to.have.been.calledOnce + expect(ServicesService._syncK8sServiceResource).to.have.been.calledOnce expect(HubRouterConfigLockManager.release).to.have.been.calledOnce - expect(FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask).to.have.been.calledWith({ + expect(ReconcileOutboxManager.enqueueFogPlatform).to.have.been.calledWith({ fogUuid: 'fog-a', reason: 'service-changed' - }, transaction) + }, sinon.match.any) 
expect(ServiceManager.update).to.have.been.calledWith( { name: serviceName }, { provisioningStatus: 'ready', provisioningError: null }, - transaction + sinon.match.any ) expect(result.provisioningStatus).to.equal('ready') }) @@ -136,27 +144,27 @@ describe('Service platform service', () => { }) ServicesService.handleServiceDistribution.resolves(['fog-a', 'fog-b', 'fog-c']) - await ServicePlatformService.reconcileService(serviceName, tagChangeTask, transaction) + await ServicePlatformService.reconcileService(serviceName, tagChangeTask) expect(ServicesService.handleServiceDistribution).to.have.been.calledWith( ['site-a', 'site-b'], - transaction + sinon.match.any ) - expect(FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask).to.have.callCount(3) + expect(ReconcileOutboxManager.enqueueFogPlatform).to.have.callCount(3) }) it('is safe to reconcile the same service twice', async () => { - await ServicePlatformService.reconcileService(serviceName, task, transaction) - await ServicePlatformService.reconcileService(serviceName, task, transaction) + await ServicePlatformService.reconcileService(serviceName, task) + await ServicePlatformService.reconcileService(serviceName, task) - expect(K8sClient.patchConfigMap.callCount).to.be.at.least(4) + expect(K8sClient.patchConfigMap.callCount).to.equal(2) }) it('throws when LoadBalancer IP watch times out', async () => { - K8sClient.watchLoadBalancerIP.resolves(null) + ServicesService._syncK8sServiceResource.resolves(null) try { - await ServicePlatformService.reconcileService(serviceName, task, transaction) + await ServicePlatformService.reconcileService(serviceName, task) throw new Error('expected reconcile to fail') } catch (error) { expect(error.message).to.include('LoadBalancer IP not assigned') @@ -166,7 +174,7 @@ describe('Service platform service', () => { expect(ServiceManager.update).to.not.have.been.calledWith( { name: serviceName }, { provisioningStatus: 'ready', provisioningError: null }, - transaction + 
sinon.match.any ) }) }) @@ -191,6 +199,7 @@ describe('Service platform service', () => { } beforeEach(() => { + $sandbox.stub(transactionRunner, 'runInTransaction').callsFake(async (fn) => fn(transaction)) $sandbox.stub(config, 'get').callsFake((key, defaultValue) => { if (key === 'app.uuid') { return 'controller-uuid-1' @@ -212,33 +221,35 @@ describe('Service platform service', () => { $sandbox.stub(K8sClient, 'patchConfigMap').resolves() $sandbox.stub(ServicesService, '_deleteK8sService').resolves() $sandbox.stub(ServicesService, 'handleServiceDistribution').resolves(['fog-a', 'fog-b']) - $sandbox.stub(FogPlatformReconcileTaskManager, 'enqueueFogPlatformReconcileTask').resolves({ id: 1 }) + $sandbox.stub(ReconcileOutboxManager, 'enqueueFogPlatform').resolves({ id: 1 }) $sandbox.stub(ServicePlatformReconcileTaskManager, 'delete').resolves() $sandbox.stub(ServiceManager, 'findOneWithTags') $sandbox.stub(ServiceManager, 'update') }) it('uses spec_snapshot for hub teardown, fan-out, and destroys the task', async () => { - const result = await ServicePlatformService.reconcileService(serviceName, deleteTask, transaction) + const result = await ServicePlatformService.reconcileService(serviceName, deleteTask) expect(ServiceManager.findOneWithTags).to.not.have.been.called - expect(K8sClient.patchConfigMap).to.have.been.calledTwice + expect(K8sClient.getConfigMap).to.have.been.calledOnce + expect(K8sClient.patchConfigMap).to.have.been.calledOnce + const patchData = K8sClient.patchConfigMap.firstCall.args[1] + const routerConfig = JSON.parse(patchData.data['skrouterd.json']) + expect(routerConfig).to.eql([]) expect(ServicesService._deleteK8sService).to.have.been.calledWith(serviceName) expect(ServicesService.handleServiceDistribution).to.have.been.calledWith( ['site-a', 'site-b'], - transaction + sinon.match.any ) - expect(ServicePlatformReconcileTaskManager.delete).to.have.been.calledWith({ id: 99 }, transaction) + 
expect(ServicePlatformReconcileTaskManager.delete).to.have.been.calledWith({ id: 99 }, sinon.match.any) expect(ServiceManager.update).to.not.have.been.called expect(result.isDelete).to.equal(true) }) }) describe('.acquireHubLockWithTimeout()', () => { - let clock - beforeEach(() => { - clock = sinon.useFakeTimers() + $sandbox.stub(transactionRunner, 'runInTransaction').callsFake(async (fn) => fn(transaction)) $sandbox.stub(config, 'get').callsFake((key, defaultValue) => { if (key === 'settings.hubRouterConfigLockTimeoutSeconds') { return 1 @@ -248,16 +259,11 @@ describe('Service platform service', () => { $sandbox.stub(HubRouterConfigLockManager, 'tryAcquire').resolves(false) }) - afterEach(() => { - clock.restore() - }) - - it('times out when hub lock is held by another controller', async () => { - const acquirePromise = ServicePlatformService.acquireHubLockWithTimeout('controller-uuid-1', transaction) - await clock.runAllAsync() + it('times out when hub lock is held by another controller', async function () { + this.timeout(5000) try { - await acquirePromise + await ServicePlatformService.acquireHubLockWithTimeout('controller-uuid-1') throw new Error('expected lock acquire to fail') } catch (error) { expect(error.message).to.include('Timed out waiting for hub router ConfigMap lock') @@ -270,15 +276,15 @@ describe('Service platform service', () => { describe('.fanOutFogReconcile()', () => { beforeEach(() => { $sandbox.stub(ServicesService, 'handleServiceDistribution').resolves(['fog-a', 'fog-b']) - $sandbox.stub(FogPlatformReconcileTaskManager, 'enqueueFogPlatformReconcileTask').resolves({ id: 1 }) + $sandbox.stub(ReconcileOutboxManager, 'enqueueFogPlatform').resolves({ id: 1 }) }) it('enqueues fog platform reconcile tasks for distributed fogs', async () => { const fogUuids = await ServicePlatformService.fanOutFogReconcile(['site-a'], transaction) expect(fogUuids).to.eql(['fog-a', 'fog-b']) - 
expect(FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask).to.have.been.calledTwice - expect(FogPlatformReconcileTaskManager.enqueueFogPlatformReconcileTask).to.have.been.calledWith({ + expect(ReconcileOutboxManager.enqueueFogPlatform).to.have.been.calledTwice + expect(ReconcileOutboxManager.enqueueFogPlatform).to.have.been.calledWith({ fogUuid: 'fog-a', reason: 'service-changed' }, transaction) diff --git a/test/src/services/services-connector-host.test.js b/test/src/services/services-connector-host.test.js index 2195bda1..d2d4478b 100644 --- a/test/src/services/services-connector-host.test.js +++ b/test/src/services/services-connector-host.test.js @@ -401,122 +401,6 @@ describe('services-service connector host', () => { }) }) - describe('_addTcpConnector()', () => { - def('fogUuid', () => 'fog-edge') - def('serviceConfig', () => ({ - type: 'microservice', - name: 'bridge-svc', - resource: 'ms-1', - targetPort: 8080 - })) - def('capture', () => ({ config: null })) - - beforeEach(() => { - delete process.env.CONTROL_PLANE - $sandbox.stub(RouterManager, 'findOne').callsFake((where) => { - if (where.iofogUuid === $fogUuid) { - return Promise.resolve({ isEdge: true }) - } - return Promise.resolve(null) - }) - $sandbox.stub(FogManager, 'findOne').callsFake((where) => { - if (where.uuid === $fogUuid) { - return Promise.resolve({ uuid: $fogUuid, name: 'edge-fog' }) - } - return Promise.resolve(null) - }) - $sandbox.stub(ApplicationManager, 'findOne').callsFake((where) => { - if (where.id === 42) { - return Promise.resolve({ id: 42, name: 'myapp' }) - } - return Promise.resolve({ id: 10, name: 'system-edge-fog', isSystem: true }) - }) - $sandbox.stub(MicroserviceManager, 'findOne').callsFake((where) => { - if (where.uuid === 'ms-1') { - return Promise.resolve({ - uuid: 'ms-1', - name: 'worker', - applicationId: 42, - hostNetworkMode: false, - iofogUuid: $fogUuid - }) - } - if (where.name === 'router' && where.applicationId === 10) { - return Promise.resolve({ 
uuid: 'router-ms-1', config: '{}' }) - } - return Promise.resolve(null) - }) - $sandbox.stub(MicroserviceManager, 'update').callsFake((where, data) => { - if (data.config) { - $capture.config = JSON.parse(data.config) - } - return Promise.resolve() - }) - $sandbox.stub(ChangeTrackingService, 'update').resolves() - }) - - it('persists connector on the microservice fog router without siteId', async () => { - await ServicesService._addTcpConnector($serviceConfig, $transaction) - - const connector = $capture.config.bridges.tcpConnectors['bridge-svc-connector'] - expect(connector).to.include({ - name: 'bridge-svc-connector', - host: 'myapp.worker', - port: '8080', - address: 'bridge-svc', - processId: 'ms-1' - }) - expect(connector).to.not.have.property('siteId') - }) - }) - - describe('_addTcpConnector() default router target', () => { - def('capture', () => ({ config: null })) - - beforeEach(() => { - delete process.env.CONTROL_PLANE - $sandbox.stub(RouterManager, 'findOne').callsFake((where) => { - if (where.isDefault === true) { - return Promise.resolve({ iofogUuid: 'default-fog' }) - } - return Promise.resolve(null) - }) - $sandbox.stub(FogManager, 'findOne').callsFake((where) => { - if (where.uuid === 'default-fog') { - return Promise.resolve({ uuid: 'default-fog', name: 'default' }) - } - return Promise.resolve(null) - }) - $sandbox.stub(ApplicationManager, 'findOne').resolves({ id: 99, name: 'system-default', isSystem: true }) - $sandbox.stub(MicroserviceManager, 'findOne').callsFake((where) => { - if (where.name === 'router') { - return Promise.resolve({ uuid: 'router-default', config: '{}' }) - } - return Promise.resolve(null) - }) - $sandbox.stub(MicroserviceManager, 'update').callsFake((where, data) => { - if (data.config) { - $capture.config = JSON.parse(data.config) - } - return Promise.resolve() - }) - $sandbox.stub(ChangeTrackingService, 'update').resolves() - }) - - it('persists connector on default router for external services without siteId', async () 
=> { - await ServicesService._addTcpConnector({ - type: 'external', - name: 'ext-svc', - resource: 'example.com', - targetPort: 443 - }, $transaction) - - const connector = $capture.config.bridges.tcpConnectors['ext-svc-connector'] - expect(connector.host).to.equal('example.com') - expect(connector).to.not.have.property('siteId') - }) - }) - describe('iofog _buildTcpListenerForFog()', () => { it('builds listener without siteId', () => { const listener = ioFogService._buildTcpListenerForFog({ diff --git a/test/src/services/services-service.test.js b/test/src/services/services-service.test.js index 6785ae45..f7823228 100644 --- a/test/src/services/services-service.test.js +++ b/test/src/services/services-service.test.js @@ -5,11 +5,12 @@ const ServiceController = require('../../../src/controllers/service-controller') const YamlParserService = require('../../../src/services/yaml-parser-service') const ServicesService = require('../../../src/services/services-service') const ServiceManager = require('../../../src/data/managers/service-manager') -const ServicePlatformReconcileTaskManager = require('../../../src/data/managers/service-platform-reconcile-task-manager') +const ReconcileOutboxManager = require('../../../src/data/managers/reconcile-outbox-manager') const RouterManager = require('../../../src/data/managers/router-manager') const TagsManager = require('../../../src/data/managers/tags-manager') const Validator = require('../../../src/schemas') const Errors = require('../../../src/helpers/errors') +const K8sClient = require('../../../src/utils/k8s-client') describe('services-service platform reconcile enqueue', () => { def('sandbox', () => sinon.createSandbox()) @@ -41,12 +42,28 @@ describe('services-service platform reconcile enqueue', () => { return service } + function buildSequelizeLikeService (fields = {}) { + const data = buildServiceModel(fields) + return Object.create({ + get name () { return data.name }, + get type () { return data.type }, + get resource 
() { return data.resource }, + get defaultBridge () { return data.defaultBridge }, + get bridgePort () { return data.bridgePort }, + get targetPort () { return data.targetPort }, + get servicePort () { return data.servicePort }, + get k8sType () { return data.k8sType }, + get serviceEndpoint () { return data.serviceEndpoint }, + get tags () { return data.tags } + }) + } + function stubCreateDeps () { delete process.env.CONTROL_PLANE $sandbox.stub(Validator, 'validate').resolves(true) $sandbox.stub(ServiceManager, 'findAll').resolves([]) $sandbox.stub(ServiceManager, 'create').callsFake((data) => Promise.resolve(buildServiceModel(data))) - $sandbox.stub(ServicePlatformReconcileTaskManager, 'enqueueServicePlatformReconcileTask').resolves() + $sandbox.stub(ReconcileOutboxManager, 'enqueueServicePlatform').resolves() $sandbox.stub(RouterManager, 'findOne').resolves({ isDefault: true, host: 'hub.example.com', @@ -79,7 +96,7 @@ describe('services-service platform reconcile enqueue', () => { expect(createPayload.provisioningStatus).to.equal('pending') expect(createPayload.provisioningError).to.be.null - expect(ServicePlatformReconcileTaskManager.enqueueServicePlatformReconcileTask).to.have.been.calledWith({ + expect(ReconcileOutboxManager.enqueueServicePlatform).to.have.been.calledWith({ serviceName: 'api-gateway', reason: 'spec-changed', specSnapshot: { @@ -104,14 +121,10 @@ describe('services-service platform reconcile enqueue', () => { }) it('does not run hub provisioning on the synchronous path', async () => { - $sandbox.stub(ServicesService, '_addTcpConnector').resolves() - $sandbox.stub(ServicesService, '_addTcpListener').resolves() $sandbox.stub(ServicesService, '_createK8sService').resolves() await $subject - expect(ServicesService._addTcpConnector).to.not.have.been.called - expect(ServicesService._addTcpListener).to.not.have.been.called expect(ServicesService._createK8sService).to.not.have.been.called }) }) @@ -135,7 +148,7 @@ describe('services-service platform 
reconcile enqueue', () => { $sandbox.stub(ServiceManager, 'update').callsFake((where, data) => Promise.resolve(buildServiceModel({ ...existingService, ...data })) ) - $sandbox.stub(ServicePlatformReconcileTaskManager, 'enqueueServicePlatformReconcileTask').resolves() + $sandbox.stub(ReconcileOutboxManager, 'enqueueServicePlatform').resolves() $sandbox.stub(RouterManager, 'findOne').resolves({ isDefault: true, host: 'hub.example.com', @@ -148,7 +161,7 @@ describe('services-service platform reconcile enqueue', () => { it('enqueues reconcile with old and new tags in snapshot', async () => { await $subject - expect(ServicePlatformReconcileTaskManager.enqueueServicePlatformReconcileTask).to.have.been.calledWith({ + expect(ReconcileOutboxManager.enqueueServicePlatform).to.have.been.calledWith({ serviceName: 'api-gateway', reason: 'spec-changed', specSnapshot: { @@ -183,19 +196,17 @@ describe('services-service platform reconcile enqueue', () => { beforeEach(() => { $sandbox.stub(ServiceManager, 'findOneWithTags').resolves(existingService) $sandbox.stub(ServiceManager, 'delete').resolves() - $sandbox.stub(ServicePlatformReconcileTaskManager, 'enqueueServicePlatformReconcileTask').resolves() - $sandbox.stub(ServicesService, '_deleteTcpConnector').resolves() - $sandbox.stub(ServicesService, '_deleteTcpListener').resolves() + $sandbox.stub(ReconcileOutboxManager, 'enqueueServicePlatform').resolves() $sandbox.stub(ServicesService, '_deleteK8sService').resolves() }) it('captures spec snapshot and enqueues delete reconcile before DB delete', async () => { await $subject - expect(ServicePlatformReconcileTaskManager.enqueueServicePlatformReconcileTask).to.have.been.calledBefore( + expect(ReconcileOutboxManager.enqueueServicePlatform).to.have.been.calledBefore( ServiceManager.delete ) - expect(ServicePlatformReconcileTaskManager.enqueueServicePlatformReconcileTask).to.have.been.calledWith({ + expect(ReconcileOutboxManager.enqueueServicePlatform).to.have.been.calledWith({ 
serviceName: 'api-gateway', reason: 'delete', specSnapshot: { @@ -217,10 +228,42 @@ describe('services-service platform reconcile enqueue', () => { it('does not run hub teardown on the synchronous path', async () => { await $subject - expect(ServicesService._deleteTcpConnector).to.not.have.been.called - expect(ServicesService._deleteTcpListener).to.not.have.been.called expect(ServicesService._deleteK8sService).to.not.have.been.called }) + + it('captures full spec snapshot from Sequelize model instances', async () => { + ServiceManager.findOneWithTags.resolves(buildSequelizeLikeService({ + name: 'snapshot-service', + type: 'agent', + resource: 'fog-uuid-1', + defaultBridge: 'fog-uuid-1', + bridgePort: 9200, + targetPort: 8090, + servicePort: 9200, + k8sType: null, + serviceEndpoint: 'edge.example.com', + tags: [{ value: 'site-a' }] + })) + + await ServicesService.deleteServiceEndpoint('snapshot-service', $transaction) + + expect(ReconcileOutboxManager.enqueueServicePlatform).to.have.been.calledWith({ + serviceName: 'snapshot-service', + reason: 'delete', + specSnapshot: { + name: 'snapshot-service', + type: 'agent', + resource: 'fog-uuid-1', + defaultBridge: 'fog-uuid-1', + bridgePort: 9200, + targetPort: 8090, + servicePort: 9200, + k8sType: null, + serviceEndpoint: 'edge.example.com', + tags: ['site-a'] + } + }, $transaction) + }) }) describe('YAML endpoints', () => { @@ -261,7 +304,7 @@ spec: await ServiceController.createServiceYAMLEndpoint(req) expect(YamlParserService.parseServiceFile).to.have.been.calledOnceWith(serviceYaml) - expect(ServicePlatformReconcileTaskManager.enqueueServicePlatformReconcileTask).to.have.been.calledWith({ + expect(ReconcileOutboxManager.enqueueServicePlatform).to.have.been.calledWith({ serviceName: 'api-gateway', reason: 'spec-changed', specSnapshot: sinon.match({ @@ -284,7 +327,7 @@ spec: $sandbox.stub(ServiceManager, 'update').callsFake((where, data) => Promise.resolve(buildServiceModel({ ...existingService, ...data })) ) - 
$sandbox.stub(ServicePlatformReconcileTaskManager, 'enqueueServicePlatformReconcileTask').resolves() + $sandbox.stub(ReconcileOutboxManager, 'enqueueServicePlatform').resolves() $sandbox.stub(RouterManager, 'findOne').resolves({ isDefault: true, host: 'hub.example.com', @@ -313,7 +356,7 @@ spec: isUpdate: true, serviceName: 'api-gateway' }) - expect(ServicePlatformReconcileTaskManager.enqueueServicePlatformReconcileTask).to.have.been.calledWith({ + expect(ReconcileOutboxManager.enqueueServicePlatform).to.have.been.calledWith({ serviceName: 'api-gateway', reason: 'spec-changed', specSnapshot: sinon.match({ @@ -335,7 +378,7 @@ spec: tags: [{ value: 'site-a' }] })) $sandbox.stub(ServiceManager, 'update').resolves() - $sandbox.stub(ServicePlatformReconcileTaskManager, 'enqueueServicePlatformReconcileTask').resolves() + $sandbox.stub(ReconcileOutboxManager, 'enqueueServicePlatform').resolves() }) it('resets failed provisioning and enqueues manual retry', async () => { @@ -346,18 +389,53 @@ spec: { provisioningStatus: 'pending', provisioningError: null }, $transaction ) - expect(ServicePlatformReconcileTaskManager.enqueueServicePlatformReconcileTask).to.have.been.calledWith({ + expect(ReconcileOutboxManager.enqueueServicePlatform).to.have.been.calledWith({ serviceName: 'api-gateway', reason: 'manual-retry', - specSnapshot: sinon.match({ + specSnapshot: { name: 'api-gateway', + type: 'external', + resource: '10.0.0.8', + defaultBridge: 'default-router', + bridgePort: 9100, + targetPort: 8080, + servicePort: 9100, + k8sType: 'LoadBalancer', + serviceEndpoint: 'hub.example.com', tags: ['site-a'] - }) + } }, $transaction) expect(result.provisioningStatus).to.equal('pending') expect(result.provisioningError).to.be.null }) + it('captures full spec snapshot from Sequelize model instances', async () => { + ServiceManager.findOneWithTags.resolves(buildSequelizeLikeService({ + provisioningStatus: 'failed', + provisioningError: 'hub lock timeout', + tags: [{ value: 'site-a' }] + 
})) + + await ServicesService.reconcileServiceEndpoint('api-gateway', $transaction) + + expect(ReconcileOutboxManager.enqueueServicePlatform).to.have.been.calledWith({ + serviceName: 'api-gateway', + reason: 'manual-retry', + specSnapshot: { + name: 'api-gateway', + type: 'external', + resource: '10.0.0.8', + defaultBridge: 'default-router', + bridgePort: 9100, + targetPort: 8080, + servicePort: 9100, + k8sType: 'LoadBalancer', + serviceEndpoint: 'hub.example.com', + tags: ['site-a'] + } + }, $transaction) + }) + context('when service is missing', () => { beforeEach(() => { ServiceManager.findOneWithTags.resolves(null) @@ -367,4 +445,49 @@ spec: expect($subject).to.be.rejectedWith(Errors.NotFoundError)) }) }) + + describe('._syncK8sServiceResource()', () => { + const serviceConfig = { + name: 'snapshot-service', + k8sType: 'ClusterIP', + bridgePort: 10024, + servicePort: 10024, + tags: ['site-a'] + } + + beforeEach(() => { + $sandbox.stub(K8sClient, 'getService').resolves(null) + $sandbox.stub(K8sClient, 'createService').resolves({ metadata: { name: 'snapshot-service' } }) + $sandbox.stub(K8sClient, 'updateService').resolves({ metadata: { name: 'snapshot-service' } }) + $sandbox.stub(K8sClient, 'isK8sNotFound').returns(false) + }) + + it('creates the K8s service when it does not exist', async () => { + await ServicesService._syncK8sServiceResource(serviceConfig) + + expect(K8sClient.getService).to.have.been.calledWith('snapshot-service', { ignoreNotFound: true }) + expect(K8sClient.createService).to.have.been.calledOnce + expect(K8sClient.updateService).to.not.have.been.called + }) + + it('updates the K8s service when it already exists', async () => { + K8sClient.getService.resolves({ metadata: { name: 'snapshot-service' } }) + + await ServicesService._syncK8sServiceResource(serviceConfig) + + expect(K8sClient.createService).to.not.have.been.called + expect(K8sClient.updateService).to.have.been.calledOnceWith('snapshot-service', sinon.match.object) + }) + + 
it('creates the K8s service when update returns not found', async () => { + K8sClient.getService.resolves({ metadata: { name: 'snapshot-service' } }) + K8sClient.updateService.rejects(new Error('not found')) + K8sClient.isK8sNotFound.returns(true) + + await ServicesService._syncK8sServiceResource(serviceConfig) + + expect(K8sClient.updateService).to.have.been.calledOnce + expect(K8sClient.createService).to.have.been.calledOnce + }) + }) }) diff --git a/test/src/services/transaction-safety-19h.test.js b/test/src/services/transaction-safety-19h.test.js new file mode 100644 index 00000000..da5d3e6a --- /dev/null +++ b/test/src/services/transaction-safety-19h.test.js @@ -0,0 +1,189 @@ +'use strict' + +const { expect } = require('chai') +const sinon = require('sinon') +const Transaction = require('sequelize/lib/transaction') + +const SecretService = require('../../../src/services/secret-service') +const CertificateService = require('../../../src/services/certificate-service') +const Errors = require('../../../src/helpers/errors') + +describe('Plan 19-H transaction safety fixes', () => { + def('sandbox', () => sinon.createSandbox()) + def('parentTransaction', () => Object.create(Transaction.prototype)) + + afterEach(() => { + $sandbox.restore() + }) + + describe('certificate-service transaction propagation', () => { + it('passes parent transaction to SecretService.getSecretEndpoint from getCAEndpoint', async () => { + const CertificateManager = require('../../../src/data/managers/certificate-manager') + $sandbox.stub(CertificateManager, 'findCertificateByName').resolves({ + name: 'router-local-ca', + subject: 'router-local-ca', + isCA: true, + validFrom: new Date(), + validTo: new Date(), + serialNumber: '1', + isExpired: () => false + }) + $sandbox.stub(SecretService, 'getSecretEndpoint').resolves({ + type: 'tls', + data: { + 'tls.crt': Buffer.from('cert').toString('base64'), + 'tls.key': Buffer.from('key').toString('base64') + } + }) + + await 
CertificateService.getCAEndpoint('router-local-ca', $parentTransaction) + + expect(SecretService.getSecretEndpoint).to.have.been.calledOnceWith( + 'router-local-ca', + $parentTransaction + ) + }) + + it('passes parent transaction through createCAEndpoint SecretService calls', async () => { + $sandbox.stub(SecretService, 'getSecretEndpoint').rejects(new Errors.NotFoundError('missing')) + $sandbox.stub(require('../../../src/utils/cert'), 'generateSelfSignedCA').resolves({ + cert: '-----BEGIN CERTIFICATE-----\ncert\n-----END CERTIFICATE-----', + key: '-----BEGIN PRIVATE KEY-----\nkey\n-----END PRIVATE KEY-----' + }) + $sandbox.stub(require('../../../src/utils/cert'), 'storeCA').resolves() + $sandbox.stub(SecretService, 'createSecretEndpoint').resolves({ id: 1, name: 'test-ca' }) + $sandbox.stub(require('../../../src/data/managers/secret-manager'), 'findOne').resolves({ id: 1 }) + $sandbox.stub(require('../../../src/data/managers/certificate-manager'), 'createCertificateRecord').resolves() + + await CertificateService.createCAEndpoint({ + name: 'test-ca', + subject: 'test-ca', + expiration: 60, + type: 'self-signed' + }, $parentTransaction) + + expect(SecretService.getSecretEndpoint).to.have.been.calledWith('test-ca', $parentTransaction) + }) + + it('passes parent transaction to loadCA from createCAEndpoint direct type', async () => { + const certUtil = require('../../../src/utils/cert') + const forge = require('node-forge') + + const keys = forge.pki.rsa.generateKeyPair(2048) + const caCert = forge.pki.createCertificate() + caCert.publicKey = keys.publicKey + caCert.serialNumber = '01' + caCert.validity.notBefore = new Date() + caCert.validity.notAfter = new Date(Date.now() + 86400000) + caCert.setSubject([{ name: 'commonName', value: 'router-site-ca' }]) + caCert.setIssuer([{ name: 'commonName', value: 'router-site-ca' }]) + caCert.sign(keys.privateKey, forge.md.sha256.create()) + + const caCertPem = forge.pki.certificateToPem(caCert) + const caKeyPem = 
forge.pki.privateKeyToPem(keys.privateKey) + + $sandbox.stub(SecretService, 'getSecretEndpoint').resolves({ type: 'tls', data: {} }) + $sandbox.stub(require('../../../src/data/managers/certificate-manager'), 'findCertificateByName').resolves(null) + $sandbox.stub(certUtil, 'loadCA').resolves({ cert: caCertPem, key: caKeyPem }) + $sandbox.stub(require('../../../src/data/managers/secret-manager'), 'findOne').resolves({ id: 1 }) + $sandbox.stub(require('../../../src/data/managers/certificate-manager'), 'createCertificateRecord').resolves() + + await CertificateService.createCAEndpoint({ + name: 'router-site-ca', + secretName: 'router-site-ca', + type: 'direct' + }, $parentTransaction) + + expect(certUtil.loadCA).to.have.been.calledOnceWith('router-site-ca', $parentTransaction) + }) + }) + + describe('cert.js transaction propagation', () => { + it('loadCA uses parent transaction without enqueueing runInTransaction', async () => { + const transactionRunner = require('../../../src/helpers/transaction-runner') + const SecretManager = require('../../../src/data/managers/secret-manager') + const { loadCA } = require('../../../src/utils/cert') + + $sandbox.stub(transactionRunner, 'runInTransaction').throws(new Error('should not enqueue')) + $sandbox.stub(SecretManager, 'getSecret').resolves({ + type: 'tls', + data: { + 'tls.crt': Buffer.from('cert').toString('base64'), + 'tls.key': Buffer.from('key').toString('base64') + } + }) + + const result = await loadCA('router-site-ca', $parentTransaction) + + expect(SecretManager.getSecret).to.have.been.calledOnceWith('router-site-ca', $parentTransaction) + expect(transactionRunner.runInTransaction).to.not.have.been.called + expect(result).to.include.keys('cert', 'key') + }) + + it('getCAFromK8sSecret uses parent transaction without enqueueing runInTransaction', async () => { + const transactionRunner = require('../../../src/helpers/transaction-runner') + const SecretManager = require('../../../src/data/managers/secret-manager') + 
const k8sClient = require('../../../src/utils/k8s-client') + const { getCAFromK8sSecret } = require('../../../src/utils/cert') + + $sandbox.stub(transactionRunner, 'runInTransaction').throws(new Error('should not enqueue')) + $sandbox.stub(k8sClient, 'getSecret').resolves({ + data: { + 'tls.crt': Buffer.from('cert').toString('base64'), + 'tls.key': Buffer.from('key').toString('base64') + } + }) + $sandbox.stub(SecretManager, 'findOne').resolves({ id: 1, name: 'k8s-ca' }) + + await getCAFromK8sSecret('k8s-ca', $parentTransaction) + + expect(SecretManager.findOne).to.have.been.calledOnceWith({ name: 'k8s-ca' }, $parentTransaction) + expect(transactionRunner.runInTransaction).to.not.have.been.called + }) + + it('generateCertificate passes transaction through getCAFromInput loadCA path', async () => { + const transactionRunner = require('../../../src/helpers/transaction-runner') + const SecretManager = require('../../../src/data/managers/secret-manager') + const certUtil = require('../../../src/utils/cert') + const forge = require('node-forge') + + const keys = forge.pki.rsa.generateKeyPair(2048) + const caCert = forge.pki.createCertificate() + caCert.publicKey = keys.publicKey + caCert.serialNumber = '01' + caCert.validity.notBefore = new Date() + caCert.validity.notAfter = new Date(Date.now() + 86400000) + caCert.setSubject([{ name: 'commonName', value: 'router-site-ca' }]) + caCert.setIssuer([{ name: 'commonName', value: 'router-site-ca' }]) + caCert.sign(keys.privateKey, forge.md.sha256.create()) + + const caCertPem = forge.pki.certificateToPem(caCert) + const caKeyPem = forge.pki.privateKeyToPem(keys.privateKey) + + $sandbox.stub(transactionRunner, 'runInTransaction').throws(new Error('should not enqueue')) + $sandbox.stub(SecretManager, 'getSecret').resolves({ + type: 'tls', + data: { + 'tls.crt': Buffer.from(caCertPem).toString('base64'), + 'tls.key': Buffer.from(caKeyPem).toString('base64') + } + }) + $sandbox.stub(SecretService, 
'createSecretEndpoint').resolves() + + await certUtil.generateCertificate({ + name: 'site-server', + subject: '/CN=site-server', + hosts: '127.0.0.1', + ca: { type: 'direct', secretName: 'router-site-ca' }, + transaction: $parentTransaction + }) + + expect(SecretManager.getSecret).to.have.been.calledOnceWith('router-site-ca', $parentTransaction) + expect(transactionRunner.runInTransaction).to.not.have.been.called + expect(SecretService.createSecretEndpoint).to.have.been.calledWith( + sinon.match.has('name', 'site-server'), + $parentTransaction + ) + }) + }) +}) diff --git a/test/src/services/transaction-safety-vault.test.js b/test/src/services/transaction-safety-vault.test.js new file mode 100644 index 00000000..50ac2d89 --- /dev/null +++ b/test/src/services/transaction-safety-vault.test.js @@ -0,0 +1,119 @@ +'use strict' + +const { expect } = require('chai') +const sinon = require('sinon') +const Transaction = require('sequelize/lib/transaction') + +const SecretService = require('../../../src/services/secret-service') +const RegistryService = require('../../../src/services/registry-service') +const ConfigMapManager = require('../../../src/data/managers/config-map-manager') +const RegistryManager = require('../../../src/data/managers/registry-manager') +const BaseManager = require('../../../src/data/managers/base-manager') +const SecretHelper = require('../../../src/helpers/secret-helper') +const vaultManager = require('../../../src/vault/vault-manager') + +describe('Plan 19-H vault transaction safety (R-09–R-11)', () => { + def('sandbox', () => sinon.createSandbox()) + def('parentTransaction', () => { + const tx = Object.create(Transaction.prototype) + tx.afterCommit = sinon.spy((fn) => fn()) + return tx + }) + + afterEach(() => { + $sandbox.restore() + }) + + describe('secret-service deleteSecretEndpoint', () => { + beforeEach(() => { + $sandbox.stub(vaultManager, 'isEnabled').returns(true) + $sandbox.stub(SecretHelper, 'deleteSecret').resolves() + 
$sandbox.stub(require('../../../src/data/managers/secret-manager'), 'findOne').resolves({ + name: 'test-secret', + type: 'Opaque' + }) + $sandbox.stub(require('../../../src/data/managers/secret-manager'), 'deleteSecret').resolves() + $sandbox.stub(require('../../../src/data/managers/volume-mounting-manager'), 'findAll').resolves([]) + }) + + it('schedules vault delete after commit instead of calling SecretHelper inside the tx body', async () => { + const deferredTx = Object.create(Transaction.prototype) + let deferredFn + deferredTx.afterCommit = sinon.spy((fn) => { + deferredFn = fn + }) + + await SecretService.deleteSecretEndpoint('test-secret', deferredTx) + + expect(deferredTx.afterCommit).to.have.been.calledOnce + expect(SecretHelper.deleteSecret).to.not.have.been.called + + await deferredFn() + + expect(SecretHelper.deleteSecret).to.have.been.calledOnceWith('test-secret', 'Opaque') + }) + }) + + describe('config-map-manager deleteConfigMap', () => { + beforeEach(() => { + $sandbox.stub(vaultManager, 'isEnabled').returns(true) + $sandbox.stub(ConfigMapManager, 'findOne').resolves({ + name: 'cfg', + useVault: true + }) + $sandbox.stub(ConfigMapManager, 'delete').resolves(1) + $sandbox.stub(SecretHelper, 'deleteSecret').resolves() + }) + + it('deletes DB row in tx and schedules vault cleanup after commit', async () => { + await ConfigMapManager.deleteConfigMap('cfg', $parentTransaction) + + expect(ConfigMapManager.delete).to.have.been.calledBefore(SecretHelper.deleteSecret) + expect(SecretHelper.deleteSecret).to.have.been.calledOnceWith('cfg', 'configmap') + }) + }) + + describe('registry-service createRegistry', () => { + beforeEach(() => { + $sandbox.stub(vaultManager, 'isEnabled').returns(true) + $sandbox.stub(require('../../../src/schemas'), 'validate').resolves(true) + $sandbox.stub(require('../../../src/helpers/app-helper'), 'deleteUndefinedFields').callsFake((v) => v) + $sandbox.stub(RegistryManager, 'create').resolves({ id: 16 }) + 
$sandbox.stub(SecretHelper, 'encryptSecretInternal').resolves('internal-encrypted') + $sandbox.stub(SecretHelper, 'encryptSecret').resolves('vault-ref') + $sandbox.stub(RegistryManager, 'update').resolves() + $sandbox.stub(require('../../../src/data/managers/iofog-manager'), 'findAll').resolves([]) + $sandbox.stub(require('../../../src/services/change-tracking-service'), 'update').resolves() + $sandbox.stub(require('../../../src/helpers/transaction-runner'), 'runInTransaction').resolves() + }) + + it('stores internal encryption in tx and promotes to vault after commit', async () => { + await RegistryService.createRegistry({ + url: 'https://registry.example.com', + username: 'user', + password: 'plain-password', + isPublic: false, + email: 'user@example.com' + }, $parentTransaction) + + expect(SecretHelper.encryptSecretInternal).to.have.been.calledOnce + expect(SecretHelper.encryptSecret).to.not.have.been.called + }) + }) + + describe('registry-manager delete', () => { + beforeEach(() => { + $sandbox.stub(vaultManager, 'isEnabled').returns(true) + $sandbox.stub(RegistryManager, 'findOne').resolves({ id: 16 }) + $sandbox.stub(BaseManager.prototype, 'delete').resolves(1) + $sandbox.stub(SecretHelper, 'deleteSecret').resolves() + }) + + it('deletes DB row first and schedules vault cleanup after commit', async () => { + await RegistryManager.delete({ id: 16 }, $parentTransaction) + + expect(BaseManager.prototype.delete).to.have.been.calledBefore(SecretHelper.deleteSecret) + expect(SecretHelper.deleteSecret).to.have.been.calledOnceWith('registry-16', 'registry') + }) + }) +}) diff --git a/test/src/utils/k8s-client.test.js b/test/src/utils/k8s-client.test.js new file mode 100644 index 00000000..8e20f9e3 --- /dev/null +++ b/test/src/utils/k8s-client.test.js @@ -0,0 +1,48 @@ +'use strict' + +const { expect } = require('chai') + +const { isK8sNotFound, isK8sConflict } = require('../../../src/utils/k8s-client') + +describe('k8s-client error helpers', () => { + 
describe('.isK8sNotFound()', () => { + it('detects ApiException-style 404 from client-node v1', () => { + const error = { + code: 404, + message: 'Unknown API Status Code!', + body: JSON.stringify({ + kind: 'Status', + status: 'Failure', + reason: 'NotFound', + code: 404 + }) + } + expect(isK8sNotFound(error)).to.equal(true) + }) + + it('detects axios-style 404', () => { + expect(isK8sNotFound({ response: { status: 404 } })).to.equal(true) + }) + + it('detects parsed Status body', () => { + expect(isK8sNotFound({ + body: { reason: 'NotFound', code: 404 } + })).to.equal(true) + }) + + it('returns false for other errors', () => { + expect(isK8sNotFound({ code: 500 })).to.equal(false) + expect(isK8sNotFound(null)).to.equal(false) + }) + }) + + describe('.isK8sConflict()', () => { + it('detects ApiException-style 409 from client-node v1', () => { + const error = { + code: 409, + body: JSON.stringify({ reason: 'Conflict', code: 409 }) + } + expect(isK8sConflict(error)).to.equal(true) + }) + }) +}) diff --git a/test/src/websocket/ws-cross-replica-nats.test.js b/test/src/websocket/ws-cross-replica-nats.test.js index 277177ac..fd313354 100644 --- a/test/src/websocket/ws-cross-replica-nats.test.js +++ b/test/src/websocket/ws-cross-replica-nats.test.js @@ -81,12 +81,12 @@ describe('WebSocket exec/log — cross-replica mock NATS', () => { userWs, transaction ) - await wsServer.setupExecMessageForwarding(sessionId, transaction) + await wsServer.setupExecMessageForwarding(sessionId) await delay(50) const session = wsServer.execSessionManager.getExecSession(sessionId) session.agent = agentWs - await wsServer.setupExecMessageForwarding(sessionId, transaction) + await wsServer.setupExecMessageForwarding(sessionId) await delay(50) expect(mockRelay.shouldUseRelay(sessionId)).to.equal(true) @@ -164,6 +164,7 @@ describe('WebSocket exec/log — cross-replica mock NATS', () => { false, transaction ) + await delay(50) expect(mockRelay.shouldUseRelay($ids.sessionId)).to.equal(true) diff --git 
a/test/src/websocket/ws-cross-replica-split.test.js b/test/src/websocket/ws-cross-replica-split.test.js new file mode 100644 index 00000000..542259ae --- /dev/null +++ b/test/src/websocket/ws-cross-replica-split.test.js @@ -0,0 +1,446 @@ +const { expect } = require('chai') +const sinon = require('sinon') +const WebSocket = require('ws') + +const WebSocketServerClass = require('../../../src/websocket/server') +const MicroserviceExecSessionManager = require('../../../src/data/managers/microservice-exec-session-manager') +const MicroserviceLogStatusManager = require('../../../src/data/managers/microservice-log-status-manager') +const MicroserviceManager = require('../../../src/data/managers/microservice-manager') +const FogManager = require('../../../src/data/managers/iofog-manager') +const ChangeTrackingService = require('../../../src/services/change-tracking-service') +const AppHelper = require('../../../src/helpers/app-helper') +const EventService = require('../../../src/services/event-service') +const { + MESSAGE_TYPES, + createMockWebSocket, + createMockRequest, + buildExecFrame, + decodeExecMessage, + createMockNatsRelayTransport, + resetWebSocketServerSingleton, + newTestIds, + waitForSent, + waitUntil, + delay +} = require('../../support/ws-session-harness') +const { resetTransportForTests } = require('../../../src/services/ws-relay-transport-factory') + +function lastSent (ws) { + return ws._sentMessages[ws._sentMessages.length - 1].data +} + +function hasSentMessageType (ws, type) { + return ws._sentMessages.some((entry) => { + try { + return decodeExecMessage(entry.data).type === type + } catch (e) { + return false + } + }) +} + +function hasSentText (ws, needle) { + return ws._sentMessages.some((entry) => { + try { + const msg = decodeExecMessage(entry.data) + return msg.data && msg.data.toString().includes(needle) + } catch (e) { + return false + } + }) +} + +describe('WebSocket exec/log — split replica pairing', () => { + def('sandbox', () => 
sinon.createSandbox()) + def('ids', () => newTestIds()) + + let serverA + let serverB + let sharedRelay + let transaction + let execRow + + beforeEach(() => { + resetTransportForTests() + resetWebSocketServerSingleton(WebSocketServerClass) + + sharedRelay = createMockNatsRelayTransport() + serverA = new WebSocketServerClass() + serverB = new WebSocketServerClass() + serverA.relayTransport = sharedRelay + serverB.relayTransport = sharedRelay + serverA.sessionConfig.execPendingTimeoutMs = 500 + serverA.sessionConfig.logPendingTimeoutMs = 500 + serverB.sessionConfig.execPendingTimeoutMs = 500 + serverB.sessionConfig.logPendingTimeoutMs = 500 + + transaction = { fakeTransaction: true } + execRow = { + sessionId: $ids.sessionId, + microserviceUuid: $ids.microserviceUuid, + status: 'PENDING', + userConnected: true, + agentConnected: false + } + + $sandbox.stub(MicroserviceManager, 'update').resolves() + $sandbox.stub(MicroserviceManager, 'findOne').resolves({ iofogUuid: $ids.fogUuid }) + $sandbox.stub(FogManager, 'findOne').resolves({ uuid: $ids.fogUuid }) + $sandbox.stub(EventService, 'createWsConnectEvent').resolves() + $sandbox.stub(EventService, 'createWsDisconnectEvent').resolves() + $sandbox.stub(ChangeTrackingService, 'update').resolves() + $sandbox.stub(MicroserviceExecSessionManager, 'create').resolves() + $sandbox.stub(MicroserviceExecSessionManager, 'update').callsFake(async (_where, patch) => { + Object.assign(execRow, patch) + }) + $sandbox.stub(MicroserviceExecSessionManager, 'deleteBySessionId').resolves() + $sandbox.stub(MicroserviceExecSessionManager, 'findAll').resolves([]) + $sandbox.stub(MicroserviceExecSessionManager, 'findBySessionId').callsFake(async () => ({ ...execRow })) + $sandbox.stub(serverA, 'validateUserConnection').resolves({ uuid: $ids.microserviceUuid }) + $sandbox.stub(serverB, 'validateAgentExecConnection').resolves({ uuid: $ids.fogUuid }) + $sandbox.stub(AppHelper, 'generateUUID').returns($ids.sessionId) + $sandbox.stub(serverA, 
'countExecSessionsInDb').resolves(0) + $sandbox.stub(serverB, 'countExecSessionsInDb').resolves(0) + }) + + afterEach(() => { + $sandbox.restore() + resetTransportForTests() + resetWebSocketServerSingleton(WebSocketServerClass) + }) + + it('relays ACTIVATION when each replica has its own relay bridge map', async () => { + serverA.relayTransport = createMockNatsRelayTransport() + serverB.relayTransport = createMockNatsRelayTransport() + + const userWs = createMockWebSocket() + const agentWs = createMockWebSocket() + const userReq = createMockRequest(`/api/v3/microservices/exec/${$ids.microserviceUuid}`) + userReq.headers.authorization = 'Bearer user-jwt' + + await serverA.handleUserExecConnection( + userWs, + userReq, + 'Bearer user-jwt', + $ids.microserviceUuid, + false, + transaction + ) + await delay(50) + + const agentReq = createMockRequest( + `/api/v3/agent/exec/microservice/${$ids.microserviceUuid}/${$ids.sessionId}`, + '127.0.0.2' + ) + agentReq.headers.authorization = 'Bearer fog-token' + await serverB.handleAgentExecConnection( + agentWs, + agentReq, + 'Bearer fog-token', + $ids.microserviceUuid, + $ids.sessionId, + transaction + ) + await delay(50) + + expect(userWs.readyState).to.equal(WebSocket.OPEN) + expect(hasSentMessageType(agentWs, MESSAGE_TYPES.ACTIVATION)).to.equal(true) + expect(serverB.relayTransport.shouldUseRelay($ids.sessionId)).to.equal(true) + }) + + it('keeps exec user open when agent connects on replica B and relays ACTIVATION', async () => { + const userWs = createMockWebSocket() + const agentWs = createMockWebSocket() + const userReq = createMockRequest(`/api/v3/microservices/exec/${$ids.microserviceUuid}`) + userReq.headers.authorization = 'Bearer user-jwt' + + await serverA.handleUserExecConnection( + userWs, + userReq, + 'Bearer user-jwt', + $ids.microserviceUuid, + false, + transaction + ) + await delay(50) + + const agentReq = createMockRequest( + `/api/v3/agent/exec/microservice/${$ids.microserviceUuid}/${$ids.sessionId}`, + 
'127.0.0.2' + ) + agentReq.headers.authorization = 'Bearer fog-token' + await serverB.handleAgentExecConnection( + agentWs, + agentReq, + 'Bearer fog-token', + $ids.microserviceUuid, + $ids.sessionId, + transaction + ) + await delay(50) + + expect(userWs.readyState).to.equal(WebSocket.OPEN) + expect(hasSentMessageType(agentWs, MESSAGE_TYPES.ACTIVATION)).to.equal(true) + expect(hasSentText(userWs, 'Interactive exec is ready')).to.equal(true) + + const stdinFrame = buildExecFrame( + MESSAGE_TYPES.STDIN, + $ids.sessionId, + $ids.microserviceUuid, + 'echo hi\n' + ) + userWs.emit('message', stdinFrame, true) + await waitForSent(agentWs, 1) + + const agentReceived = decodeExecMessage(lastSent(agentWs)) + expect(agentReceived.type).to.equal(MESSAGE_TYPES.STDIN) + }) + + it('uses DB fallback for exec pending timeout when agentConnected is true', async () => { + execRow.agentConnected = true + const userWs = createMockWebSocket() + const userReq = createMockRequest(`/api/v3/microservices/exec/${$ids.microserviceUuid}`) + userReq.headers.authorization = 'Bearer user-jwt' + + await serverA.handleUserExecConnection( + userWs, + userReq, + 'Bearer user-jwt', + $ids.microserviceUuid, + false, + transaction + ) + + await delay(700) + expect(userWs.readyState).to.equal(WebSocket.OPEN) + const session = serverA.execSessionManager.getExecSession($ids.sessionId) + expect(session.remoteAgentPaired).to.equal(true) + }) + + it('relays log agent-ready LOG_LINE to user on replica A', async () => { + const logRow = { + sessionId: $ids.sessionId, + microserviceUuid: $ids.microserviceUuid, + tailConfig: JSON.stringify({ lines: 100, follow: true, since: null, until: null }), + agentConnected: false, + userConnected: true + } + + $sandbox.stub(MicroserviceLogStatusManager, 'create').resolves() + $sandbox.stub(MicroserviceLogStatusManager, 'update').callsFake(async (_where, patch) => { + Object.assign(logRow, patch) + }) + $sandbox.stub(MicroserviceLogStatusManager, 'delete').resolves() + 
$sandbox.stub(MicroserviceLogStatusManager, 'findOne').callsFake(async () => ({ ...logRow })) + $sandbox.stub(serverA, 'validateUserLogsConnection').resolves() + $sandbox.stub(serverB, 'validateAgentLogsConnection').resolves({ uuid: $ids.fogUuid }) + $sandbox.stub(serverA, 'countLogSessionsInDb').resolves(0) + + const userWs = createMockWebSocket() + const agentWs = createMockWebSocket() + const userReq = createMockRequest(`/api/v3/microservices/logs/${$ids.microserviceUuid}?tail=100`) + userReq.headers.authorization = 'Bearer user-jwt' + + await serverA.handleUserLogsConnection( + userWs, + userReq, + 'Bearer user-jwt', + $ids.microserviceUuid, + null, + false, + transaction + ) + await delay(50) + + const agentReq = createMockRequest( + `/api/v3/agent/logs/microservice/${$ids.microserviceUuid}/${$ids.sessionId}`, + '127.0.0.2' + ) + agentReq.headers.authorization = 'Bearer fog-token' + await serverB.handleAgentLogsConnection( + agentWs, + agentReq, + 'Bearer fog-token', + $ids.microserviceUuid, + null, + $ids.sessionId, + transaction + ) + await delay(50) + + expect(userWs.readyState).to.equal(WebSocket.OPEN) + expect(hasSentMessageType(agentWs, MESSAGE_TYPES.LOG_START)).to.equal(true) + expect(hasSentText(userWs, 'Log streaming started')).to.equal(true) + expect(hasSentMessageType(userWs, MESSAGE_TYPES.LOG_LINE)).to.equal(true) + }) + + it('notifies user and preserves DB row when agent disconnects on replica B', async () => { + const userWs = createMockWebSocket() + const agentWs = createMockWebSocket() + const userReq = createMockRequest(`/api/v3/microservices/exec/${$ids.microserviceUuid}`) + userReq.headers.authorization = 'Bearer user-jwt' + + await serverA.handleUserExecConnection( + userWs, + userReq, + 'Bearer user-jwt', + $ids.microserviceUuid, + false, + transaction + ) + await delay(50) + + const agentReq = createMockRequest( + `/api/v3/agent/exec/microservice/${$ids.microserviceUuid}/${$ids.sessionId}`, + '127.0.0.2' + ) + 
agentReq.headers.authorization = 'Bearer fog-token' + await serverB.handleAgentExecConnection( + agentWs, + agentReq, + 'Bearer fog-token', + $ids.microserviceUuid, + $ids.sessionId, + transaction + ) + await delay(50) + + execRow.agentConnected = true + MicroserviceExecSessionManager.deleteBySessionId.resetHistory() + + agentWs.close() + await delay(50) + + expect(MicroserviceExecSessionManager.deleteBySessionId).to.not.have.been.called + expect(execRow.agentConnected).to.equal(false) + expect(hasSentMessageType(userWs, MESSAGE_TYPES.CLOSE)).to.equal(true) + expect(serverB.execSessionManager.getExecSession($ids.sessionId)).to.equal(null) + expect(serverA.execSessionManager.getExecSession($ids.sessionId)).to.not.equal(null) + }) + + it('keeps paired exec user open past pending timeout window', async () => { + const userWs = createMockWebSocket() + const agentWs = createMockWebSocket() + const userReq = createMockRequest(`/api/v3/microservices/exec/${$ids.microserviceUuid}`) + userReq.headers.authorization = 'Bearer user-jwt' + + await serverA.handleUserExecConnection( + userWs, + userReq, + 'Bearer user-jwt', + $ids.microserviceUuid, + false, + transaction + ) + await delay(50) + + const agentReq = createMockRequest( + `/api/v3/agent/exec/microservice/${$ids.microserviceUuid}/${$ids.sessionId}`, + '127.0.0.2' + ) + agentReq.headers.authorization = 'Bearer fog-token' + await serverB.handleAgentExecConnection( + agentWs, + agentReq, + 'Bearer fog-token', + $ids.microserviceUuid, + $ids.sessionId, + transaction + ) + await delay(100) + + expect(hasSentText(userWs, 'Interactive exec is ready')).to.equal(true) + + await delay(700) + expect(userWs.readyState).to.equal(WebSocket.OPEN) + }) + + it('keeps paired exec agent on replica B past pending timeout window', async () => { + const userWs = createMockWebSocket() + const agentWs = createMockWebSocket() + const userReq = createMockRequest(`/api/v3/microservices/exec/${$ids.microserviceUuid}`) + 
userReq.headers.authorization = 'Bearer user-jwt' + + await serverA.handleUserExecConnection( + userWs, + userReq, + 'Bearer user-jwt', + $ids.microserviceUuid, + false, + transaction + ) + await delay(50) + + const agentReq = createMockRequest( + `/api/v3/agent/exec/microservice/${$ids.microserviceUuid}/${$ids.sessionId}`, + '127.0.0.2' + ) + agentReq.headers.authorization = 'Bearer fog-token' + await serverB.handleAgentExecConnection( + agentWs, + agentReq, + 'Bearer fog-token', + $ids.microserviceUuid, + $ids.sessionId, + transaction + ) + await delay(100) + + const agentSession = serverB.execSessionManager.getExecSession($ids.sessionId) + expect(agentSession.remoteUserPaired).to.equal(true) + + MicroserviceExecSessionManager.deleteBySessionId.resetHistory() + await delay(700) + + expect(agentWs.readyState).to.equal(WebSocket.OPEN) + expect(MicroserviceExecSessionManager.deleteBySessionId).to.not.have.been.called + expect(serverB.execSessionManager.getExecSession($ids.sessionId)).to.not.equal(null) + }) + + it('preserves DB row when user disconnects on replica A with agent on replica B', async () => { + const userWs = createMockWebSocket() + const agentWs = createMockWebSocket() + const userReq = createMockRequest(`/api/v3/microservices/exec/${$ids.microserviceUuid}`) + userReq.headers.authorization = 'Bearer user-jwt' + + await serverA.handleUserExecConnection( + userWs, + userReq, + 'Bearer user-jwt', + $ids.microserviceUuid, + false, + transaction + ) + await delay(50) + + const agentReq = createMockRequest( + `/api/v3/agent/exec/microservice/${$ids.microserviceUuid}/${$ids.sessionId}`, + '127.0.0.2' + ) + agentReq.headers.authorization = 'Bearer fog-token' + await serverB.handleAgentExecConnection( + agentWs, + agentReq, + 'Bearer fog-token', + $ids.microserviceUuid, + $ids.sessionId, + transaction + ) + await delay(50) + + execRow.agentConnected = true + MicroserviceExecSessionManager.deleteBySessionId.resetHistory() + + userWs.close() + await waitUntil( + 
() => serverA.execSessionManager.getExecSession($ids.sessionId) === null + ) + + expect(MicroserviceExecSessionManager.deleteBySessionId).to.not.have.been.called + expect(execRow.userConnected).to.equal(false) + expect(execRow.agentConnected).to.equal(true) + expect(serverA.execSessionManager.getExecSession($ids.sessionId)).to.equal(null) + expect(serverB.execSessionManager.getExecSession($ids.sessionId)).to.not.equal(null) + expect(hasSentMessageType(agentWs, MESSAGE_TYPES.CLOSE)).to.equal(true) + }) +}) diff --git a/test/src/websocket/ws-cross-replica.test.js b/test/src/websocket/ws-cross-replica.test.js index 0712ffac..5042c9fc 100644 --- a/test/src/websocket/ws-cross-replica.test.js +++ b/test/src/websocket/ws-cross-replica.test.js @@ -79,12 +79,12 @@ describe('WebSocket exec/log — cross-replica mock AMQP', () => { userWs, transaction ) - await wsServer.setupExecMessageForwarding(sessionId, transaction) + await wsServer.setupExecMessageForwarding(sessionId) await delay(50) const session = wsServer.execSessionManager.getExecSession(sessionId) session.agent = agentWs - await wsServer.setupExecMessageForwarding(sessionId, transaction) + await wsServer.setupExecMessageForwarding(sessionId) await delay(50) expect(mockRelay.shouldUseRelay(sessionId)).to.equal(true) @@ -164,6 +164,7 @@ describe('WebSocket exec/log — cross-replica mock AMQP', () => { false, transaction ) + await delay(50) expect(mockRelay.shouldUseRelay($ids.sessionId)).to.equal(true) expect(wsServer.execSessionManager.getExecSession($ids.sessionId).agent).to.equal(null) diff --git a/test/src/websocket/ws-exec-activation-failfast.test.js b/test/src/websocket/ws-exec-activation-failfast.test.js index 4052a708..2405fb01 100644 --- a/test/src/websocket/ws-exec-activation-failfast.test.js +++ b/test/src/websocket/ws-exec-activation-failfast.test.js @@ -40,7 +40,7 @@ describe('WebSocket exec activation fail-fast', () => { $sandbox.stub(logger, 'warn') $sandbox.stub(logger, 'debug') $sandbox.stub(wsServer, 
'sendMessageToAgent').resolves(false) - $sandbox.stub(wsServer, 'cleanupExecSession').resolves() + $sandbox.stub(wsServer, '_cleanupExecSessionInTransaction').resolves() }) afterEach(() => { @@ -62,7 +62,7 @@ describe('WebSocket exec activation fail-fast', () => { transaction ) - await wsServer.setupExecMessageForwarding(sessionId, transaction) + await wsServer.setupExecMessageForwarding(sessionId) await delay(20) const setupCompleteCalls = logger.info.getCalls().filter((call) => { diff --git a/test/src/websocket/ws-exec-same-replica.test.js b/test/src/websocket/ws-exec-same-replica.test.js index aea148b5..fd8f2053 100644 --- a/test/src/websocket/ws-exec-same-replica.test.js +++ b/test/src/websocket/ws-exec-same-replica.test.js @@ -196,7 +196,7 @@ describe('WebSocket exec — same-replica integration (Plan 17)', () => { ) }) - it('allows three concurrent exec sessions on same microservice', async () => { + it('allows five concurrent exec sessions on same microservice', async () => { let dbSessionCount = 0 $sandbox.stub(wsServer, 'countExecSessionsInDb').callsFake(async () => dbSessionCount) MicroserviceExecSessionManager.create.restore() @@ -213,7 +213,7 @@ describe('WebSocket exec — same-replica integration (Plan 17)', () => { }) const userSockets = [] - for (let i = 0; i < 3; i++) { + for (let i = 0; i < 5; i++) { const ws = createMockWebSocket() userSockets.push(ws) const req = createMockRequest(`/api/v3/microservices/exec/${$ids.microserviceUuid}`) @@ -229,7 +229,7 @@ describe('WebSocket exec — same-replica integration (Plan 17)', () => { expect(ws.readyState).to.equal(WebSocket.OPEN) } - expect(wsServer.execSessionManager.countSessionsForResource($ids.microserviceUuid)).to.equal(3) + expect(wsServer.execSessionManager.countSessionsForResource($ids.microserviceUuid)).to.equal(5) const rejectedWs = createMockWebSocket() const rejectedReq = createMockRequest(`/api/v3/microservices/exec/${$ids.microserviceUuid}`) diff --git a/test/src/websocket/ws-heartbeat.test.js 
b/test/src/websocket/ws-heartbeat.test.js new file mode 100644 index 00000000..d05a58eb --- /dev/null +++ b/test/src/websocket/ws-heartbeat.test.js @@ -0,0 +1,146 @@ +const { expect } = require('chai') +const sinon = require('sinon') +const WebSocket = require('ws') + +const WebSocketServerClass = require('../../../src/websocket/server') +const { createMockWebSocket, resetWebSocketServerSingleton } = require('../../support/ws-session-harness') + +describe('WebSocket protocol heartbeat', () => { + let sandbox + let wsServer + + beforeEach(() => { + sandbox = sinon.createSandbox() + resetWebSocketServerSingleton(WebSocketServerClass) + wsServer = new WebSocketServerClass() + wsServer.config.pingInterval = 1000 + }) + + afterEach(() => { + sandbox.restore() + resetWebSocketServerSingleton(WebSocketServerClass) + }) + + function createHeartbeatSocket () { + const ws = createMockWebSocket() + ws.ping = sandbox.spy() + return ws + } + + it('sends ws.ping on interval while socket is open', () => { + const clock = sandbox.useFakeTimers() + const ws = createHeartbeatSocket() + + wsServer._startWebSocketHeartbeat(ws, { label: 'user-exec', sessionId: 'sess-1' }) + + expect(ws.ping.called).to.equal(false) + clock.tick(1000) + expect(ws.ping.calledOnce).to.equal(true) + clock.tick(1000) + expect(ws.ping.calledTwice).to.equal(true) + + wsServer._stopWebSocketHeartbeat(ws) + clock.tick(5000) + expect(ws.ping.calledTwice).to.equal(true) + }) + + it('does not ping after socket closes', () => { + const clock = sandbox.useFakeTimers() + const ws = createHeartbeatSocket() + + wsServer._startWebSocketHeartbeat(ws, { label: 'agent-log', sessionId: 'sess-2' }) + clock.tick(1000) + expect(ws.ping.calledOnce).to.equal(true) + + ws.readyState = WebSocket.CLOSED + clock.tick(3000) + expect(ws.ping.calledOnce).to.equal(true) + }) + + it('stops heartbeat and clears timer on close event', () => { + const clock = sandbox.useFakeTimers() + const ws = createHeartbeatSocket() + + 
wsServer._startWebSocketHeartbeat(ws, { label: 'user-log', sessionId: 'sess-3' }) + clock.tick(1000) + expect(ws.ping.calledOnce).to.equal(true) + + ws.emit('close', 1000, 'normal') + expect(ws._heartbeatTimer).to.equal(null) + + clock.tick(5000) + expect(ws.ping.calledOnce).to.equal(true) + }) + + it('replaces existing timer when heartbeat is restarted', () => { + const clock = sandbox.useFakeTimers() + const ws = createHeartbeatSocket() + + wsServer._startWebSocketHeartbeat(ws, { label: 'agent-exec', sessionId: 'sess-4' }) + const firstTimer = ws._heartbeatTimer + wsServer._startWebSocketHeartbeat(ws, { label: 'agent-exec', sessionId: 'sess-4' }) + + expect(ws._heartbeatTimer).to.not.equal(firstTimer) + clock.tick(1000) + expect(ws.ping.calledOnce).to.equal(true) + }) + + it('skips heartbeat when pingInterval is disabled', () => { + const clock = sandbox.useFakeTimers() + const ws = createHeartbeatSocket() + wsServer.config.pingInterval = 0 + + wsServer._startWebSocketHeartbeat(ws, { label: 'user-exec', sessionId: 'sess-5' }) + + expect(ws._heartbeatTimer).to.equal(undefined) + clock.tick(5000) + expect(ws.ping.called).to.equal(false) + }) + + it('stops heartbeat for both peers during exec session cleanup', async () => { + const user = createHeartbeatSocket() + const agent = createHeartbeatSocket() + const sessionId = 'exec-cleanup' + const transaction = { fakeTransaction: true } + + wsServer.execSessionManager.createExecSession(sessionId, 'ms-uuid', agent, user, transaction) + wsServer._startWebSocketHeartbeat(user, { label: 'user-exec', sessionId }) + wsServer._startWebSocketHeartbeat(agent, { label: 'agent-exec', sessionId }) + + sandbox.stub(wsServer.execSessionManager, 'removeExecSession').resolves() + sandbox.stub(wsServer.relayTransport, 'cleanup').resolves() + sandbox.stub(wsServer, '_notifyExecRemotePeerClose').resolves() + + await wsServer.cleanupExecSession(sessionId, transaction) + + expect(user._heartbeatTimer).to.equal(null) + 
expect(agent._heartbeatTimer).to.equal(null) + }) + + it('stops heartbeat for both peers during log session cleanup', async () => { + const user = createHeartbeatSocket() + const agent = createHeartbeatSocket() + const sessionId = 'log-cleanup' + const transaction = { fakeTransaction: true } + + wsServer.logSessionManager.createLogSession( + sessionId, + 'ms-uuid', + null, + agent, + user, + { tail: 100, follow: true }, + transaction + ) + wsServer._startWebSocketHeartbeat(user, { label: 'user-log', sessionId }) + wsServer._startWebSocketHeartbeat(agent, { label: 'agent-log', sessionId }) + + sandbox.stub(wsServer.logSessionManager, 'removeLogSession').resolves() + sandbox.stub(wsServer.relayTransport, 'cleanupLogSession').resolves() + + await wsServer.cleanupLogSession(sessionId, transaction) + + expect(user._heartbeatTimer).to.equal(null) + expect(agent._heartbeatTimer).to.equal(null) + }) +}) diff --git a/test/src/websocket/ws-lifecycle.test.js b/test/src/websocket/ws-lifecycle.test.js index 2d598e0e..59299310 100644 --- a/test/src/websocket/ws-lifecycle.test.js +++ b/test/src/websocket/ws-lifecycle.test.js @@ -9,6 +9,7 @@ const FogManager = require('../../../src/data/managers/iofog-manager') const WebSocketServerClass = require('../../../src/websocket/server') const MicroserviceExecSessionManager = require('../../../src/data/managers/microservice-exec-session-manager') const MicroserviceLogStatusManager = require('../../../src/data/managers/microservice-log-status-manager') +const AppHelper = require('../../../src/helpers/app-helper') const MicroserviceManager = require('../../../src/data/managers/microservice-manager') const { createMockWebSocket, @@ -24,7 +25,7 @@ const FAST_CONFIG = { execMaxDurationMs: 200, logPendingTimeoutMs: 100, logIdleTimeoutMs: 500, - logMaxConcurrentPerResource: 3, + logMaxConcurrentPerResource: 5, logTailMaxLines: 5000, cleanupInterval: 50 } @@ -39,22 +40,22 @@ describe('WebSocket session lifecycle', () => { $sandbox.restore() }) - 
describe('exec 3-session quota', () => { + describe('exec 5-session quota', () => { let wsServer beforeEach(() => { resetWebSocketServerSingleton(WebSocketServerClass) wsServer = new WebSocketServerClass() - wsServer.sessionConfig = { ...wsServer.sessionConfig, execMaxConcurrentPerResource: 3 } + wsServer.sessionConfig = { ...wsServer.sessionConfig, execMaxConcurrentPerResource: 5 } }) afterEach(() => { resetWebSocketServerSingleton(WebSocketServerClass) }) - it('rejects fourth concurrent exec session for same microservice', async () => { + it('rejects sixth concurrent exec session for same microservice', async () => { $sandbox.stub(wsServer, 'validateUserConnection').resolves({ uuid: $ids.microserviceUuid }) - $sandbox.stub(wsServer, 'countExecSessionsInDb').resolves(3) + $sandbox.stub(wsServer, 'countExecSessionsInDb').resolves(5) const ws = createMockWebSocket() const req = createMockRequest(`/api/v3/microservices/exec/${$ids.microserviceUuid}`) @@ -72,22 +73,22 @@ describe('WebSocket session lifecycle', () => { }) }) - describe('log 3-viewer quota', () => { + describe('log 5-viewer quota', () => { let wsServer beforeEach(() => { resetWebSocketServerSingleton(WebSocketServerClass) wsServer = new WebSocketServerClass() - wsServer.sessionConfig = { ...wsServer.sessionConfig, logMaxConcurrentPerResource: 3 } + wsServer.sessionConfig = { ...wsServer.sessionConfig, logMaxConcurrentPerResource: 5 } }) afterEach(() => { resetWebSocketServerSingleton(WebSocketServerClass) }) - it('rejects fourth concurrent log session for same microservice', async () => { + it('rejects sixth concurrent log session for same microservice', async () => { $sandbox.stub(wsServer, 'validateUserLogsConnection').resolves({ success: true }) - $sandbox.stub(wsServer, 'countLogSessionsInDb').resolves(3) + $sandbox.stub(wsServer, 'countLogSessionsInDb').resolves(5) $sandbox.stub(wsServer, 'isValidISO8601').returns(true) const ws = createMockWebSocket() @@ -258,4 +259,49 @@ describe('WebSocket 
session lifecycle', () => { ) }) }) + + describe('relay setup deferred until transaction commits', () => { + let wsServer + + beforeEach(() => { + resetWebSocketServerSingleton(WebSocketServerClass) + wsServer = new WebSocketServerClass() + }) + + afterEach(() => { + resetWebSocketServerSingleton(WebSocketServerClass) + }) + + it('does not await relay setup before handleUserLogsConnection returns', async () => { + let setupStarted = false + $sandbox.stub(wsServer, 'validateUserLogsConnection').resolves({ success: true }) + $sandbox.stub(wsServer, 'countLogSessionsInDb').resolves(0) + $sandbox.stub(wsServer, 'isValidISO8601').returns(true) + $sandbox.stub(wsServer, 'setupLogMessageForwarding').callsFake(async () => { + setupStarted = true + }) + $sandbox.stub(MicroserviceManager, 'findOne').resolves({ iofogUuid: $ids.fogUuid, uuid: $ids.microserviceUuid }) + $sandbox.stub(FogManager, 'findOne').resolves({ uuid: $ids.fogUuid }) + $sandbox.stub(ChangeTrackingService, 'update').resolves() + $sandbox.stub(MicroserviceLogStatusManager, 'create').resolves() + $sandbox.stub(AppHelper, 'generateUUID').returns($ids.sessionId) + + const ws = createMockWebSocket() + const req = createMockRequest(`/api/v3/microservices/${$ids.microserviceUuid}/logs?tail=100`) + + await wsServer.handleUserLogsConnection( + ws, + req, + 'Bearer token', + $ids.microserviceUuid, + null, + false, + $transaction + ) + + expect(setupStarted).to.equal(false) + await delay(10) + expect(setupStarted).to.equal(true) + }) + }) }) diff --git a/test/src/websocket/ws-session-cleanup.test.js b/test/src/websocket/ws-session-cleanup.test.js new file mode 100644 index 00000000..baca6799 --- /dev/null +++ b/test/src/websocket/ws-session-cleanup.test.js @@ -0,0 +1,73 @@ +const { expect } = require('chai') +const sinon = require('sinon') + +const WebSocketServerClass = require('../../../src/websocket/server') +const transactionRunner = require('../../../src/helpers/transaction-runner') +const { PRIORITY_BACKGROUND } 
= transactionRunner +const { resetWebSocketServerSingleton } = require('../../support/ws-session-harness') + +describe('WebSocket session cleanup dedupe', () => { + def('sandbox', () => sinon.createSandbox()) + + let wsServer + + beforeEach(() => { + resetWebSocketServerSingleton(WebSocketServerClass) + wsServer = new WebSocketServerClass() + }) + + afterEach(() => { + $sandbox.restore() + resetWebSocketServerSingleton(WebSocketServerClass) + }) + + it('dedupes concurrent exec cleanups for the same sessionId', async () => { + let resolveCleanup + const gate = new Promise((resolve) => { + resolveCleanup = resolve + }) + + $sandbox.stub(transactionRunner, 'runInTransaction').callsFake(async (fn, options) => { + expect(options).to.include({ priority: PRIORITY_BACKGROUND, label: 'ws.exec.cleanup' }) + await gate + return fn({ id: 'tx-exec' }) + }) + $sandbox.stub(wsServer, 'cleanupExecSession').resolves() + + const first = wsServer._cleanupExecSessionInTransaction('session-1') + const second = wsServer._cleanupExecSessionInTransaction('session-1') + + expect(transactionRunner.runInTransaction).to.have.been.calledOnce + + resolveCleanup() + await Promise.all([first, second]) + + expect(wsServer.cleanupExecSession).to.have.been.calledOnceWith('session-1', { id: 'tx-exec' }) + expect(wsServer._execCleanupInflight.has('session-1')).to.equal(false) + }) + + it('dedupes concurrent log cleanups for the same sessionId', async () => { + let resolveCleanup + const gate = new Promise((resolve) => { + resolveCleanup = resolve + }) + + $sandbox.stub(transactionRunner, 'runInTransaction').callsFake(async (fn, options) => { + expect(options).to.include({ priority: PRIORITY_BACKGROUND, label: 'ws.log.cleanup' }) + await gate + return fn({ id: 'tx-log' }) + }) + $sandbox.stub(wsServer, 'cleanupLogSession').resolves() + + const first = wsServer._cleanupLogSessionInTransaction('log-session-1') + const second = wsServer._cleanupLogSessionInTransaction('log-session-1') + + 
expect(transactionRunner.runInTransaction).to.have.been.calledOnce + + resolveCleanup() + await Promise.all([first, second]) + + expect(wsServer.cleanupLogSession).to.have.been.calledOnceWith('log-session-1', { id: 'tx-log' }) + expect(wsServer._logCleanupInflight.has('log-session-1')).to.equal(false) + }) +}) diff --git a/test/src/websocket/ws-session-expiry.test.js b/test/src/websocket/ws-session-expiry.test.js new file mode 100644 index 00000000..f11f726f --- /dev/null +++ b/test/src/websocket/ws-session-expiry.test.js @@ -0,0 +1,160 @@ +const { expect } = require('chai') +const sinon = require('sinon') + +const ExecSessionManager = require('../../../src/websocket/exec-session-manager') +const LogSessionManager = require('../../../src/websocket/log-session-manager') +const MicroserviceExecSessionManager = require('../../../src/data/managers/microservice-exec-session-manager') +const MicroserviceLogStatusManager = require('../../../src/data/managers/microservice-log-status-manager') +const MicroserviceManager = require('../../../src/data/managers/microservice-manager') +const FogManager = require('../../../src/data/managers/iofog-manager') +const ChangeTrackingService = require('../../../src/services/change-tracking-service') + +describe('WebSocket session expiry — remoteAgentPaired / remoteUserPaired', () => { + def('sandbox', () => sinon.createSandbox()) + + beforeEach(() => { + $sandbox.stub(MicroserviceExecSessionManager, 'deleteBySessionId').resolves() + $sandbox.stub(MicroserviceManager, 'findOne').resolves({ iofogUuid: 'fog-1' }) + $sandbox.stub(FogManager, 'findOne').resolves({ uuid: 'fog-1' }) + $sandbox.stub(ChangeTrackingService, 'update').resolves() + $sandbox.stub(MicroserviceLogStatusManager, 'delete').resolves() + }) + + afterEach(() => { + $sandbox.restore() + }) + + it('does not expire cross-replica paired exec user before maxDuration', async () => { + const manager = new ExecSessionManager({ + session: { + execPendingTimeoutMs: 60000, + 
execMaxDurationMs: 28800000, + cleanupInterval: 30000 + } + }) + manager.stopCleanupInterval() + + const now = Date.now() + manager.execSessions.set('exec-1', { + sessionId: 'exec-1', + microserviceUuid: 'ms-1', + user: { readyState: 1, close: () => {} }, + agent: null, + remoteAgentPaired: true, + createdAt: now - 120000, + lastActivity: now - 1000 + }) + + const expired = await manager.cleanupExpiredSessions({}) + expect(expired).to.equal(0) + expect(manager.execSessions.has('exec-1')).to.equal(true) + expect(MicroserviceExecSessionManager.deleteBySessionId).to.not.have.been.called + }) + + it('expires cross-replica paired exec user after maxDuration idle', async () => { + const manager = new ExecSessionManager({ + session: { + execPendingTimeoutMs: 60000, + execMaxDurationMs: 1000, + cleanupInterval: 30000 + } + }) + manager.stopCleanupInterval() + + const now = Date.now() + manager.execSessions.set('exec-1', { + sessionId: 'exec-1', + microserviceUuid: 'ms-1', + user: { readyState: 1, close: $sandbox.spy() }, + agent: null, + remoteAgentPaired: true, + createdAt: now - 5000, + lastActivity: now - 2000 + }) + + const expired = await manager.cleanupExpiredSessions({}) + expect(expired).to.equal(1) + expect(manager.execSessions.has('exec-1')).to.equal(false) + }) + + it('does not expire cross-replica paired log user before idleTimeout', async () => { + const manager = new LogSessionManager({ + session: { + logPendingTimeoutMs: 120000, + logIdleTimeoutMs: 7200000, + cleanupInterval: 30000 + } + }) + manager.stopCleanupInterval() + + const now = Date.now() + manager.logSessions.set('log-1', { + sessionId: 'log-1', + microserviceUuid: 'ms-1', + fogUuid: null, + user: { readyState: 1, close: () => {} }, + agent: null, + remoteAgentPaired: true, + createdAt: now - 180000, + lastActivity: now - 1000 + }) + + const expired = await manager.cleanupExpiredSessions({}) + expect(expired).to.equal(0) + expect(manager.logSessions.has('log-1')).to.equal(true) + }) + + it('does 
not expire cross-replica paired exec agent before maxDuration', async () => { + const manager = new ExecSessionManager({ + session: { + execPendingTimeoutMs: 60000, + execMaxDurationMs: 28800000, + cleanupInterval: 30000 + } + }) + manager.stopCleanupInterval() + + const now = Date.now() + manager.execSessions.set('exec-1', { + sessionId: 'exec-1', + microserviceUuid: 'ms-1', + user: null, + agent: { readyState: 1, close: () => {} }, + remoteUserPaired: true, + createdAt: now - 120000, + lastActivity: now - 1000 + }) + + const expired = await manager.cleanupExpiredSessions({}) + expect(expired).to.equal(0) + expect(manager.execSessions.has('exec-1')).to.equal(true) + expect(MicroserviceExecSessionManager.deleteBySessionId).to.not.have.been.called + }) + + it('does not expire cross-replica paired log agent before idleTimeout', async () => { + const manager = new LogSessionManager({ + session: { + logPendingTimeoutMs: 120000, + logIdleTimeoutMs: 7200000, + cleanupInterval: 30000 + } + }) + manager.stopCleanupInterval() + + const now = Date.now() + manager.logSessions.set('log-1', { + sessionId: 'log-1', + microserviceUuid: 'ms-1', + fogUuid: null, + user: null, + agent: { readyState: 1, close: () => {} }, + remoteUserPaired: true, + createdAt: now - 180000, + lastActivity: now - 1000 + }) + + const expired = await manager.cleanupExpiredSessions({}) + expect(expired).to.equal(0) + expect(manager.logSessions.has('log-1')).to.equal(true) + }) +}) diff --git a/test/src/websocket/ws-session-orphan.test.js b/test/src/websocket/ws-session-orphan.test.js new file mode 100644 index 00000000..5f61024f --- /dev/null +++ b/test/src/websocket/ws-session-orphan.test.js @@ -0,0 +1,216 @@ +const { expect } = require('chai') +const sinon = require('sinon') +const WebSocket = require('ws') + +const WebSocketServerClass = require('../../../src/websocket/server') +const Sequelize = require('sequelize') +const Op = Sequelize.Op +const MicroserviceLogStatusManager = 
require('../../../src/data/managers/microservice-log-status-manager') +const MicroserviceExecSessionManager = require('../../../src/data/managers/microservice-exec-session-manager') +const FogLogStatusManager = require('../../../src/data/managers/fog-log-status-manager') +const MicroserviceManager = require('../../../src/data/managers/microservice-manager') +const FogManager = require('../../../src/data/managers/iofog-manager') +const ChangeTrackingService = require('../../../src/services/change-tracking-service') +const EventService = require('../../../src/services/event-service') +const { reconcileStaleSessionsInTransaction } = require('../../../src/jobs/ws-session-reconcile-job') +const agentService = require('../../../src/services/agent-service') +const { + createMockWebSocket, + createMockRequest, + createMockNatsRelayTransport, + resetWebSocketServerSingleton, + newTestIds, + delay +} = require('../../support/ws-session-harness') +const { resetTransportForTests } = require('../../../src/services/ws-relay-transport-factory') + +describe('WebSocket session orphan cleanup', () => { + def('sandbox', () => sinon.createSandbox()) + def('ids', () => newTestIds()) + + let wsServer + let transaction + + beforeEach(() => { + resetTransportForTests() + resetWebSocketServerSingleton(WebSocketServerClass) + wsServer = new WebSocketServerClass() + transaction = { fakeTransaction: true } + $sandbox.stub(EventService, 'createWsConnectEvent').resolves() + $sandbox.stub(EventService, 'createWsDisconnectEvent').resolves() + }) + + afterEach(() => { + $sandbox.restore() + resetTransportForTests() + resetWebSocketServerSingleton(WebSocketServerClass) + }) + + it('countLogSessionsInDb counts only userConnected PENDING/ACTIVE rows', async () => { + const findAll = $sandbox.stub(MicroserviceLogStatusManager, 'findAll').resolves([ + { sessionId: 'live-1' } + ]) + + const count = await wsServer.countLogSessionsInDb($ids.microserviceUuid, null, transaction) + + 
expect(count).to.equal(1) + expect(findAll).to.have.been.calledOnce + const query = findAll.firstCall.args[0] + expect(query.microserviceUuid).to.equal($ids.microserviceUuid) + expect(query.userConnected).to.equal(true) + expect(query.status[Op.in]).to.deep.equal(['PENDING', 'ACTIVE']) + }) + + it('countExecSessionsInDb counts only userConnected PENDING/ACTIVE rows', async () => { + const findAll = $sandbox.stub(MicroserviceExecSessionManager, 'findAll').resolves([]) + + await wsServer.countExecSessionsInDb($ids.microserviceUuid, transaction) + + expect(findAll).to.have.been.calledOnce + const query = findAll.firstCall.args[0] + expect(query.userConnected).to.equal(true) + expect(query.status[Op.in]).to.deep.equal(['PENDING', 'ACTIVE']) + }) + + it('full-cleans log session when agent disconnects after user already left', async () => { + const logRow = { + sessionId: $ids.sessionId, + microserviceUuid: $ids.microserviceUuid, + iofogUuid: null, + tailConfig: JSON.stringify({ lines: 100, follow: true, since: null, until: null }), + agentConnected: true, + userConnected: false + } + + wsServer.relayTransport = createMockNatsRelayTransport() + + $sandbox.stub(MicroserviceLogStatusManager, 'findOne').callsFake(async () => ({ ...logRow })) + $sandbox.stub(MicroserviceLogStatusManager, 'update').callsFake(async (_where, patch) => { + Object.assign(logRow, patch) + }) + $sandbox.stub(MicroserviceLogStatusManager, 'delete').resolves() + $sandbox.stub(MicroserviceManager, 'findOne').resolves({ iofogUuid: $ids.fogUuid }) + $sandbox.stub(FogManager, 'findOne').resolves({ uuid: $ids.fogUuid }) + $sandbox.stub(ChangeTrackingService, 'update').resolves() + $sandbox.stub(wsServer, 'validateAgentLogsConnection').resolves({ uuid: $ids.fogUuid }) + $sandbox.stub(wsServer, 'cleanupLogSession').resolves() + + const agentWs = createMockWebSocket() + const agentReq = createMockRequest( + `/api/v3/agent/logs/microservice/${$ids.microserviceUuid}/${$ids.sessionId}` + ) + 
agentReq.headers.authorization = 'Bearer fog-token' + + await wsServer.handleAgentLogsConnection( + agentWs, + agentReq, + 'Bearer fog-token', + $ids.microserviceUuid, + null, + $ids.sessionId, + transaction + ) + await delay(20) + + MicroserviceLogStatusManager.update.resetHistory() + agentWs.close(1006) + await delay(50) + + expect(wsServer.cleanupLogSession).to.have.been.calledOnceWith($ids.sessionId, sinon.match.object) + expect(MicroserviceLogStatusManager.update).to.not.have.been.called + }) + + it('full-cleans exec session when agent disconnects after user already left', async () => { + const execRow = { + sessionId: $ids.sessionId, + microserviceUuid: $ids.microserviceUuid, + status: 'ACTIVE', + userConnected: false, + agentConnected: true + } + + wsServer.relayTransport = createMockNatsRelayTransport() + + $sandbox.stub(MicroserviceExecSessionManager, 'findBySessionId').callsFake(async () => ({ ...execRow })) + $sandbox.stub(MicroserviceExecSessionManager, 'update').callsFake(async (_where, patch) => { + Object.assign(execRow, patch) + }) + $sandbox.stub(MicroserviceExecSessionManager, 'deleteBySessionId').resolves() + $sandbox.stub(MicroserviceManager, 'findOne').resolves({ iofogUuid: $ids.fogUuid }) + $sandbox.stub(FogManager, 'findOne').resolves({ uuid: $ids.fogUuid }) + $sandbox.stub(ChangeTrackingService, 'update').resolves() + $sandbox.stub(wsServer, 'validateAgentExecConnection').resolves({ uuid: $ids.fogUuid }) + $sandbox.stub(wsServer, 'cleanupExecSession').resolves() + + const agentWs = createMockWebSocket() + const agentReq = createMockRequest( + `/api/v3/agent/exec/microservice/${$ids.microserviceUuid}/${$ids.sessionId}` + ) + agentReq.headers.authorization = 'Bearer fog-token' + + await wsServer.handleAgentExecConnection( + agentWs, + agentReq, + 'Bearer fog-token', + $ids.microserviceUuid, + $ids.sessionId, + transaction + ) + await delay(20) + + MicroserviceExecSessionManager.update.resetHistory() + agentWs.close(1006) + await delay(50) + + 
expect(wsServer.cleanupExecSession).to.have.been.calledOnceWith($ids.sessionId, sinon.match.object) + expect(MicroserviceExecSessionManager.update).to.not.have.been.called + }) + + it('getAgentLogSessions omits rows without a connected user', async () => { + const fog = { uuid: $ids.fogUuid } + $sandbox.stub(MicroserviceManager, 'findAll').resolves([{ uuid: $ids.microserviceUuid }]) + $sandbox.stub(MicroserviceLogStatusManager, 'findAll').resolves([ + { + microserviceUuid: $ids.microserviceUuid, + sessionId: 'live-session', + tailConfig: JSON.stringify({ lines: 100, follow: true }), + status: 'ACTIVE', + agentConnected: false + } + ]) + $sandbox.stub(FogLogStatusManager, 'findAll').resolves([]) + + await agentService.getAgentLogSessions(fog, transaction) + + const query = MicroserviceLogStatusManager.findAll.firstCall.args[0] + expect(query.userConnected).to.equal(true) + }) + + it('reconcile deletes orphaned log rows with both sides disconnected', async () => { + const wsInstance = wsServer + $sandbox.stub(WebSocketServerClass, 'getInstance').returns(wsInstance) + + $sandbox.stub(MicroserviceExecSessionManager, 'findAll').resolves([]) + $sandbox.stub(MicroserviceLogStatusManager, 'findAll').resolves([ + { + sessionId: 'orphan-log', + microserviceUuid: $ids.microserviceUuid, + status: 'ACTIVE', + userConnected: false, + agentConnected: false, + updatedAt: new Date(Date.now() - 1000) + } + ]) + $sandbox.stub(FogLogStatusManager, 'findAll').resolves([]) + $sandbox.stub(MicroserviceLogStatusManager, 'delete').resolves() + $sandbox.stub(MicroserviceManager, 'findOne').resolves({ iofogUuid: $ids.fogUuid }) + $sandbox.stub(ChangeTrackingService, 'update').resolves() + + await reconcileStaleSessionsInTransaction(transaction) + + expect(MicroserviceLogStatusManager.delete).to.have.been.calledOnceWith( + { sessionId: 'orphan-log' }, + transaction + ) + }) +}) diff --git a/test/support/first-fog-sqlite-harness.js b/test/support/first-fog-sqlite-harness.js new file mode 
100644 index 00000000..52c8df44 --- /dev/null +++ b/test/support/first-fog-sqlite-harness.js @@ -0,0 +1,147 @@ +'use strict' + +const fs = require('fs') +const path = require('path') +const express = require('express') + +const BOOTSTRAP_PASSWORD = 'ChangeMeSecure123!' + +const ENV_KEYS = [ + 'DB_PROVIDER', + 'DB_NAME', + 'AUTH_MODE', + 'CONTROLLER_PUBLIC_URL', + 'AUTH_INSECURE_ALLOW_HTTP', + 'OIDC_BOOTSTRAP_ADMIN_USERNAME', + 'OIDC_BOOTSTRAP_ADMIN_PASSWORD', + 'CONTROL_PLANE', + 'NODE_ENV' +] + +function snapshotEnv (keys) { + return Object.fromEntries(keys.map((key) => [key, process.env[key]])) +} + +function restoreEnv (snapshot) { + for (const key of ENV_KEYS) { + if (snapshot[key] === undefined) { + delete process.env[key] + } else { + process.env[key] = snapshot[key] + } + } +} + +function applyEnv (values) { + for (const [key, value] of Object.entries(values)) { + process.env[key] = value + } +} + +function sqliteStoragePath (dbName) { + return path.resolve(__dirname, '../../src/data/sqlite_files', dbName) +} + +function cleanupSqliteFiles (dbName) { + const base = sqliteStoragePath(dbName) + for (const suffix of ['', '-wal', '-shm']) { + try { + fs.unlinkSync(base + suffix) + } catch (_) { /* ignore */ } + } +} + +function installBusyRetryCounter (onRetry) { + const dbMetrics = require('../../src/helpers/db-metrics') + const original = dbMetrics.recordBusyRetry + dbMetrics.recordBusyRetry = (...args) => { + onRetry() + return original(...args) + } + return () => { + dbMetrics.recordBusyRetry = original + } +} + +async function sleep (ms) { + return new Promise((resolve) => setTimeout(resolve, ms)) +} + +async function driveReconcileUntilReady (fogUuid, { + timeoutMs = 10000, + drainOnce, + processNextFogTask, + processNextNatsTask = async () => {}, + getStatus +}) { + const startedAt = Date.now() + while (Date.now() - startedAt < timeoutMs) { + await drainOnce() + await processNextFogTask() + await processNextNatsTask() + + const status = await 
getStatus(fogUuid) + if (status && status.phase === 'Ready') { + return status + } + + await sleep(50) + } + + const lastStatus = await getStatus(fogUuid) + throw new Error( + `Timed out waiting for Ready (last phase: ${lastStatus && lastStatus.phase}, ` + + `lastError: ${lastStatus && lastStatus.lastError})` + ) +} + +async function createFirstFogSqliteHarness () { + const envSnapshot = snapshotEnv(ENV_KEYS) + const dbName = `first-fog-int-${Date.now()}-${Math.random().toString(36).slice(2)}.sqlite` + + applyEnv({ + DB_PROVIDER: 'sqlite', + DB_NAME: dbName, + AUTH_MODE: 'embedded', + CONTROLLER_PUBLIC_URL: 'http://controller.test', + AUTH_INSECURE_ALLOW_HTTP: 'true', + OIDC_BOOTSTRAP_ADMIN_USERNAME: 'admin', + OIDC_BOOTSTRAP_ADMIN_PASSWORD: BOOTSTRAP_PASSWORD, + NODE_ENV: 'test' + }) + delete process.env.CONTROL_PLANE + + const { _resetQueueForTests } = require('../../src/helpers/transaction-runner') + _resetQueueForTests() + + const { initialize } = require('../../src/init') + await initialize() + + const { runBootstrap } = require('../../src/services/auth-bootstrap-service') + await runBootstrap() + + const db = require('../../src/data/models') + const { initEmbeddedIssuer, resetEmbeddedIssuerForTests } = require('../../src/config/embedded-oidc') + const { resetSigningMaterialCacheForTests } = require('../../src/config/auth-jwks') + await initEmbeddedIssuer(express(), { db }) + + return { + bootstrapPassword: BOOTSTRAP_PASSWORD, + dbName, + async teardown () { + await db.sequelize.close() + _resetQueueForTests() + resetEmbeddedIssuerForTests() + resetSigningMaterialCacheForTests() + cleanupSqliteFiles(dbName) + restoreEnv(envSnapshot) + } + } +} + +module.exports = { + BOOTSTRAP_PASSWORD, + createFirstFogSqliteHarness, + driveReconcileUntilReady, + installBusyRetryCounter +} diff --git a/test/support/ws-session-harness.js b/test/support/ws-session-harness.js index b2e0e26e..f8f34b13 100644 --- a/test/support/ws-session-harness.js +++ 
b/test/support/ws-session-harness.js @@ -85,6 +85,37 @@ function buildExecFrame (type, execId, microserviceUuid, data) { }) } +function mergeExecBridgeSession (existing, incoming) { + if (!existing) { + return incoming + } + return { + ...existing, + ...incoming, + execId: incoming.execId || existing.execId, + sessionId: incoming.sessionId || existing.sessionId, + microserviceUuid: incoming.microserviceUuid || existing.microserviceUuid, + user: incoming.user || existing.user, + agent: incoming.agent || existing.agent + } +} + +function mergeLogBridgeSession (existing, incoming) { + if (!existing) { + return incoming + } + return { + ...existing, + ...incoming, + sessionId: incoming.sessionId || existing.sessionId, + microserviceUuid: incoming.microserviceUuid || existing.microserviceUuid, + fogUuid: incoming.fogUuid || existing.fogUuid, + user: incoming.user || existing.user, + agent: incoming.agent || existing.agent, + tailConfig: incoming.tailConfig || existing.tailConfig + } +} + /** * In-memory relay stub for cross-replica exec/log tests. 
* @param {'amqp'|'nats'} [transport='amqp'] @@ -110,7 +141,7 @@ function createMockRelayTransport (transport = 'amqp') { if (!execId) return false const existing = execBridges.get(execId) if (existing) { - existing.session = session + existing.session = mergeExecBridgeSession(existing.session, session) if (cleanupCallback) { existing.cleanupCallback = cleanupCallback } @@ -124,10 +155,34 @@ function createMockRelayTransport (transport = 'amqp') { return execBridges.has(execId) }, + setExecUserDeliveryHook (execId, hook) { + const bridge = execBridges.get(execId) + if (bridge) { + bridge.onUserRelayDelivery = hook + } + }, + + setExecAgentDeliveryHook (execId, hook) { + const bridge = execBridges.get(execId) + if (bridge) { + bridge.onAgentRelayDelivery = hook + } + }, + + setLogUserDeliveryHook (sessionId, hook) { + const bridge = logBridges.get(sessionId) + if (bridge) { + bridge.onUserRelayDelivery = hook + } + }, + async publishToAgent (execId, buffer) { const bridge = execBridges.get(execId) if (bridge && bridge.session.agent && bridge.session.agent.readyState === WebSocket.OPEN) { bridge.session.agent.send(buffer, { binary: true }) + if (bridge.onAgentRelayDelivery) { + bridge.onAgentRelayDelivery(buffer) + } } }, @@ -135,6 +190,9 @@ function createMockRelayTransport (transport = 'amqp') { const bridge = execBridges.get(execId) if (bridge && bridge.session.user && bridge.session.user.readyState === WebSocket.OPEN) { bridge.session.user.send(buffer, { binary: true }) + if (bridge.onUserRelayDelivery) { + bridge.onUserRelayDelivery(buffer) + } } }, @@ -144,7 +202,15 @@ function createMockRelayTransport (transport = 'amqp') { async enableForLogSession (session, cleanupCallback) { const sessionId = session.sessionId - logBridges.set(sessionId, { session, cleanupCallback }) + const existing = logBridges.get(sessionId) + if (existing) { + existing.session = mergeLogBridgeSession(existing.session, session) + if (cleanupCallback) { + existing.cleanupCallback = 
cleanupCallback + } + } else { + logBridges.set(sessionId, { session, cleanupCallback }) + } return true }, @@ -155,7 +221,10 @@ function createMockRelayTransport (transport = 'amqp') { async publishLogToUser (sessionId, buffer) { const bridge = logBridges.get(sessionId) if (bridge && bridge.session.user && bridge.session.user.readyState === WebSocket.OPEN) { - bridge.session.user.emit('message', buffer, true) + bridge.session.user.send(buffer, { binary: true }) + if (bridge.onUserRelayDelivery) { + bridge.onUserRelayDelivery(buffer) + } } }, @@ -232,6 +301,17 @@ function waitForSent (ws, minCount = 1, timeoutMs = 2000) { }) } +async function waitUntil (predicate, timeoutMs = 2000, intervalMs = 10) { + const deadline = Date.now() + timeoutMs + while (Date.now() < deadline) { + if (predicate()) { + return + } + await delay(intervalMs) + } + throw new Error(`Timed out after ${timeoutMs}ms waiting for condition`) +} + module.exports = { MESSAGE_TYPES, WS_CLOSE_CODES, @@ -249,5 +329,6 @@ module.exports = { buildFakeJwt, newTestIds, lastSentBinary, - waitForSent + waitForSent, + waitUntil } diff --git a/test/vault/openbao.md b/test/vault/openbao.md index 6c90e58a..ef2242ff 100644 --- a/test/vault/openbao.md +++ b/test/vault/openbao.md @@ -24,3 +24,6 @@ api_addr = "http://127.0.0.1:8200" + + \ No newline at end of file