diff --git a/charts/teachlink-backend/Chart.yaml b/charts/teachlink-backend/Chart.yaml
new file mode 100644
index 00000000..dcf6ede0
--- /dev/null
+++ b/charts/teachlink-backend/Chart.yaml
@@ -0,0 +1,17 @@
+apiVersion: v2  # Helm 3 chart format
+name: teachlink-backend
+description: Helm chart for the TeachLink backend API service
+type: application
+version: 0.1.0  # chart version; becomes part of the helm.sh/chart label
+appVersion: "1.0.0"  # application version; quoted so it is always read as a string
+
+keywords:
+  - teachlink
+  - backend
+  - nestjs
+
+maintainers:
+  - name: rinafcode
+    url: https://github.com/rinafcode
+
+dependencies: []  # no subcharts
diff --git a/charts/teachlink-backend/templates/_helpers.tpl b/charts/teachlink-backend/templates/_helpers.tpl
new file mode 100644
index 00000000..c6287902
--- /dev/null
+++ b/charts/teachlink-backend/templates/_helpers.tpl
@@ -0,0 +1,49 @@
+{{/*
+Chart name: .Values.nameOverride when set, otherwise .Chart.Name; cut to 63 chars with no trailing "-".
+*/}}
+{{- define "teachlink-backend.name" -}}
+{{- .Values.nameOverride | default .Chart.Name | trunc 63 | trimSuffix "-" }}
+{{- end }}
+
+{{/*
+Fully qualified app name: fullnameOverride if set; else the release name, prefixed to the chart name unless it already contains it.
+*/}}
+{{- define "teachlink-backend.fullname" -}}
+{{- with .Values.fullnameOverride }}
+{{- . | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- $base := .Values.nameOverride | default .Chart.Name }}
+{{- if .Release.Name | contains $base }}
+{{- .Release.Name | trunc 63 | trimSuffix "-" }}
+{{- else }}
+{{- printf "%s-%s" .Release.Name $base | trunc 63 | trimSuffix "-" }}
+{{- end }}
+{{- end }}
+{{- end }}
+
+{{/*
+Common labels: chart label, selector labels, app version (only when set) and the managing service.
+*/}}
+{{- define "teachlink-backend.labels" -}}
+helm.sh/chart: {{ include "teachlink-backend.chart" . }}
+{{ include "teachlink-backend.selectorLabels" . }}
+{{- with .Chart.AppVersion }}
+app.kubernetes.io/version: {{ quote . }}
+{{- end }}
+app.kubernetes.io/managed-by: {{ .Release.Service }}
+{{- end }}
+
+{{/*
+Selector labels: the chart name (via "teachlink-backend.name") and the release name as the instance.
+*/}}
+{{- define "teachlink-backend.selectorLabels" -}}
+app.kubernetes.io/name: {{ include "teachlink-backend.name" . }}
+app.kubernetes.io/instance: {{ .Release.Name }}
+{{- end }}
+
+{{/*
+Chart label value: "<chart name>-<chart version>" with "+" replaced by "_", cut to 63 chars with no trailing "-".
+*/}}
+{{- define "teachlink-backend.chart" -}}
+{{- replace "+" "_" (printf "%s-%s" .Chart.Name .Chart.Version) | trunc 63 | trimSuffix "-" }}
+{{- end }}
diff --git a/charts/teachlink-backend/templates/prometheus-rules.yaml b/charts/teachlink-backend/templates/prometheus-rules.yaml
new file mode 100644
index 00000000..bf6f6439
--- /dev/null
+++ b/charts/teachlink-backend/templates/prometheus-rules.yaml
@@ -0,0 +1,117 @@
+{{- if .Values.prometheusRule.enabled }}
+apiVersion: monitoring.coreos.com/v1
+kind: PrometheusRule
+metadata:
+  name: {{ include "teachlink-backend.fullname" . }}-alerts
+  namespace: {{ .Release.Namespace }}
+  labels:
+    # Standard labels come from the shared "teachlink-backend.labels" helper so
+    # the label set is defined in one place (this also adds the app version).
+    # additionalLabels are appended after them, e.g. for the ruleSelector match.
+    {{- include "teachlink-backend.labels" . | nindent 4 }}
+    {{- with .Values.prometheusRule.additionalLabels }}
+    {{- toYaml . | nindent 4 }}
+    {{- end }}
+spec:
+  groups:
+    # ──────────────────────────────────────────────────────────────────────────
+    # Group: SLA — fires when the service breaches contractual error/latency SLAs
+    # ──────────────────────────────────────────────────────────────────────────
+    - name: teachlink.sla
+      interval: 30s
+      rules:
+
+        # HighErrorRate
+        # Fires when the 5xx share of all requests over a 5-minute window exceeds
+        # thresholds.errorRatePct (a fraction; default 0.01 = 1 %) for 5 minutes.
+        # Requires the NestJS "http_requests_total" counter with a "status_code" label.
+        - alert: HighErrorRate
+          expr: |
+            (
+              sum(rate(http_requests_total{
+                job="{{ .Values.prometheusRule.jobLabel }}",
+                status_code=~"5.."
+              }[5m]))
+              /
+              sum(rate(http_requests_total{
+                job="{{ .Values.prometheusRule.jobLabel }}"
+              }[5m]))
+            ) > {{ .Values.prometheusRule.thresholds.errorRatePct | default 0.01 }}
+          for: 5m
+          labels:
+            severity: critical
+            team: backend
+            service: teachlink-backend
+          annotations:
+            summary: "High 5xx error rate on TeachLink backend"
+            description: "The 5xx error ratio has exceeded the configured SLA threshold of {{ .Values.prometheusRule.thresholds.errorRatePct | default 0.01 }} (fraction of requests). See runbook for triage steps."
+            runbook_url: "{{ .Values.prometheusRule.runbookBaseUrl }}/RUNBOOKS.md#higherrorrate"
+
+        # HighP99Latency
+        # Fires when the 99th-percentile request latency stays above
+        # thresholds.p99LatencySeconds (default 1 s) for 10 minutes. Requires the
+        # "http_request_duration_seconds" histogram from the NestJS metrics module.
+        - alert: HighP99Latency
+          expr: |
+            histogram_quantile(
+              0.99,
+              sum by (le) (
+                rate(http_request_duration_seconds_bucket{
+                  job="{{ .Values.prometheusRule.jobLabel }}"
+                }[5m])
+              )
+            ) > {{ .Values.prometheusRule.thresholds.p99LatencySeconds | default 1.0 }}
+          for: 10m
+          labels:
+            severity: warning
+            team: backend
+            service: teachlink-backend
+          annotations:
+            summary: "P99 request latency exceeds {{ .Values.prometheusRule.thresholds.p99LatencySeconds | default 1.0 }} s on TeachLink backend"
+            description: "P99 latency has been above the {{ .Values.prometheusRule.thresholds.p99LatencySeconds | default 1.0 }} s SLA threshold for 10 minutes. See runbook for triage steps."
+            runbook_url: "{{ .Values.prometheusRule.runbookBaseUrl }}/RUNBOOKS.md#highp99latency"
+
+    # ──────────────────────────────────────────────────────────────────────────
+    # Group: queues — fires on job-queue saturation or dead-letter accumulation
+    # ──────────────────────────────────────────────────────────────────────────
+    - name: teachlink.queues
+      interval: 60s
+      rules:
+
+        # QueueDepthHigh
+        # Fires when any Bull/BullMQ queue has more than thresholds.queueDepth
+        # (default 1 000) waiting jobs for 10 minutes. Requires the "bull_queue_waiting"
+        # gauge emitted by the @willsoto/nestjs-prometheus Bull metrics plugin.
+        - alert: QueueDepthHigh
+          expr: |
+            bull_queue_waiting{
+              job="{{ .Values.prometheusRule.jobLabel }}"
+            } > {{ .Values.prometheusRule.thresholds.queueDepth | default 1000 }}
+          for: 10m
+          labels:
+            severity: warning
+            team: backend
+            service: teachlink-backend
+          annotations:
+            summary: "Job queue depth exceeds {{ .Values.prometheusRule.thresholds.queueDepth | default 1000 }} on TeachLink backend"
+            description: "A Bull/BullMQ queue has had more than {{ .Values.prometheusRule.thresholds.queueDepth | default 1000 }} waiting jobs for 10 minutes. See runbook for triage steps."
+            runbook_url: "{{ .Values.prometheusRule.runbookBaseUrl }}/RUNBOOKS.md#queuedepthhigh"
+
+        # DLQDepthHigh
+        # Fires when a queue's failed-job count stays above thresholds.dlqDepth
+        # (default 50) for 5 minutes. Requires the "bull_queue_failed" gauge.
+        - alert: DLQDepthHigh
+          expr: |
+            bull_queue_failed{
+              job="{{ .Values.prometheusRule.jobLabel }}"
+            } > {{ .Values.prometheusRule.thresholds.dlqDepth | default 50 }}
+          for: 5m
+          labels:
+            severity: critical
+            team: backend
+            service: teachlink-backend
+          annotations:
+            summary: "Dead-letter queue depth is growing on TeachLink backend"
+            description: "More than {{ .Values.prometheusRule.thresholds.dlqDepth | default 50 }} jobs have moved to the failed DLQ and are not being retried. See runbook for triage steps."
+            runbook_url: "{{ .Values.prometheusRule.runbookBaseUrl }}/RUNBOOKS.md#dlqdepthhigh"
+{{- end }}
diff --git a/charts/teachlink-backend/values.yaml b/charts/teachlink-backend/values.yaml
new file mode 100644
index 00000000..a352b754
--- /dev/null
+++ b/charts/teachlink-backend/values.yaml
@@ -0,0 +1,159 @@
+# ─────────────────────────────────────────────────────────────────────────────
+# TeachLink Backend – Helm chart default values
+# Override any of these in values-staging.yaml / values-production.yaml
+# ─────────────────────────────────────────────────────────────────────────────
+
+# -- Workload settings (presumably read by Deployment/Service templates; none are part of this change)
+replicaCount: 2
+
+image:
+  repository: ghcr.io/rinafcode/teachlink-backend
+  pullPolicy: IfNotPresent
+  tag: ""          # Defaults to Chart.appVersion when empty
+
+service:
+  type: ClusterIP
+  port: 3001
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Prometheus alerting rules
+# ─────────────────────────────────────────────────────────────────────────────
+prometheusRule:
+  # Set to true to deploy the PrometheusRule CR.
+  # Requires kube-prometheus-stack (or prometheus-operator) to be installed.
+  enabled: true
+
+  # Added to the PrometheusRule's labels; must match your Prometheus CR's ruleSelector.
+  # With kube-prometheus-stack defaults this label is sufficient.
+  additionalLabels:
+    release: kube-prometheus-stack
+
+  # The Prometheus `job` label value that the backend pods are scraped under.
+  # Matches the ServiceMonitor / PodMonitor `jobLabel` field; used as job="…" in every alert expr.
+  jobLabel: teachlink-backend
+
+  # Base URL prepended to runbook fragment links embedded in alert annotations.
+  # Point this at your Git-hosting URL so on-call engineers can click straight
+  # through from Alertmanager / PagerDuty / Slack to the runbook section.
+  runbookBaseUrl: "https://github.com/rinafcode/teachLink_backend/blob/main/docs"
+
+  # SLA thresholds – override per environment if needed.
+  thresholds:
+    errorRatePct: 0.01        # 0.01 = 1 % – a fraction in [0, 1], despite the "Pct" suffix
+    p99LatencySeconds: 1.0    # 1 s
+    queueDepth: 1000          # waiting jobs
+    dlqDepth: 50              # failed jobs in DLQ
+
+# ─────────────────────────────────────────────────────────────────────────────
+# Alertmanager – Slack / webhook routing
+#
+# This section configures the Alertmanager config secret that kube-prometheus-
+# stack creates.  Set alertmanager.enabled=true and fill in your Slack
+# webhook URL (or supply it via a pre-existing Secret; see secretRef below).
+# ─────────────────────────────────────────────────────────────────────────────
+alertmanager:
+  enabled: true
+
+  # ── Slack integration ──────────────────────────────────────────────────────
+  slack:
+    # Whether to enable Slack notifications (on by default; note webhookUrl below defaults to empty).
+    enabled: true
+
+    # Webhook URL for your Slack app.
+    # NEVER commit a real token here.  Supply it at deploy-time:
+    #   helm upgrade ... --set alertmanager.slack.webhookUrl="https://hooks.slack.com/..."
+    # or store it in a Kubernetes Secret and reference it via secretRef below.
+    webhookUrl: ""
+
+    # Alternatively, reference a pre-existing Secret that contains the key
+    # `slack-webhook-url`.  When set, webhookUrl above is ignored.
+    secretRef:
+      name: ""       # e.g. teachlink-alertmanager-secrets
+      key: slack-webhook-url
+
+    # Slack channels per severity level.
+    channels:
+      critical: "#teachlink-alerts-critical"
+      warning: "#teachlink-alerts-warning"
+      default: "#teachlink-alerts"
+
+  # ── Generic webhook (PagerDuty, OpsGenie, custom endpoint, …) ─────────────
+  webhook:
+    enabled: false
+    url: ""
+
+  # ── Alertmanager routing config (rendered into the kube-prometheus-stack
+  #    alertmanager.config value).
+  #    Adjust `group_wait`, `group_interval`, and `repeat_interval` to taste.
+  config:
+    global:
+      resolve_timeout: 5m
+      # slack_api_url is meant to be injected from alertmanager.slack.webhookUrl; do not set it here.
+      # NOTE(review): the _helpers.tpl added with this chart defines no such helper — confirm where it lives.
+
+    route:
+      group_by: ['alertname', 'service', 'namespace']
+      group_wait: 30s
+      group_interval: 5m
+      repeat_interval: 4h
+      receiver: slack-default
+
+      routes:
+        # Critical alerts → dedicated critical channel, short repeat.
+        - matchers:
+            - severity = "critical"
+          receiver: slack-critical
+          repeat_interval: 1h
+
+        # Warning alerts → warning channel.
+        - matchers:
+            - severity = "warning"
+          receiver: slack-warning
+          repeat_interval: 4h
+
+    receivers:  # NOTE(review): the {{ … }} expressions below are only expanded if the chart renders this block through tpl — confirm
+      - name: slack-default
+        slack_configs:
+          - channel: "{{ .Values.alertmanager.slack.channels.default }}"
+            send_resolved: true
+            title: '[{{ "{{" }} .Status | toUpper {{ "}}" }}{{ "{{" }} if eq .Status "firing" {{ "}}" }}:{{ "{{" }} .Alerts.Firing | len {{ "}}" }}{{ "{{" }} end {{ "}}" }}] TeachLink Alert'
+            text: |-  # literal block: each field stays on its own line in the Slack message
+              {{ "{{" }} range .Alerts {{ "}}" }}
+              *Alert:* {{ "{{" }} .Annotations.summary {{ "}}" }}
+              *Severity:* {{ "{{" }} .Labels.severity {{ "}}" }}
+              *Description:* {{ "{{" }} .Annotations.description {{ "}}" }}
+              *Runbook:* {{ "{{" }} .Annotations.runbook_url {{ "}}" }}
+              {{ "{{" }} end {{ "}}" }}
+
+      - name: slack-critical
+        slack_configs:
+          - channel: "{{ .Values.alertmanager.slack.channels.critical }}"
+            send_resolved: true
+            title: '🚨 [CRITICAL] TeachLink Alert'
+            text: |-  # literal block: each field stays on its own line in the Slack message
+              {{ "{{" }} range .Alerts {{ "}}" }}
+              *Alert:* {{ "{{" }} .Annotations.summary {{ "}}" }}
+              *Description:* {{ "{{" }} .Annotations.description {{ "}}" }}
+              *Runbook:* {{ "{{" }} .Annotations.runbook_url {{ "}}" }}
+              {{ "{{" }} end {{ "}}" }}
+
+      - name: slack-warning
+        slack_configs:
+          - channel: "{{ .Values.alertmanager.slack.channels.warning }}"
+            send_resolved: true
+            title: '⚠️ [WARNING] TeachLink Alert'
+            text: |-  # literal block: each field stays on its own line in the Slack message
+              {{ "{{" }} range .Alerts {{ "}}" }}
+              *Alert:* {{ "{{" }} .Annotations.summary {{ "}}" }}
+              *Description:* {{ "{{" }} .Annotations.description {{ "}}" }}
+              *Runbook:* {{ "{{" }} .Annotations.runbook_url {{ "}}" }}
+              {{ "{{" }} end {{ "}}" }}
+
+    inhibit_rules:
+      # Mute warning alerts while a critical alert with the same `service` and
+      # `namespace` label values is firing, to reduce noise.
+      - source_matchers:
+          - severity = "critical"
+        target_matchers:
+          - severity = "warning"
+        equal: ['service', 'namespace']
diff --git a/docs/API_VERSIONING_POLICY.md b/docs/API_VERSIONING_POLICY.md
index 811d8679..32c7d137 100644
--- a/docs/API_VERSIONING_POLICY.md
+++ b/docs/API_VERSIONING_POLICY.md
@@ -15,6 +15,18 @@ This project uses URL-based API versioning to protect clients from breaking chan
 - Path-based versioning is the primary version selection mechanism
 - API clients should prefer explicit `/api/v1/...` paths when available
 
+## Supported version numbers
+
+The middleware validates version strings against the pattern `/^v\d+$/` (the letter
+`v` followed by one or more digits). Any other format is rejected with `400 Bad Request`.
+
+| Version | Status  | Notes                   |
+|---------|---------|-------------------------|
+| `v1`    | Active  | Current stable version  |
+| `v2`    | Planned | Reserved for future use |
+
+Examples of **invalid** version strings that are rejected: `vABC`, `v1.2`, `../v1`, `123`.
+
 ## Compatibility layer
 
 The middleware rewrites legacy API requests from `/api/*` to `/api/v1/*`.
diff --git a/docs/RUNBOOKS.md b/docs/RUNBOOKS.md
new file mode 100644
index 00000000..a98d4394
--- /dev/null
+++ b/docs/RUNBOOKS.md
@@ -0,0 +1,338 @@
+# TeachLink Backend – Alert Runbooks
+
+This document provides on-call guidance for every Prometheus alert defined in
+`charts/teachlink-backend/templates/prometheus-rules.yaml`.
+
+Each section follows the same structure:
+
+1. **What it means** – plain-English explanation of why the alert fired.
+2. **Impact** – which users or features are affected.
+3. **Triage steps** – ordered checklist to reproduce, scope, and diagnose.
+4. **Remediation** – actions to resolve the incident.
+5. **Escalation** – who to page if the steps above don't resolve it within SLA.
+
+---
+
+## Table of Contents
+
+- [HighErrorRate](#higherrorrate)
+- [HighP99Latency](#highp99latency)
+- [QueueDepthHigh](#queuedepthhigh)
+- [DLQDepthHigh](#dlqdepthhigh)
+
+---
+
+## HighErrorRate
+
+**Severity:** `critical`
+**Alert expression:**
+```promql
+(
+  sum(rate(http_requests_total{job="teachlink-backend", status_code=~"5.."}[5m]))
+  /
+  sum(rate(http_requests_total{job="teachlink-backend"}[5m]))
+) > 0.01
+```
+**Fires when:** More than 1 % of all HTTP requests have returned a 5xx response
+code over a rolling 5-minute window, sustained for 5 minutes.
+
+### What it means
+
+A meaningful fraction of API calls are failing server-side.  This can be caused
+by unhandled exceptions, database timeouts, downstream service failures,
+out-of-memory crashes, or a bad deployment.
+
+### Impact
+
+- End-users see errors when loading courses, submitting assignments, or making
+  payments.
+- API consumers (mobile apps, third-party integrations) receive 5xx responses.
+
+### Triage steps
+
+1. **Confirm the alert is genuine** – open Grafana → TeachLink API dashboard →
+   "Error Rate" panel.  Verify the rate exceeds 1 % and is not a transient blip.
+
+2. **Identify the failing endpoints:**
+   ```promql
+   topk(10,
+     sum by (path, method, status_code) (rate(http_requests_total{job="teachlink-backend", status_code=~"5.."}[5m]))
+   )
+   ```
+
+3. **Check pod logs for stack traces:**
+   ```bash
+   kubectl logs -n <namespace> -l app.kubernetes.io/name=teachlink-backend \
+     --since=10m | grep -E "(ERROR|Exception|5[0-9][0-9])"
+   ```
+
+4. **Check recent deployments:**
+   ```bash
+   kubectl rollout history deployment/teachlink-backend -n <namespace>
+   ```
+   If a deployment was rolled out in the last 30 minutes, rollback is the
+   fastest mitigation (see Remediation below).
+
+5. **Check downstream dependencies** – database connectivity, Redis, external
+   payment APIs.  Look for connection-refused or timeout errors in the logs.
+
+6. **Check pod restarts / OOMKilled events:**
+   ```bash
+   kubectl get pods -n <namespace> -l app.kubernetes.io/name=teachlink-backend
+   kubectl describe pod <pod-name> -n <namespace> | grep -A5 "Last State"
+   ```
+
+### Remediation
+
+| Cause | Action |
+|---|---|
+| Bad deployment | `kubectl rollout undo deployment/teachlink-backend -n <namespace>` |
+| Database down | Restore DB or failover to replica; check RDS/PG logs |
+| Pod OOMKilled | Increase `resources.limits.memory` in values; redeploy |
+| Downstream API down | Enable circuit-breaker flag or return cached fallback |
+| Unhandled exception | Hot-fix the code path identified in logs, redeploy |
+
+### Escalation
+
+If error rate does not drop below 1 % within **15 minutes** of initial triage:
+- Page the on-call backend engineer via PagerDuty.
+- Notify `#teachlink-incidents` Slack channel with a brief status update.
+
+---
+
+## HighP99Latency
+
+**Severity:** `warning`
+**Alert expression:**
+```promql
+histogram_quantile(
+  0.99,
+  sum by (le) (
+    rate(http_request_duration_seconds_bucket{job="teachlink-backend"}[5m])
+  )
+) > 1.0
+```
+**Fires when:** The 99th-percentile request latency exceeds 1 second for 10
+consecutive minutes.
+
+### What it means
+
+At least 1 % of requests are taking longer than 1 second.  Common culprits are
+slow database queries, N+1 query patterns, lock contention, CPU throttling, or
+memory pressure causing GC pauses.
+
+### Impact
+
+- Users experience sluggish page loads and time-outs on slow connections.
+- Background jobs that call the API may queue up, eventually triggering
+  `QueueDepthHigh`.
+
+### Triage steps
+
+1. **Identify the slow endpoints:**
+   ```promql
+   topk(10,
+     histogram_quantile(0.99,
+       sum by (path, method, le) (rate(http_request_duration_seconds_bucket{job="teachlink-backend"}[5m]))
+     )
+   )
+   ```
+
+2. **Check database slow-query logs:**
+   - RDS Performance Insights → filter by `wait_event_type = Lock` or
+     `wait_event_type = IO`.
+   - Look for queries taking > 500 ms.
+
+3. **Check CPU and memory utilisation:**
+   ```promql
+   rate(process_cpu_seconds_total{job="teachlink-backend"}[5m]) * 100
+   process_resident_memory_bytes{job="teachlink-backend"} / 1024 / 1024
+   ```
+
+4. **Check for pod CPU throttling:**
+   ```bash
+   kubectl top pods -n <namespace> -l app.kubernetes.io/name=teachlink-backend
+   ```
+   If pods are at or near CPU limit, throttling is the likely cause.
+
+5. **Enable query explain-analyse** on the suspected slow query in a staging
+   environment to confirm.
+
+### Remediation
+
+| Cause | Action |
+|---|---|
+| Slow DB query | Add index; rewrite query; cache result with Redis |
+| CPU throttling | Increase `resources.limits.cpu`; add HPA scaling rule |
+| N+1 queries | Apply DataLoader / eager-load relations in ORM |
+| Memory pressure / GC | Increase memory limit; profile heap with `clinic.js` |
+| External API slow | Add timeouts; cache responses; use background job |
+
+### Escalation
+
+If P99 latency remains above 1 s after **30 minutes**:
+- Page backend engineer.
+- If DB is implicated, page the DBA on-call.
+
+---
+
+## QueueDepthHigh
+
+**Severity:** `warning`
+**Alert expression:**
+```promql
+bull_queue_waiting{job="teachlink-backend"} > 1000
+```
+**Fires when:** Any Bull/BullMQ queue has more than 1 000 jobs waiting to be
+processed for 10 consecutive minutes.
+
+### What it means
+
+Workers are not consuming jobs fast enough.  This can mean workers have crashed,
+processing is too slow, or a traffic spike has produced an unusual burst of jobs.
+
+### Impact
+
+- Delayed delivery of emails, push notifications, certificate generation, or
+  other async tasks.
+- If the queue continues growing, Redis memory pressure will follow.
+
+### Triage steps
+
+1. **Identify which queue is backed up:**
+   ```promql
+   topk(5, bull_queue_waiting{job="teachlink-backend"}) by (queue)
+   ```
+
+2. **Check the rate of job consumption vs. arrival:**
+   ```promql
+   sum by (queue) (rate(bull_queue_completed{job="teachlink-backend"}[5m]))
+   sum by (queue) (rate(bull_queue_added{job="teachlink-backend"}[5m]))
+   ```
+
+3. **Check worker pod health:**
+   ```bash
+   kubectl get pods -n <namespace> -l app.kubernetes.io/name=teachlink-backend
+   kubectl logs -n <namespace> <worker-pod> --since=10m | grep -i "worker\|queue\|bull"
+   ```
+
+4. **Check Redis health** (Bull backs onto Redis):
+   ```bash
+   kubectl exec -it <redis-pod> -n <namespace> -- redis-cli INFO memory
+   ```
+
+5. **Check for a sudden spike in job arrivals** (e.g., a scheduled batch job or
+   user-triggered bulk operation).
+
+### Remediation
+
+| Cause | Action |
+|---|---|
+| Workers crashed | `kubectl rollout restart deployment/teachlink-backend -n <namespace>` |
+| Too few workers | Scale out: `kubectl scale deployment/teachlink-backend --replicas=N` |
+| Slow job processing | Profile the job handler; optimise DB calls or external I/O |
+| Redis OOM | Increase Redis memory limit or purge stale keys |
+| Burst traffic | Enable rate-limiting at API layer to reduce job creation rate |
+
+### Escalation
+
+If queue depth does not decrease within **20 minutes**:
+- Page backend engineer.
+- If Redis is implicated, page infrastructure on-call.
+
+---
+
+## DLQDepthHigh
+
+**Severity:** `critical`
+**Alert expression:**
+```promql
+bull_queue_failed{job="teachlink-backend"} > 50
+```
+**Fires when:** More than 50 jobs have moved to the failed (dead-letter) state
+within a queue, sustained for 5 minutes.
+
+### What it means
+
+Jobs are failing repeatedly and exhausting their retry budget.  No further
+automatic retries will occur for these jobs — the work is effectively lost until
+an engineer intervenes.
+
+### Impact
+
+- Permanent failure of async tasks: emails unsent, certificates not issued,
+  webhooks not delivered, payments not reconciled.
+- Data consistency issues if jobs were part of a saga or transactional workflow.
+
+### Triage steps
+
+1. **Identify the failing queue and error:**
+   ```promql
+   topk(5, bull_queue_failed{job="teachlink-backend"}) by (queue)
+   ```
+
+2. **Inspect failed job payloads via Bull Board** (if deployed) at
+   `https://<internal-host>/admin/queues`, or directly via Redis:
+   ```bash
+   kubectl exec -it <redis-pod> -n <namespace> -- \
+     redis-cli ZRANGE bull:<queue-name>:failed 0 4
+   ```
+
+3. **Read the failure reason from job metadata** — for each job ID returned above, run
+   `redis-cli HGET bull:<queue-name>:<job-id> failedReason` (Bull keeps failed job IDs in a sorted set).
+
+4. **Check application logs** for the worker around the time failures spiked:
+   ```bash
+   kubectl logs -n <namespace> -l app.kubernetes.io/name=teachlink-backend \
+     --since=30m | grep -i "failed\|error\|unhandled"
+   ```
+
+5. **Reproduce** the failing job in a staging environment using the same payload
+   to confirm the fix before retrying production jobs.
+
+### Remediation
+
+| Cause | Action |
+|---|---|
+| Code bug in job handler | Fix bug, redeploy, then retry jobs from Bull Board |
+| External dependency down | Wait for dependency to recover; then bulk-retry jobs |
+| Invalid job payload | Patch payload schema validation; discard or correct jobs |
+| Credentials expired | Rotate the affected secret; restart the worker |
+
+**Bulk retry via Bull Board:**
+Navigate to `Admin → Queues → <queue-name> → Failed` and click
+**Retry All Failed**.
+
+**Bulk retry via Redis CLI** (last resort):
+```bash
+# Move all failed job IDs (stored in a sorted set) back to the waiting list
+kubectl exec -it <redis-pod> -n <namespace> -- \
+  redis-cli EVAL "
+    local failed = redis.call('zrange', KEYS[1], 0, -1)
+    for _, v in ipairs(failed) do
+      redis.call('lpush', KEYS[2], v)
+    end
+    redis.call('del', KEYS[1])
+    return #failed
+  " 2 bull:<queue-name>:failed bull:<queue-name>:wait
+```
+
+### Escalation
+
+If the DLQ continues growing after the fix is deployed:
+- Page backend engineer immediately — data loss may be occurring.
+- Open a P1 incident and notify `#teachlink-incidents`.
+- Document affected job IDs for potential manual reprocessing.
+
+---
+
+## Updating These Runbooks
+
+When a new alert is added to `prometheus-rules.yaml`:
+
+1. Add a matching `##` section to this file following the template above.
+2. Set the `runbook_url` annotation in the alert to point at the new section:
+   ```
+   runbook_url: "https://github.com/rinafcode/teachLink_backend/blob/main/docs/RUNBOOKS.md#<anchor>"
+   ```
+3. Open a PR — runbook changes should be reviewed by the on-call rotation lead.
diff --git a/src/app/(auth)/layout.tsx b/src/app/(auth)/layout.tsx
new file mode 100644
index 00000000..51840533
--- /dev/null
+++ b/src/app/(auth)/layout.tsx
@@ -0,0 +1,23 @@
+import type { Metadata } from 'next';
+
+export const metadata: Metadata = {
+  title: 'TeachLink - Sign In or Create an Account',
+  description:
+    'Access your TeachLink account to continue learning offline. Sign in, sign up, or verify your email.',
+  openGraph: { // Open Graph preview: same title as the page, shorter description
+    title: 'TeachLink - Sign In or Create an Account',
+    description: 'Access your TeachLink account to continue learning.',
+    type: 'website',
+    siteName: 'TeachLink',
+  },
+  twitter: { // Twitter card: 'summary' type, same title/description as openGraph
+    card: 'summary',
+    site: '@teachlink',
+    title: 'TeachLink - Sign In or Create an Account',
+    description: 'Access your TeachLink account to continue learning.',
+  },
+};
+
+export default function AuthLayout(props: { children: React.ReactNode }) {
+  return <>{props.children}</>; // pass-through: adds no markup around the child pages
+}
diff --git a/src/app/__tests__/twitter-cards.test.ts b/src/app/__tests__/twitter-cards.test.ts
new file mode 100644
index 00000000..277e40fb
--- /dev/null
+++ b/src/app/__tests__/twitter-cards.test.ts
@@ -0,0 +1,76 @@
+import { describe, it, expect } from 'vitest';
+import { metadata as rootMetadata } from '@/app/layout';
+import { metadata as authMetadata } from '@/app/(auth)/layout';
+import { metadata as dashboardMetadata } from '@/app/dashboard/layout';
+import { metadata as profileMetadata } from '@/app/profile/layout';
+
+describe('Twitter Cards metadata', () => { // checks the static `metadata` export of each layout
+  describe('Root layout', () => { // root layout: large-image card plus Open Graph site name
+    it('exports a twitter card field', () => {
+      expect(rootMetadata.twitter).toBeDefined();
+    });
+
+    it('uses summary_large_image card type', () => {
+      expect(rootMetadata.twitter?.card).toBe('summary_large_image');
+    });
+
+    it('includes a twitter title', () => {
+      expect(rootMetadata.twitter?.title).toBeTruthy();
+    });
+
+    it('includes a twitter description', () => {
+      expect(rootMetadata.twitter?.description).toBeTruthy();
+    });
+
+    it('includes twitter site handle', () => {
+      expect(rootMetadata.twitter?.site).toBe('@teachlink');
+    });
+
+    it('exports openGraph metadata', () => {
+      expect(rootMetadata.openGraph).toBeDefined();
+      expect(rootMetadata.openGraph?.siteName).toBe('TeachLink');
+    });
+  });
+
+  describe('Auth layout', () => { // auth layout: 'summary' card with title, description and site handle
+    it('exports a twitter card field', () => {
+      expect(authMetadata.twitter).toBeDefined();
+    });
+
+    it('uses summary card type', () => {
+      expect(authMetadata.twitter?.card).toBe('summary');
+    });
+
+    it('includes a twitter title', () => {
+      expect(authMetadata.twitter?.title).toBeTruthy();
+    });
+
+    it('includes a twitter description', () => {
+      expect(authMetadata.twitter?.description).toBeTruthy();
+    });
+
+    it('includes twitter site handle', () => {
+      expect(authMetadata.twitter?.site).toBe('@teachlink');
+    });
+  });
+
+  describe('Dashboard layout', () => { // only presence and card type are asserted here
+    it('exports a twitter card field', () => {
+      expect(dashboardMetadata.twitter).toBeDefined();
+    });
+
+    it('uses summary card type', () => {
+      expect(dashboardMetadata.twitter?.card).toBe('summary');
+    });
+  });
+
+  describe('Profile layout', () => { // only presence and card type are asserted here
+    it('exports a twitter card field', () => {
+      expect(profileMetadata.twitter).toBeDefined();
+    });
+
+    it('uses summary card type', () => {
+      expect(profileMetadata.twitter?.card).toBe('summary');
+    });
+  });
+});
diff --git a/src/app/courses/[courseId]/page.tsx b/src/app/courses/[courseId]/page.tsx
index 7545d78d..9b15f34a 100644
--- a/src/app/courses/[courseId]/page.tsx
+++ b/src/app/courses/[courseId]/page.tsx
@@ -12,6 +12,18 @@ export async function generateMetadata({ params }: CoursePageProps): Promise<Met
     title: 'Course Details | TeachLink',
     description:
       'View detailed information about this course, including syllabus, instructor details, and enrollment options.',
+    openGraph: {
+      title: 'Course Details | TeachLink',
+      description: 'View course syllabus, instructor details, and enrollment options.',
+      type: 'website',
+      siteName: 'TeachLink',
+    },
+    twitter: {
+      card: 'summary_large_image',
+      site: '@teachlink',
+      title: 'Course Details | TeachLink',
+      description: 'View course syllabus, instructor details, and enrollment options.',
+    },
   };
 }
 
diff --git a/src/app/editor/page.tsx b/src/app/editor/page.tsx
index f7d6efb8..021cfa71 100644
--- a/src/app/editor/page.tsx
+++ b/src/app/editor/page.tsx
@@ -9,6 +9,18 @@ import { EditorWorkspace } from './EditorWorkspace';
 export const metadata: Metadata = {
   title: 'Post Editor | TeachLink',
   description: 'Create and edit privileged post content with a secure editor workspace.',
+  openGraph: { // Open Graph preview: repeats the page title and description above
+    title: 'Post Editor | TeachLink',
+    description: 'Create and edit privileged post content with a secure editor workspace.',
+    type: 'website',
+    siteName: 'TeachLink',
+  },
+  twitter: { // Twitter card: 'summary' type with the same title and description
+    card: 'summary',
+    site: '@teachlink',
+    title: 'Post Editor | TeachLink',
+    description: 'Create and edit privileged post content with a secure editor workspace.',
+  },
 };
 
 function fallback() {
diff --git a/src/app/layout.tsx b/src/app/layout.tsx
index c07702de..3e0f4fff 100644
--- a/src/app/layout.tsx
+++ b/src/app/layout.tsx
@@ -42,6 +42,20 @@ export const metadata: Metadata = {
   title: 'TeachLink - Offline Learning Platform',
   description: 'Learn anywhere, anytime with offline capabilities',
   manifest: '/manifest.json',
+  openGraph: {
+    title: 'TeachLink - Offline Learning Platform',
+    description: 'Learn anywhere, anytime with offline capabilities',
+    type: 'website',
+    siteName: 'TeachLink',
+    url: 'https://teachlink.app',
+  },
+  twitter: {
+    card: 'summary_large_image',
+    site: '@teachlink',
+    creator: '@teachlink',
+    title: 'TeachLink - Offline Learning Platform',
+    description: 'Learn anywhere, anytime with offline capabilities',
+  },
 };
 
 export default async function RootLayout({
diff --git a/src/app/leaderboard/page.tsx b/src/app/leaderboard/page.tsx
index 2235d409..2cb6966f 100644
--- a/src/app/leaderboard/page.tsx
+++ b/src/app/leaderboard/page.tsx
@@ -4,6 +4,18 @@ import { LeaderboardConference } from '@/components/leaderboard/LeaderboardConfe
 export const metadata: Metadata = {
   title: 'Leaderboard | TeachLink',
   description: 'View top contributors and join live conference sessions on TeachLink.',
+  openGraph: {
+    title: 'Leaderboard | TeachLink',
+    description: 'View top contributors and join live conference sessions on TeachLink.',
+    type: 'website',
+    siteName: 'TeachLink',
+  },
+  twitter: {
+    card: 'summary',
+    site: '@teachlink',
+    title: 'Leaderboard | TeachLink',
+    description: 'View top contributors and join live conference sessions on TeachLink.',
+  },
 };
 
 export default function LeaderboardPage() {
diff --git a/src/app/privacy/page.tsx b/src/app/privacy/page.tsx
index 05c53e07..3a41a1f1 100644
--- a/src/app/privacy/page.tsx
+++ b/src/app/privacy/page.tsx
@@ -21,6 +21,12 @@ export const metadata: Metadata = {
     'max-image-preview': 'large',
     'max-video-preview': -1,
   },
+  twitter: {
+    card: 'summary',
+    site: '@teachlink',
+    title: 'Privacy Policy | TeachLink',
+    description: 'Learn how TeachLink collects, uses, and protects your personal information.',
+  },
 };
 
 /**
diff --git a/src/app/search/page.tsx b/src/app/search/page.tsx
index 1af4b691..633ea3fc 100644
--- a/src/app/search/page.tsx
+++ b/src/app/search/page.tsx
@@ -1,8 +1,21 @@
+import type { Metadata } from 'next';
 import { AdvancedSearchInterface } from '@/components/search/AdvancedSearchInterface';
 
-export const metadata = {
+export const metadata: Metadata = {
   title: 'Advanced Search | TeachLink',
   description: 'Powerful multi-dimensional search for the TeachLink ecosystem.',
+  openGraph: {
+    title: 'Advanced Search | TeachLink',
+    description: 'Powerful multi-dimensional search for the TeachLink ecosystem.',
+    type: 'website',
+    siteName: 'TeachLink',
+  },
+  twitter: {
+    card: 'summary',
+    site: '@teachlink',
+    title: 'Advanced Search | TeachLink',
+    description: 'Powerful multi-dimensional search for the TeachLink ecosystem.',
+  },
 };
 
 export default function SearchPage() {
diff --git a/src/app/study-groups/page.tsx b/src/app/study-groups/page.tsx
index 5e87f1b2..0b88bb80 100644
--- a/src/app/study-groups/page.tsx
+++ b/src/app/study-groups/page.tsx
@@ -5,6 +5,18 @@ export const metadata: Metadata = {
   title: 'Study Groups | TeachLink',
   description:
     'Create and collaborate in study groups with discussions, resources, and challenges.',
+  openGraph: {
+    title: 'Study Groups | TeachLink',
+    description: 'Create and collaborate in study groups on TeachLink.',
+    type: 'website',
+    siteName: 'TeachLink',
+  },
+  twitter: {
+    card: 'summary',
+    site: '@teachlink',
+    title: 'Study Groups | TeachLink',
+    description: 'Create and collaborate in study groups on TeachLink.',
+  },
 };
 
 export default function Page() {
diff --git a/src/app/topics/[slug]/page.tsx b/src/app/topics/[slug]/page.tsx
index 40a2d477..25f0549b 100644
--- a/src/app/topics/[slug]/page.tsx
+++ b/src/app/topics/[slug]/page.tsx
@@ -13,6 +13,18 @@ export async function generateMetadata({ params }: TopicPageProps): Promise<Meta
   return {
     title: `#${name} · TeachLink`,
     description: `Explore posts and discussions about ${name} on TeachLink.`,
+    openGraph: {
+      title: `#${name} · TeachLink`,
+      description: `Explore posts and discussions about ${name} on TeachLink.`,
+      type: 'website',
+      siteName: 'TeachLink',
+    },
+    twitter: {
+      card: 'summary',
+      site: '@teachlink',
+      title: `#${name} · TeachLink`,
+      description: `Explore posts and discussions about ${name} on TeachLink.`,
+    },
   };
 }
 
diff --git a/src/middleware.ts b/src/middleware.ts
index b99a094b..c6f48603 100644
--- a/src/middleware.ts
+++ b/src/middleware.ts
@@ -45,9 +45,7 @@ export async function middleware(request: NextRequest) {
   };
 
   const permissionResponse = checkRoutePermission(request, userRole);
-  if (permissionResponse) {
-    return withHeaders(permissionResponse);
-  }
+  if (permissionResponse) return withHeaders(permissionResponse);
 
   const { pathname } = request.nextUrl;
   if (pathname.startsWith(API_ROOT)) {
@@ -72,8 +70,13 @@ export async function middleware(request: NextRequest) {
       return withHeaders(response);
     }
 
+    // Fix for #726: respond 400 unless the version segment is "v" followed by digits (e.g. v1).
+    const extractedVersion = pathname.split('/')[2];
+    if (!extractedVersion || !/^v\d+$/.test(extractedVersion)) {
+      return withHeaders(new NextResponse('Invalid API version', { status: 400 }));
+    }
     const response = NextResponse.next();
-    response.headers.set(API_VERSION_HEADER, pathname.split('/')[2] || DEFAULT_API_VERSION);
+    response.headers.set(API_VERSION_HEADER, extractedVersion);
     return withHeaders(response);
   }
 
diff --git a/src/middleware/__tests__/apiVersioning.test.ts b/src/middleware/__tests__/apiVersioning.test.ts
index 1336ef6e..2fd34c04 100644
--- a/src/middleware/__tests__/apiVersioning.test.ts
+++ b/src/middleware/__tests__/apiVersioning.test.ts
@@ -71,4 +71,65 @@ describe('API versioning middleware', () => {
     expect(response.headers.get(API_VERSION_HEADER)).toBe('v1');
     expect(response.headers.get(API_DEPRECATION_HEADER)).toBeNull();
   });
-});
+
+  describe('valid version strings — should route correctly', () => {
+    it('accepts v1 and sets X-Api-Version header', () => {
+      const request = createMockRequest('/api/v1/posts');
+      const response = middleware(request) as NextResponse;
+      expect(response.status).not.toBe(400);
+      expect(response.headers.get(API_VERSION_HEADER)).toBe('v1');
+    });
+
+    it('accepts v2 and sets X-Api-Version header', () => {
+      const request = createMockRequest('/api/v2/posts');
+      const response = middleware(request) as NextResponse;
+      expect(response.status).not.toBe(400);
+      expect(response.headers.get(API_VERSION_HEADER)).toBe('v2');
+    });
+
+    it('accepts large version numbers like v10', () => {
+      const request = createMockRequest('/api/v10/posts');
+      const response = middleware(request) as NextResponse;
+      expect(response.status).not.toBe(400);
+      expect(response.headers.get(API_VERSION_HEADER)).toBe('v10');
+    });
+  });
+
+  describe('malformed version strings — should return 400', () => {
+    it('rejects alphabetic version string (vABC)', () => {
+      const request = createMockRequest('/api/vABC/posts');
+      const response = middleware(request) as NextResponse;
+      expect(response.status).toBe(400);
+    });
+
+    it('rejects path-traversal characters (/../)', () => {
+      const request = createMockRequest('/api/../v1/posts');
+      const response = middleware(request) as NextResponse;
+      expect(response.status).toBe(400);
+    });
+
+    it('rejects version prefix without digits (/api/v/posts)', () => {
+      const request = createMockRequest('/api/v/posts');
+      const response = middleware(request) as NextResponse;
+      expect(response.status).toBe(400);
+    });
+
+    it('rejects version with special characters (v1.2)', () => {
+      const request = createMockRequest('/api/v1.2/posts');
+      const response = middleware(request) as NextResponse;
+      expect(response.status).toBe(400);
+    });
+
+    it('rejects version with injection attempt (v1;drop)', () => {
+      const request = createMockRequest('/api/v1;drop/posts');
+      const response = middleware(request) as NextResponse;
+      expect(response.status).toBe(400);
+    });
+
+    it('rejects purely numeric version without v prefix (123)', () => {
+      const request = createMockRequest('/api/123/posts');
+      const response = middleware(request) as NextResponse;
+      expect(response.status).toBe(400);
+    });
+  });
+});