Skip to content

Commit d5ad7b0

Browse files
committed
feat: add startup cleanup and maintenance system
Add configurable cleanup module that runs on startup to manage database size and remove orphaned storage files. Fixes log cleanup bug where non-deterministic glob ordering could delete newest files instead of oldest. Changes: - New cleanup config stanza with session retention, storage sweep, and vacuum options - Fix log cleanup: sort by filename before slicing, align guard with slice count - Sweep orphaned storage files (session, message, part, todo, session_diff, project, snapshot) - Prune empty directories after sweep - Run SQLite VACUUM and WAL checkpoint on startup - Fix pre-existing path.sep bug in Storage.list() for cross-platform correctness - Defer cleanup 500ms and yield every 100 deletions to avoid blocking TUI
1 parent d15c2ce commit d5ad7b0

6 files changed

Lines changed: 362 additions & 6 deletions

File tree

Lines changed: 275 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,275 @@
1+
import fs from "fs/promises"
2+
import path from "path"
3+
import { Log } from "../util/log"
4+
import { Database, and, lt, isNotNull, isNull, eq, inArray } from "../storage/db"
5+
import { SessionTable, MessageTable } from "../session/session.sql"
6+
import { ProjectTable } from "../project/project.sql"
7+
import { Storage } from "../storage/storage"
8+
import { Global } from "../global"
9+
import type { Config } from "../config/config"
10+
11+
type StorageCategory = "session" | "session_diff" | "message" | "part" | "todo" | "project" | "snapshot"
12+
13+
const ALL_CATEGORIES: StorageCategory[] = [
14+
"session",
15+
"session_diff",
16+
"message",
17+
"part",
18+
"todo",
19+
"project",
20+
"snapshot",
21+
]
22+
23+
export namespace Cleanup {
24+
const log = Log.create({ service: "cleanup" })
25+
26+
/** Yield to the event loop so the TUI can render */
27+
const yieldTick = () => new Promise<void>((r) => setTimeout(r, 0))
28+
29+
export function run(config: Config.Info["cleanup"]) {
30+
if (config?.enabled === false) return
31+
// Defer cleanup to avoid competing with TUI startup
32+
setTimeout(() => runDeferred(config), 500)
33+
}
34+
35+
async function runDeferred(config: Config.Info["cleanup"]) {
36+
log.info("cleanup started")
37+
const sessionsDeleted = await sessionCleanup(config?.session).catch((e) => {
38+
log.error("session cleanup failed", { error: e })
39+
return 0
40+
})
41+
const categories = new Set(config?.storage ?? ALL_CATEGORIES)
42+
const orphansSwept = await sweepOrphanedStorage(categories).catch((e) => {
43+
log.error("orphan sweep failed", { error: e })
44+
return 0
45+
})
46+
await vacuum(config?.vacuum).catch((e) =>
47+
log.error("vacuum failed", { error: e }),
48+
)
49+
log.info("cleanup complete", {
50+
sessions_deleted: sessionsDeleted,
51+
orphans_swept: orphansSwept,
52+
})
53+
}
54+
55+
async function sessionCleanup(config: NonNullable<Config.Info["cleanup"]>["session"]): Promise<number> {
56+
if (!config?.max_age_days) return 0
57+
const cutoff = Date.now() - config.max_age_days * 86_400_000
58+
59+
const conditions = [
60+
isNotNull(SessionTable.time_updated),
61+
lt(SessionTable.time_updated, cutoff),
62+
isNull(SessionTable.parent_id), // only root sessions; children cascade
63+
]
64+
if (!config.target || config.target === "archived") {
65+
conditions.push(isNotNull(SessionTable.time_archived))
66+
}
67+
68+
const sessions = Database.use((db) =>
69+
db
70+
.select({ id: SessionTable.id })
71+
.from(SessionTable)
72+
.where(and(...conditions))
73+
.all(),
74+
)
75+
76+
if (sessions.length === 0) return 0
77+
const sessionIDs = sessions.map((s) => s.id)
78+
79+
// DB first — orphaned storage files are harmless; orphaned DB rows could confuse UI
80+
Database.transaction((db) => {
81+
db.delete(SessionTable).where(inArray(SessionTable.id, sessionIDs)).run()
82+
})
83+
84+
// Storage cleanup is best-effort; orphan sweep catches any misses
85+
for (const id of sessionIDs) {
86+
await Storage.remove(["session_diff", id]).catch(() => {})
87+
}
88+
89+
log.info("session cleanup", { deleted: sessionIDs.length, target: config.target ?? "archived" })
90+
return sessionIDs.length
91+
}
92+
93+
// -- Orphan sweep helpers --
94+
95+
/** Get all session IDs that exist in the DB */
96+
function getSessionIDs(): Set<string> {
97+
const rows = Database.use((db) =>
98+
db.select({ id: SessionTable.id }).from(SessionTable).all(),
99+
)
100+
return new Set(rows.map((r) => r.id))
101+
}
102+
103+
/** Get all message IDs that exist in the DB */
104+
function getMessageIDs(): Set<string> {
105+
const rows = Database.use((db) =>
106+
db.select({ id: MessageTable.id }).from(MessageTable).all(),
107+
)
108+
return new Set(rows.map((r) => r.id))
109+
}
110+
111+
/** Get all project IDs that exist in the DB */
112+
function getProjectIDs(): Set<string> {
113+
const rows = Database.use((db) =>
114+
db.select({ id: ProjectTable.id }).from(ProjectTable).all(),
115+
)
116+
return new Set(rows.map((r) => r.id))
117+
}
118+
119+
/**
120+
* Sweep a storage prefix, removing files whose ID (extracted from the key)
121+
* is not in the provided valid set.
122+
*/
123+
async function sweepStoragePrefix(
124+
prefix: string,
125+
validIDs: Set<string>,
126+
idIndex: number,
127+
): Promise<number> {
128+
let count = 0
129+
const keys = await Storage.list([prefix])
130+
for (let i = 0; i < keys.length; i++) {
131+
const id = keys[i][idIndex]
132+
if (!id) continue
133+
if (!validIDs.has(id)) {
134+
await Storage.remove(keys[i]).catch(() => {})
135+
count++
136+
// Yield every 100 deletions to avoid blocking the event loop
137+
if (count % 100 === 0) await yieldTick()
138+
}
139+
}
140+
return count
141+
}
142+
143+
/** Recursively remove empty directories under a path (bottom-up) */
144+
async function pruneEmptyDirs(dir: string) {
145+
let entries: string[]
146+
try {
147+
entries = await fs.readdir(dir)
148+
} catch {
149+
return
150+
}
151+
for (const entry of entries) {
152+
const full = path.join(dir, entry)
153+
try {
154+
const stat = await fs.stat(full)
155+
if (!stat.isDirectory()) continue
156+
await pruneEmptyDirs(full)
157+
const children = await fs.readdir(full)
158+
if (children.length === 0) {
159+
await fs.rmdir(full).catch(() => {})
160+
}
161+
} catch {
162+
// entry disappeared between readdir and stat — fine
163+
}
164+
}
165+
}
166+
167+
async function sweepOrphanedStorage(categories: Set<StorageCategory>): Promise<number> {
168+
const storageDir = path.join(Global.Path.data, "storage")
169+
let totalSwept = 0
170+
171+
// Lazily load ID sets only when needed
172+
let sessionIDs: Set<string> | undefined
173+
let messageIDs: Set<string> | undefined
174+
let projectIDs: Set<string> | undefined
175+
176+
const ensureSessionIDs = () => (sessionIDs ??= getSessionIDs())
177+
const ensureMessageIDs = () => (messageIDs ??= getMessageIDs())
178+
const ensureProjectIDs = () => (projectIDs ??= getProjectIDs())
179+
180+
// session_diff: storage/session_diff/<sessionID>.json
181+
if (categories.has("session_diff")) {
182+
const swept = await sweepStoragePrefix("session_diff", ensureSessionIDs(), 1)
183+
if (swept > 0) {
184+
log.info("swept orphaned session_diff files", { count: swept })
185+
totalSwept += swept
186+
}
187+
}
188+
189+
// todo: storage/todo/<sessionID>.json
190+
if (categories.has("todo")) {
191+
const swept = await sweepStoragePrefix("todo", ensureSessionIDs(), 1)
192+
if (swept > 0) {
193+
log.info("swept orphaned todo files", { count: swept })
194+
totalSwept += swept
195+
}
196+
}
197+
198+
// message: storage/message/<sessionID>/<messageID>.json
199+
if (categories.has("message")) {
200+
const swept = await sweepStoragePrefix("message", ensureSessionIDs(), 1)
201+
if (swept > 0) {
202+
log.info("swept orphaned message files", { count: swept })
203+
totalSwept += swept
204+
}
205+
}
206+
207+
// part: storage/part/<messageID>/<partID>.json
208+
if (categories.has("part")) {
209+
const swept = await sweepStoragePrefix("part", ensureMessageIDs(), 1)
210+
if (swept > 0) {
211+
log.info("swept orphaned part files", { count: swept })
212+
totalSwept += swept
213+
}
214+
}
215+
216+
// session: storage/session/<projectID>/<sessionID>.json
217+
if (categories.has("session")) {
218+
const swept = await sweepStoragePrefix("session", ensureSessionIDs(), 2)
219+
if (swept > 0) {
220+
log.info("swept orphaned session files", { count: swept })
221+
totalSwept += swept
222+
}
223+
}
224+
225+
// project: storage/project/<projectID>.json
226+
if (categories.has("project")) {
227+
const swept = await sweepStoragePrefix("project", ensureProjectIDs(), 1)
228+
if (swept > 0) {
229+
log.info("swept orphaned project files", { count: swept })
230+
totalSwept += swept
231+
}
232+
}
233+
234+
// snapshot: ~/.local/share/opencode/snapshot/<projectID>/
235+
if (categories.has("snapshot")) {
236+
const snapshotDir = path.join(Global.Path.data, "snapshot")
237+
let swept = 0
238+
try {
239+
const pids = ensureProjectIDs()
240+
const entries = await fs.readdir(snapshotDir)
241+
for (const entry of entries) {
242+
if (pids.has(entry)) continue
243+
const full = path.join(snapshotDir, entry)
244+
const stat = await fs.stat(full).catch(() => null)
245+
if (!stat?.isDirectory()) continue
246+
await fs.rm(full, { recursive: true, force: true }).catch(() => {})
247+
swept++
248+
}
249+
} catch {
250+
// snapshot dir may not exist — that's fine
251+
}
252+
if (swept > 0) {
253+
log.info("swept orphaned snapshot dirs", { count: swept })
254+
totalSwept += swept
255+
}
256+
}
257+
258+
// Prune empty subdirectories across all storage categories
259+
await pruneEmptyDirs(storageDir)
260+
261+
if (totalSwept > 0) {
262+
log.info("orphan sweep complete", { total: totalSwept })
263+
}
264+
return totalSwept
265+
}
266+
267+
async function vacuum(config: { enabled?: boolean } | undefined) {
268+
if (config?.enabled === false) return
269+
const start = Date.now()
270+
const client = Database.Client().$client
271+
client.run("PRAGMA wal_checkpoint(TRUNCATE)")
272+
client.run("VACUUM")
273+
log.info("vacuum complete", { duration: Date.now() - start })
274+
}
275+
}

packages/opencode/src/config/config.ts

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,44 @@ export namespace Config {
11471147
.describe("Token buffer for compaction. Leaves enough window to avoid overflow during compaction."),
11481148
})
11491149
.optional(),
1150+
cleanup: z
1151+
.object({
1152+
enabled: z.boolean().optional().describe("Enable cleanup on startup (default: true)"),
1153+
log: z
1154+
.object({
1155+
max_count: z
1156+
.number()
1157+
.int()
1158+
.min(1)
1159+
.optional()
1160+
.describe("Maximum log files to retain (default: 10)"),
1161+
})
1162+
.optional(),
1163+
session: z
1164+
.object({
1165+
max_age_days: z
1166+
.number()
1167+
.int()
1168+
.min(1)
1169+
.optional()
1170+
.describe("Delete sessions older than N days (disabled by default)"),
1171+
target: z
1172+
.enum(["archived", "all"])
1173+
.optional()
1174+
.describe("Which sessions to consider for cleanup (default: archived)"),
1175+
})
1176+
.optional(),
1177+
storage: z
1178+
.array(z.enum(["session", "session_diff", "message", "part", "todo", "project", "snapshot"]))
1179+
.optional()
1180+
.describe("Storage categories to sweep for orphaned files (default: all)"),
1181+
vacuum: z
1182+
.object({
1183+
enabled: z.boolean().optional().describe("Run VACUUM on startup (default: true)"),
1184+
})
1185+
.optional(),
1186+
})
1187+
.optional(),
11501188
experimental: z
11511189
.object({
11521190
disable_paste_summary: z.boolean().optional(),

packages/opencode/src/index.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ import path from "path"
3333
import { Global } from "./global"
3434
import { JsonMigration } from "./storage/json-migration"
3535
import { Database } from "./storage/db"
36+
import { Config } from "./config/config"
37+
import { Cleanup } from "./cleanup"
3638

3739
process.on("unhandledRejection", (e) => {
3840
Log.Default.error("rejection", {
@@ -69,9 +71,12 @@ let cli = yargs(hideBin(process.argv))
6971
choices: ["DEBUG", "INFO", "WARN", "ERROR"],
7072
})
7173
.middleware(async (opts) => {
74+
const globalConfig = await Config.global()
75+
7276
await Log.init({
7377
print: process.argv.includes("--print-logs"),
7478
dev: Installation.isLocal(),
79+
maxLogFiles: globalConfig?.cleanup?.log?.max_count,
7580
level: (() => {
7681
if (opts.logLevel) return opts.logLevel as Log.Level
7782
if (Installation.isLocal()) return "DEBUG"
@@ -124,6 +129,8 @@ let cli = yargs(hideBin(process.argv))
124129
}
125130
process.stderr.write("Database migration complete." + EOL)
126131
}
132+
133+
Cleanup.run(globalConfig?.cleanup)
127134
})
128135
.usage("\n" + UI.logo())
129136
.completion("completion", "generate shell completion script")

packages/opencode/src/storage/storage.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,7 @@ export namespace Storage {
210210
const result = await Glob.scan("**/*", {
211211
cwd: path.join(dir, ...prefix),
212212
include: "file",
213-
}).then((results) => results.map((x) => [...prefix, ...x.slice(0, -5).split(path.sep)]))
213+
}).then((results) => results.map((x) => [...prefix, ...x.slice(0, -5).split("/")]))
214214
result.sort()
215215
return result
216216
} catch {

packages/opencode/src/util/log.ts

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ export namespace Log {
4646
print: boolean
4747
dev?: boolean
4848
level?: Level
49+
maxLogFiles?: number
4950
}
5051

5152
let logpath = ""
@@ -59,7 +60,7 @@ export namespace Log {
5960

6061
export async function init(options: Options) {
6162
if (options.level) level = options.level
62-
cleanup(Global.Path.log)
63+
cleanup(Global.Path.log, options.maxLogFiles)
6364
if (options.print) return
6465
logpath = path.join(
6566
Global.Path.log,
@@ -77,15 +78,15 @@ export namespace Log {
7778
}
7879
}
7980

80-
async function cleanup(dir: string) {
81+
async function cleanup(dir: string, maxCount = 10) {
8182
const files = await Glob.scan("????-??-??T??????.log", {
8283
cwd: dir,
8384
absolute: true,
8485
include: "file",
8586
})
86-
if (files.length <= 5) return
87-
88-
const filesToDelete = files.slice(0, -10)
87+
if (files.length <= maxCount) return
88+
files.sort()
89+
const filesToDelete = files.slice(0, -maxCount)
8990
await Promise.all(filesToDelete.map((file) => fs.unlink(file).catch(() => {})))
9091
}
9192

0 commit comments

Comments
 (0)