-
-
Notifications
You must be signed in to change notification settings - Fork 10.8k
Expand file tree
/
Copy pathtracker.mjs
More file actions
451 lines (401 loc) · 18.8 KB
/
Copy pathtracker.mjs
File metadata and controls
451 lines (401 loc) · 18.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
#!/usr/bin/env node
/**
* tracker.mjs — SQLite derived index for the applications tracker (RFC #918, phase 1).
*
* data/applications.md stays the source of truth. The SQLite DB is a derived
* index, built and rebuilt from the markdown — safe to delete at any time, it
* regenerates on the next sync. Tools and agents READ through the index for
* schema-validated, model-independent results; all writes keep going to the
* markdown exactly as today (merge-tracker.mjs, hand edits).
*
* Why: at hundreds of rows, a markdown table degrades structurally — encoding
* corruption propagates, columns drift, a `|` inside a cell shifts every
* column after it, and agents grepping the table get model-dependent results.
* The index normalizes on sync (canonical statuses, repaired columns) so every
* query returns the same rows for every model on every CLI, and corruption is
* DETECTED at sync time instead of propagating silently.
*
* Phase 2 of #918 (DB becomes source of truth, markdown becomes a rendered
* view) is a separate, explicit per-user opt-in — not implemented here.
*
* Zero new dependencies — uses node:sqlite (built into Node >= 22.5).
*
* Usage:
* node tracker.mjs sync [--check] # (re)build applications.db from applications.md
* # --check: diagnose only, no write; exit 1 if issues found
* node tracker.mjs query [--status Applied] [--company acme] [--role designer]
* [--since 2026-01-01] [--id N] [--limit 20] [--json]
* node tracker.mjs history --id N # status transition log observed across syncs
* node tracker.mjs export [--out FILE] # inverse: applications.db → canonical markdown (stdout by default)
*
* query/history auto-resync when applications.md changed since the last sync,
* so the index can never serve stale reads.
*/
import { readFileSync, writeFileSync, copyFileSync, existsSync, mkdirSync, statSync } from 'fs';
import { createHash } from 'crypto';
import { dirname, resolve } from 'path';
import { pathToFileURL } from 'url';
import yaml from 'js-yaml';
const MD_PATH = process.env.CAREER_OPS_TRACKER || 'data/applications.md';
const DB_PATH = process.env.CAREER_OPS_TRACKER_DB
|| (MD_PATH.endsWith('.md') ? MD_PATH.slice(0, -3) + '.db' : MD_PATH + '.db');
// SQLite must never open the source of truth itself (an explicit
// CAREER_OPS_TRACKER_DB could point both names at the same file).
if (resolve(MD_PATH) === resolve(DB_PATH)) {
console.error(`Error: DB path must differ from the markdown path (${MD_PATH}).`);
process.exit(1);
}
const STATES_PATH = 'templates/states.yml';
const HEADER = '| # | Date | Company | Role | Score | Status | PDF | Report | Notes |';
const SEPARATOR = '|---|------|---------|------|-------|--------|-----|--------|-------|';
// ── node:sqlite loading ─────────────────────────────────────────────
async function loadSqlite() {
// node:sqlite is stable in behavior but still flagged experimental in some
// Node lines — silence only that one warning, leave everything else alone.
const origEmit = process.emitWarning;
process.emitWarning = (warning, ...args) => {
const text = typeof warning === 'string' ? warning : warning?.message || '';
if (text.includes('SQLite is an experimental feature')) return;
return origEmit.call(process, warning, ...args);
};
try {
const { DatabaseSync } = await import('node:sqlite');
return DatabaseSync;
} catch {
console.error('Error: node:sqlite is not available. tracker.mjs needs Node >= 22.5 (you are on ' + process.version + ').');
console.error('The markdown tracker keeps working without it — the index is optional.');
process.exit(1);
} finally {
process.emitWarning = origEmit; // the warning fires at import time — safe to restore here
}
}
function openDb(DatabaseSync) {
mkdirSync(dirname(DB_PATH) || '.', { recursive: true });
const db = new DatabaseSync(DB_PATH);
db.exec('PRAGMA foreign_keys = ON'); // SQLite ignores REFERENCES without this
db.exec(`
CREATE TABLE IF NOT EXISTS applications (
id INTEGER PRIMARY KEY,
pos INTEGER NOT NULL,
date TEXT NOT NULL,
company TEXT NOT NULL,
role TEXT NOT NULL,
score TEXT NOT NULL DEFAULT '—',
status TEXT NOT NULL,
pdf TEXT NOT NULL DEFAULT '❌',
report TEXT NOT NULL DEFAULT '—',
notes TEXT NOT NULL DEFAULT ''
);
CREATE TABLE IF NOT EXISTS status_events (
id INTEGER PRIMARY KEY AUTOINCREMENT,
app_id INTEGER NOT NULL REFERENCES applications(id),
status TEXT NOT NULL,
date TEXT NOT NULL
);
CREATE TABLE IF NOT EXISTS meta (
key TEXT PRIMARY KEY,
value TEXT NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_apps_status ON applications(status);
CREATE INDEX IF NOT EXISTS idx_apps_company ON applications(company);
CREATE INDEX IF NOT EXISTS idx_events_app ON status_events(app_id);
`);
return db;
}
// ── Canonical states (templates/states.yml is the source of truth) ──
function loadStates() {
if (!existsSync(STATES_PATH)) {
console.error(`Error: ${STATES_PATH} not found — cannot validate statuses. Run from the career-ops root.`);
process.exit(1);
}
const doc = yaml.load(readFileSync(STATES_PATH, 'utf-8'));
const byKey = new Map(); // lowercased label/alias → canonical label
const labels = [];
for (const s of doc?.states || []) {
if (!s?.label) continue;
labels.push(s.label);
byKey.set(s.label.toLowerCase(), s.label);
if (s.id) byKey.set(String(s.id).toLowerCase(), s.label);
for (const alias of s.aliases || []) byKey.set(String(alias).toLowerCase(), s.label);
}
return { byKey, labels };
}
// Strip markdown bold, trailing dates, and surrounding noise, then resolve
// against canonical labels/aliases. Returns the canonical label or null.
function normalizeStatus(raw, states) {
if (!raw) return null;
const cleaned = String(raw)
.replace(/\*\*/g, '')
.replace(/\(?\d{4}-\d{2}-\d{2}\)?/g, '')
.trim()
.toLowerCase();
return states.byKey.get(cleaned) || null;
}
const SCORE_RE = /^\*{0,2}(\d(?:\.\d)?\/5)\*{0,2}$/;
const DATE_RE = /^\d{4}-\d{2}-\d{2}$/;
// Mojibake left by a UTF-8 → GBK → UTF-8 round trip: an em-dash cell becomes
// "鈥?" / "鈥�" variants. Only short placeholder cells are repaired — free-text
// notes are preserved as-is rather than risk corrupting real content.
function repairPlaceholder(cell) {
if (/^鈥.{0,2}$/.test(cell) || cell === '�') return '—';
return cell;
}
// ── Markdown parsing ────────────────────────────────────────────────
function parseMarkdownRows(text, diag) {
const rows = [];
for (const line of text.split('\n')) {
if (!line.trim().startsWith('|')) continue;
let cells = line.trim().replace(/^\|/, '').replace(/\|$/, '').split('|').map(c => c.trim());
if (cells.length < 2) continue;
if (cells[0] === '#' || /^[-: ]*$/.test(cells.join(''))) continue; // header / separator
if (cells.length > 9) {
cells = [...cells.slice(0, 8), cells.slice(8).join(' | ')]; // stray pipes → notes
if (diag) diag.strayPipes++;
}
while (cells.length < 9) cells.push('');
rows.push(cells);
}
return rows;
}
// Parse + normalize the markdown into index-ready rows. The markdown itself is
// never modified — normalization lives only in the derived index, and the
// diagnostics tell the user what to fix at the source (normalize-statuses.mjs,
// dedup-tracker.mjs).
function parseTracker(states) {
const diag = { mojibake: 0, scoreInStatus: 0, unknownStatus: 0, badId: 0, badDate: 0, strayPipes: 0 };
const rows = parseMarkdownRows(readFileSync(MD_PATH, 'utf-8'), diag);
const usedIds = new Set();
let maxId = 0;
const apps = [];
for (const cells of rows) {
let [idRaw, date, company, role, score, status, pdf, report, notes] = cells;
const before = [score, pdf, report].join('|');
score = repairPlaceholder(score);
pdf = repairPlaceholder(pdf);
report = repairPlaceholder(report);
if ([score, pdf, report].join('|') !== before) diag.mojibake++;
// Score sitting in the status column (column drift)
const scoreInStatus = status.match(SCORE_RE);
if (scoreInStatus) {
if (!SCORE_RE.test(score)) score = scoreInStatus[1];
status = 'Evaluated';
diag.scoreInStatus++;
}
const canonical = normalizeStatus(status, states);
if (!canonical) {
notes = notes ? `${notes} [sync: original status "${status}"]` : `[sync: original status "${status}"]`;
status = 'Evaluated';
diag.unknownStatus++;
} else {
status = canonical;
}
let id = parseInt(idRaw, 10);
if (!Number.isInteger(id) || id <= 0 || usedIds.has(id)) {
id = 0; // assign after the pass, once maxId is known
diag.badId++;
} else {
usedIds.add(id);
if (id > maxId) maxId = id;
}
if (!DATE_RE.test(date)) diag.badDate++; // kept as-is — flagged, not destroyed
apps.push({ id, pos: apps.length, date, company, role, score: score || '—', status, pdf: pdf || '❌', report: report || '—', notes });
}
for (const app of apps) if (app.id === 0) app.id = ++maxId;
return { apps, diag };
}
function mdHash() {
return createHash('sha256').update(readFileSync(MD_PATH)).digest('hex');
}
// ── Sync (markdown → derived index) ─────────────────────────────────
function reportDiagnostics(diag) {
const total = Object.values(diag).reduce((a, b) => a + b, 0);
if (total === 0) {
console.error('No corruption detected — index matches the markdown cleanly.');
return 0;
}
console.error(`Corruption detected in ${MD_PATH} (normalized in the index only — the markdown is untouched):`);
if (diag.mojibake) console.error(` ${diag.mojibake} mojibake placeholder cell(s)`);
if (diag.scoreInStatus) console.error(` ${diag.scoreInStatus} score(s) sitting in the status column`);
if (diag.unknownStatus) console.error(` ${diag.unknownStatus} non-canonical status(es), indexed as Evaluated (original kept in notes)`);
if (diag.badId) console.error(` ${diag.badId} missing/duplicate id(s), reassigned in the index`);
if (diag.badDate) console.error(` ${diag.badDate} malformed date(s), kept as-is`);
if (diag.strayPipes) console.error(` ${diag.strayPipes} row(s) with stray pipes, folded into notes`);
console.error('Fix at the source with `node normalize-statuses.mjs` / `node dedup-tracker.mjs`, then re-sync.');
return total;
}
function syncIndex(db, states) {
const { apps, diag } = parseTracker(states);
const today = new Date().toISOString().slice(0, 10);
db.exec('BEGIN');
db.exec('PRAGMA defer_foreign_keys = ON'); // full rebuild — FKs settle at commit
try {
db.exec('DELETE FROM applications');
const insertApp = db.prepare('INSERT INTO applications (id, pos, date, company, role, score, status, pdf, report, notes) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)');
for (const a of apps) insertApp.run(a.id, a.pos, a.date, a.company, a.role, a.score, a.status, a.pdf, a.report, a.notes);
// Status history: events persist across rebuilds, keyed by id. An app whose
// status changed since the last sync gets a new event; rows that left the
// markdown lose their events (the index never outlives its source).
db.exec('DELETE FROM status_events WHERE app_id NOT IN (SELECT id FROM applications)');
const latestEvent = db.prepare('SELECT status FROM status_events WHERE app_id = ? ORDER BY id DESC LIMIT 1');
const insertEvent = db.prepare('INSERT INTO status_events (app_id, status, date) VALUES (?, ?, ?)');
for (const a of apps) {
const last = latestEvent.get(a.id);
if (!last) insertEvent.run(a.id, a.status, DATE_RE.test(a.date) ? a.date : today);
else if (last.status !== a.status) insertEvent.run(a.id, a.status, today);
}
db.prepare('INSERT INTO meta (key, value) VALUES (?, ?) ON CONFLICT(key) DO UPDATE SET value = excluded.value')
.run('md_sha256', mdHash());
db.exec('COMMIT');
} catch (err) {
db.exec('ROLLBACK');
throw err;
}
return { apps, diag };
}
async function sync(args) {
if (!existsSync(MD_PATH)) {
console.error(`Error: ${MD_PATH} not found — nothing to index.`);
process.exit(1);
}
const states = loadStates();
if (args.includes('--check')) {
const { apps, diag } = parseTracker(states);
console.error(`Parsed ${apps.length} data rows from ${MD_PATH}`);
const issues = reportDiagnostics(diag);
console.error('(--check — no index written)');
process.exit(issues > 0 ? 1 : 0);
}
const DatabaseSync = await loadSqlite();
const db = openDb(DatabaseSync);
const { apps, diag } = syncIndex(db, states);
console.error(`Indexed ${apps.length} applications from ${MD_PATH} into ${DB_PATH}`);
reportDiagnostics(diag);
}
// query/history must never serve stale reads: if the markdown changed since
// the last sync (or was never synced), rebuild the index first.
function ensureFresh(db, states) {
if (!existsSync(MD_PATH)) {
console.error(`Error: ${MD_PATH} not found — the index has no source of truth to read from.`);
process.exit(1);
}
const synced = db.prepare('SELECT value FROM meta WHERE key = ?').get('md_sha256');
if (synced && synced.value === mdHash()) return;
console.error(`(index stale — resyncing from ${MD_PATH})`);
syncIndex(db, states);
}
// ── Query helpers ───────────────────────────────────────────────────
function flagValue(args, flag) {
const idx = args.indexOf(flag);
if (idx !== -1 && args[idx + 1] !== undefined && !args[idx + 1].startsWith('--')) return args[idx + 1];
const kv = args.find(a => a.startsWith(flag + '='));
return kv ? kv.split('=').slice(1).join('=') : null;
}
function rowToMarkdown(r) {
const clean = (v) => String(v ?? '').replace(/\|/g, '│').replace(/\r?\n/g, ' ');
return `| ${r.id} | ${clean(r.date)} | ${clean(r.company)} | ${clean(r.role)} | ${clean(r.score)} | ${clean(r.status)} | ${clean(r.pdf)} | ${clean(r.report)} | ${clean(r.notes)} |`;
}
async function query(args) {
const DatabaseSync = await loadSqlite();
const db = openDb(DatabaseSync);
const states = loadStates();
ensureFresh(db, states);
const where = [];
const params = [];
const status = flagValue(args, '--status');
if (status) {
const canonical = normalizeStatus(status, states);
if (!canonical) { console.error(`Error: unknown status "${status}". Canonical: ${states.labels.join(', ')}`); process.exit(1); }
where.push('status = ?'); params.push(canonical);
}
const company = flagValue(args, '--company');
if (company) { where.push('company LIKE ?'); params.push(`%${company}%`); }
const role = flagValue(args, '--role');
if (role) { where.push('role LIKE ?'); params.push(`%${role}%`); }
const since = flagValue(args, '--since');
if (since) {
if (!DATE_RE.test(since)) { console.error('Error: --since must be YYYY-MM-DD'); process.exit(1); }
where.push('date >= ?'); params.push(since);
}
const id = flagValue(args, '--id');
if (id) { where.push('id = ?'); params.push(parseInt(id, 10)); }
let sql = 'SELECT id, date, company, role, score, status, pdf, report, notes FROM applications'
+ (where.length ? ' WHERE ' + where.join(' AND ') : '') + ' ORDER BY id DESC';
const limit = parseInt(flagValue(args, '--limit') || '0', 10);
if (limit > 0) { sql += ' LIMIT ?'; params.push(limit); }
const rows = db.prepare(sql).all(...params);
if (args.includes('--json')) {
console.log(JSON.stringify(rows, null, 2));
} else {
console.log(HEADER);
console.log(SEPARATOR);
for (const r of rows) console.log(rowToMarkdown(r));
console.error(`\n${rows.length} row(s)`); // stderr so stdout stays pipeable
}
}
async function history(args) {
const DatabaseSync = await loadSqlite();
const db = openDb(DatabaseSync);
ensureFresh(db, loadStates());
const id = parseInt(flagValue(args, '--id') || '', 10);
if (!Number.isInteger(id)) { console.error('Error: history requires --id N'); process.exit(1); }
const app = db.prepare('SELECT * FROM applications WHERE id = ?').get(id);
if (!app) { console.error(`Error: no application with id ${id}`); process.exit(1); }
console.log(`#${app.id} ${app.company} — ${app.role}`);
for (const e of db.prepare('SELECT status, date FROM status_events WHERE app_id = ? ORDER BY id').all(id)) {
console.log(` ${e.date} ${e.status}`);
}
}
// ── Export (index → canonical markdown) ─────────────────────────────
// The inverse of sync: regenerates the canonical table from the index. Used by
// the round-trip tests (md → db → md must be lossless for clean input), and as
// a repaired copy the user can review and adopt by hand. It never touches
// applications.md unless explicitly asked to via --out.
async function exportMd(args) {
const DatabaseSync = await loadSqlite();
const db = openDb(DatabaseSync);
ensureFresh(db, loadStates());
const rows = db.prepare('SELECT * FROM applications ORDER BY pos').all();
const out = [
'# Applications Tracker',
'',
HEADER,
SEPARATOR,
...rows.map(rowToMarkdown),
'',
].join('\n');
const outPath = flagValue(args, '--out');
if (!outPath) {
process.stdout.write(out);
return;
}
if (existsSync(outPath) && statSync(outPath).isDirectory()) {
console.error(`Error: --out ${outPath} is a directory — pass a file path.`);
process.exit(1);
}
mkdirSync(dirname(outPath) || '.', { recursive: true });
// Never silently clobber — whatever was there is backed up first.
if (existsSync(outPath)) {
copyFileSync(outPath, outPath + '.bak');
console.error(`Existing ${outPath} backed up to ${outPath}.bak`);
}
writeFileSync(outPath, out, 'utf-8');
console.error(`Exported ${rows.length} applications to ${outPath}`);
}
// ── Main ────────────────────────────────────────────────────────────
const COMMANDS = { sync, query, history, export: exportMd };
async function main() {
const [command, ...args] = process.argv.slice(2);
const fn = COMMANDS[command];
if (!fn) {
console.log('Usage: node tracker.mjs <sync|query|history|export> [flags]');
console.log('See the header comment of this file for examples, or docs/SCRIPTS.md.');
process.exit(command ? 1 : 0);
}
await fn(args);
}
if (import.meta.url === pathToFileURL(process.argv[1] || '').href) {
main().catch(err => {
console.error('Fatal:', err.message);
process.exit(1);
});
}