feat: integrate Headroom compression - compress large analysis/code/devops contexts (≥5K tokens) via context event
This commit is contained in:
@@ -5,11 +5,30 @@ import type {
|
||||
BeforeAgentStartEventResult,
|
||||
ExtensionCommandContext,
|
||||
} from "@earendil-works/pi-coding-agent";
|
||||
import { compress } from "headroom-ai";
|
||||
|
||||
// Global state for manual model lock
|
||||
let isLocked = false;
|
||||
let lockedModel: any = null;
|
||||
|
||||
/**
 * Routing tags for which the context handler attempts Headroom compression.
 * Any tag not listed here (e.g. read / discuss / search) skips compression
 * entirely. Typed as a read-only set because it is only ever queried.
 */
const COMPRESS_TAGS: ReadonlySet<string> = new Set([
  "devops-low",
  "devops-high",
  "code-analysis-low",
  "code-analysis-high",
  "codewrite-low",
  "codewrite-high",
]);
|
||||
|
||||
// Minimum total size (in chars) of the JSON-serialized message list before compression activates.
|
||||
// ~5K tokens ≈ 20K characters (rough 4:1 ratio).
|
||||
const COMPRESS_MIN_CHARS = 20_000;
|
||||
|
||||
// Current routing tag for the active turn (used by context handler)
|
||||
let currentTag: string | null = null;
|
||||
|
||||
// Model ID mappings for routing
|
||||
const MODELS: Record<string, { provider: string; id: string }> = {
|
||||
"free-core": { provider: "openrouter", id: "free" },
|
||||
@@ -193,6 +212,9 @@ export default function (pi: ExtensionAPI) {
|
||||
pi.setThinkingLevel(thinkingLevel as any);
|
||||
}
|
||||
|
||||
// Store tag for compression check in context event
|
||||
currentTag = tag;
|
||||
|
||||
// Show routing decision in status bar
|
||||
ctx.ui.setStatus("router", `🎯 ${tag} → ${modelLabel(modelKey)}`);
|
||||
|
||||
@@ -203,4 +225,30 @@ export default function (pi: ExtensionAPI) {
|
||||
ctx.ui.setStatus("router", `⚠️ fallback ${modelLabel("free-core")}`);
|
||||
}
|
||||
});
|
||||
|
||||
// 4. Compress large contexts before LLM turns (Headroom)
//
// Runs on every "context" event. Returns `{ messages }` with the compressed
// message list when compression produced a non-empty result; otherwise returns
// nothing so the original messages are passed through unchanged.
pi.on("context", async (event, ctx) => {
  // Only compress for analysis/coding/devops tags
  if (!currentTag || !COMPRESS_TAGS.has(currentTag)) return;

  // Quick size check before calling the proxy: the serialized length of the
  // whole message list is used as a cheap stand-in for the token count.
  const totalChars = JSON.stringify(event.messages).length;
  if (totalChars < COMPRESS_MIN_CHARS) return;

  try {
    const result = await compress(event.messages, {
      baseUrl: "http://localhost:8787",
      fallback: true,
      timeout: 15_000,
    });

    if (result.messages && result.messages.length > 0) {
      // Guard the percentage: a zero or missing `tokensBefore` would otherwise
      // render "NaN%" / "-Infinity%" in the status bar.
      const savedRatio =
        ((result.tokensBefore - result.tokensAfter) / result.tokensBefore) * 100;
      const saved = Number.isFinite(savedRatio) ? savedRatio.toFixed(0) : "0";
      ctx.ui.setStatus("compression", `📦 ${saved}%`);
      return { messages: result.messages };
    }
  } catch {
    // Best-effort: any failure from compress() leaves the original messages
    // untouched and only flags the status bar. (`fallback: true` is expected to
    // absorb transport errors already — NOTE(review): confirm against headroom-ai docs.)
    ctx.ui.setStatus("compression", "⚠️ offline");
  }
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user