feat: integrate Headroom compression - compress large analysis/code/devops contexts (≥5K tokens) via context event
This commit is contained in:
@@ -5,11 +5,30 @@ import type {
|
|||||||
BeforeAgentStartEventResult,
|
BeforeAgentStartEventResult,
|
||||||
ExtensionCommandContext,
|
ExtensionCommandContext,
|
||||||
} from "@earendil-works/pi-coding-agent";
|
} from "@earendil-works/pi-coding-agent";
|
||||||
|
import { compress } from "headroom-ai";
|
||||||
|
|
||||||
// Global state for manual model lock
|
// Global state for manual model lock
|
||||||
let isLocked = false;
|
let isLocked = false;
|
||||||
let lockedModel: any = null;
|
let lockedModel: any = null;
|
||||||
|
|
||||||
|
// Routing tags whose turns are eligible for Headroom context compression.
// Tags not listed here (e.g. read / discuss / search) skip compression entirely.
// Typed as ReadonlySet so this allow-list cannot be mutated at runtime by accident.
const COMPRESS_TAGS: ReadonlySet<string> = new Set([
  "devops-low",
  "devops-high",
  "code-analysis-low",
  "code-analysis-high",
  "codewrite-low",
  "codewrite-high",
]);
|
||||||
|
|
||||||
|
// Minimum total size of the JSON-serialized message list (in chars) before compression activates.
|
||||||
|
// ~5K tokens ≈ 20K characters (rough 4:1 ratio).
|
||||||
|
const COMPRESS_MIN_CHARS = 20_000;
|
||||||
|
|
||||||
|
// Current routing tag for the active turn (used by context handler)
|
||||||
|
// Starts as null; while it is null the context handler's tag check fails and compression is skipped.
let currentTag: string | null = null;
|
||||||
|
|
||||||
// Model ID mappings for routing
|
// Model ID mappings for routing
|
||||||
const MODELS: Record<string, { provider: string; id: string }> = {
|
const MODELS: Record<string, { provider: string; id: string }> = {
|
||||||
"free-core": { provider: "openrouter", id: "free" },
|
"free-core": { provider: "openrouter", id: "free" },
|
||||||
@@ -193,6 +212,9 @@ export default function (pi: ExtensionAPI) {
|
|||||||
pi.setThinkingLevel(thinkingLevel as any);
|
pi.setThinkingLevel(thinkingLevel as any);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Store tag for compression check in context event
|
||||||
|
currentTag = tag;
|
||||||
|
|
||||||
// Show routing decision in status bar
|
// Show routing decision in status bar
|
||||||
ctx.ui.setStatus("router", `🎯 ${tag} → ${modelLabel(modelKey)}`);
|
ctx.ui.setStatus("router", `🎯 ${tag} → ${modelLabel(modelKey)}`);
|
||||||
|
|
||||||
@@ -203,4 +225,30 @@ export default function (pi: ExtensionAPI) {
|
|||||||
ctx.ui.setStatus("router", `⚠️ fallback ${modelLabel("free-core")}`);
|
ctx.ui.setStatus("router", `⚠️ fallback ${modelLabel("free-core")}`);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// 4. Compress large contexts before LLM turns (Headroom)
|
||||||
|
// Context hook: before an LLM turn, try to shrink a large message list through the
// Headroom proxy. Returns `{ messages }` with the replacement list when compression
// produced one; returns nothing (leaving the context untouched) when the current tag
// is not eligible, the context is below the size threshold, or the call fails.
pi.on("context", async (event, ctx) => {
  // Only compress for analysis/coding/devops tags
  if (!currentTag || !COMPRESS_TAGS.has(currentTag)) return;

  // Quick size check before calling the proxy: serialized length of the whole list.
  const totalChars = JSON.stringify(event.messages).length;
  if (totalChars < COMPRESS_MIN_CHARS) return;

  try {
    const result = await compress(event.messages, {
      baseUrl: "http://localhost:8787",
      fallback: true,
      timeout: 15_000,
    });

    if (result.messages && result.messages.length > 0) {
      // Guard the ratio: a zero or missing `tokensBefore` would otherwise divide by
      // zero and put "NaN%" in the status bar.
      // NOTE(review): presumably a fallback pass-through reports zero counts — confirm
      // against the headroom-ai result contract.
      const { tokensBefore, tokensAfter } = result;
      const ratio = tokensBefore > 0 ? (tokensBefore - tokensAfter) / tokensBefore : 0;
      const saved = (Number.isFinite(ratio) ? ratio * 100 : 0).toFixed(0);
      ctx.ui.setStatus("compression", `📦 ${saved}%`);
      return { messages: result.messages };
    }
  } catch {
    // Best-effort: compress() threw even with `fallback: true` set, so flag it in the
    // status bar and let the original context pass through unchanged.
    ctx.ui.setStatus("compression", "⚠️ offline");
  }
});
|
||||||
}
|
}
|
||||||
|
|||||||
45
extensions/smart-router/package-lock.json
generated
Normal file
45
extensions/smart-router/package-lock.json
generated
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
{
|
||||||
|
"name": "smart-router",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"lockfileVersion": 3,
|
||||||
|
"requires": true,
|
||||||
|
"packages": {
|
||||||
|
"": {
|
||||||
|
"name": "smart-router",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"license": "ISC",
|
||||||
|
"dependencies": {
|
||||||
|
"headroom-ai": "^0.22.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"node_modules/headroom-ai": {
|
||||||
|
"version": "0.22.4",
|
||||||
|
"resolved": "https://registry.npmjs.org/headroom-ai/-/headroom-ai-0.22.4.tgz",
|
||||||
|
"integrity": "sha512-9a0rgB/jsWe8gs/ggyUwe6E8DYwKAuBvlUml2ApwlUjb5EfJ611X6X+WG0SiXw3nO6sdyV1/+Ah5uw9P7ecnjw==",
|
||||||
|
"license": "Apache-2.0",
|
||||||
|
"engines": {
|
||||||
|
"node": ">=18.0.0"
|
||||||
|
},
|
||||||
|
"peerDependencies": {
|
||||||
|
"@ai-sdk/provider": ">=1.0.0",
|
||||||
|
"@anthropic-ai/sdk": ">=0.30.0",
|
||||||
|
"ai": ">=6.0.0",
|
||||||
|
"openai": ">=4.0.0"
|
||||||
|
},
|
||||||
|
"peerDependenciesMeta": {
|
||||||
|
"@ai-sdk/provider": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"@anthropic-ai/sdk": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"ai": {
|
||||||
|
"optional": true
|
||||||
|
},
|
||||||
|
"openai": {
|
||||||
|
"optional": true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
16
extensions/smart-router/package.json
Normal file
16
extensions/smart-router/package.json
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
{
|
||||||
|
"name": "smart-router",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "",
|
||||||
|
"main": "index.js",
|
||||||
|
"scripts": {
|
||||||
|
"test": "echo \"Error: no test specified\" && exit 1"
|
||||||
|
},
|
||||||
|
"keywords": [],
|
||||||
|
"author": "",
|
||||||
|
"license": "ISC",
|
||||||
|
"type": "commonjs",
|
||||||
|
"dependencies": {
|
||||||
|
"headroom-ai": "^0.22.4"
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user