remove deprecated smart-router extension
This commit is contained in:
@@ -1,264 +0,0 @@
|
|||||||
import type {
|
|
||||||
ExtensionAPI,
|
|
||||||
ExtensionContext,
|
|
||||||
BeforeAgentStartEvent,
|
|
||||||
BeforeAgentStartEventResult,
|
|
||||||
ExtensionCommandContext,
|
|
||||||
} from "@earendil-works/pi-coding-agent";
|
|
||||||
import { compress } from "headroom-ai";
|
|
||||||
|
|
||||||
// Manual model lock, toggled by the /lock-model and /unlock-model commands.
// While `isLocked` is true the before_agent_start handler bypasses routing and
// re-applies `lockedModel` on every turn.
let isLocked = false;
// Holds whatever `ctx.modelRegistry.find` returned. The registry's model type is
// not imported into this file, hence `any`.
let lockedModel: any = null;

// Routing tags whose turns are eligible for Headroom context compression.
// The remaining tags (read / discuss / search) skip compression entirely.
// Typed read-only: this is a fixed lookup table, not mutable state.
const COMPRESS_TAGS: ReadonlySet<string> = new Set([
  "devops-low",
  "devops-high",
  "code-analysis-low",
  "code-analysis-high",
  "codewrite-low",
  "codewrite-high",
]);

// Minimum serialized message size (in chars) before compression activates.
// ~5K tokens ≈ 20K characters (rough 4:1 ratio).
const COMPRESS_MIN_CHARS = 20_000;

// Routing tag chosen for the active turn; read by the "context" handler to
// decide whether compression applies.
let currentTag: string | null = null;
|
|
||||||
|
|
||||||
// Model ID mappings for routing, keyed by routing tier.
// TEMPORARY: All routes set to owl-alpha to stop cost drain.
// Restore cost-aware routing by replacing owl-alpha with the models commented below.
//
// RESTORE MAP (uncomment and replace owl-alpha lines):
//   "free-core":          { provider: "openrouter",  id: "free" },                        // was: openrouter/free (keep)
//   "router-eval":        { provider: "openrouter",  id: "free" },                        // was: openrouter/free (keep)
//   "economy-devops":     { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },  // cheap
//   "economy-code":       { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },  // cheap
//   "precision-devops":   { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },  // cheap
//   "precision-code-high":{ provider: "opencode-go", id: "deepseek/deepseek-v4-pro" },    // expensive, only for complex code
//   "precision-react":    { provider: "opencode-go", id: "deepseek/deepseek-v4-pro" },    // expensive, only for complex React
//   "context-heavy":      { provider: "openrouter",  id: "free" },                        // was: openrouter/free (keep)
//
// THINKING restore: precision-code-high and precision-react → "high", everything else → off
//
// Declared as Partial<Record<...>> so that a lookup with an unknown key is
// `undefined` at the type level as well as at runtime; callers must guard.
const MODELS: Partial<Record<string, { provider: string; id: string }>> = {
  "free-core": { provider: "openrouter", id: "owl-alpha" },
  "router-eval": { provider: "openrouter", id: "owl-alpha" },
  "economy-devops": { provider: "openrouter", id: "owl-alpha" },
  "economy-code": { provider: "openrouter", id: "owl-alpha" },
  "precision-devops": { provider: "openrouter", id: "owl-alpha" },
  "precision-code-high": { provider: "openrouter", id: "owl-alpha" },
  "precision-react": { provider: "openrouter", id: "owl-alpha" },
  "context-heavy": { provider: "openrouter", id: "owl-alpha" },
};

// Thinking level per MODELS key; a key with no entry leaves the level untouched.
// TEMPORARY: All thinking disabled while using owl-alpha.
// Restore: precision-code-high and precision-react → "high", everything else → off.
const THINKING: Partial<Record<string, string>> = {};
|
|
||||||
|
|
||||||
/**
 * Resolves a routing tier to a model object from the host's registry.
 *
 * @param ctx - Extension context whose `modelRegistry.find` performs the lookup.
 * @param key - Routing tier name, i.e. a key of `MODELS`.
 * @returns Whatever `modelRegistry.find` returns for the mapped provider/id,
 *          or `undefined` when `key` has no entry in `MODELS`.
 */
function getModel(ctx: ExtensionContext, key: string): any {
  const entry = MODELS[key];
  return entry ? ctx.modelRegistry.find(entry.provider, entry.id) : undefined;
}
|
|
||||||
|
|
||||||
/**
 * Short display name for a routing tier, used in the status bar.
 *
 * @param key - Routing tier name, i.e. a key of `MODELS`.
 * @returns The part of the mapped model id after its last "/" (the whole id
 *          when it contains no "/"), e.g. "deepseek/deepseek-v4-flash" →
 *          "deepseek-v4-flash", "owl-alpha" → "owl-alpha". Returns `key`
 *          itself when the tier is not present in `MODELS`.
 */
function modelLabel(key: string): string {
  const m = MODELS[key];
  if (!m) return key;
  // lastIndexOf yields -1 when there is no slash, so slice(0) keeps the full id.
  return m.id.slice(m.id.lastIndexOf("/") + 1);
}
|
|
||||||
|
|
||||||
// System prompt for the classifier call: asks for a minified {"tag","lang"} JSON object.
const ROUTER_SYSTEM_PROMPT =
  'You are a minimal command interceptor router. Analyze the prompt and output ONLY a minified JSON object containing the tag and language. No markdown, no conversation.\n\nValid Tags:\n- "read" (reading/ingesting logs, docs, read-only code)\n- "discuss" (general chat, architectural concepts)\n- "search" (requires web search/external lookups)\n- "devops-low" (editing yaml, single dockerfile, basic bash, env adjustments)\n- "devops-high" (complex multi-container network layers, server crashes)\n- "code-analysis-low" (finding a bug in a short file)\n- "code-analysis-high" (refactoring multiple complex files, tracking syntax errors)\n- "codewrite-low" (generating simple syntax boilerplate or standard functions)\n- "codewrite-high" (complex features, heavy React state, complex PHP architectures)\n\nRespond exactly like this: {"tag":"tag-name","lang":"react|php|python|rust|bash|none"}';

// Upper bound on the classifier round-trip so a stalled request cannot block
// the agent from starting. Matches the timeout passed to compress().
const ROUTER_TIMEOUT_MS = 15_000;

/**
 * Extracts the routing decision from the classifier's raw reply.
 *
 * Only the text between the first "{" and the last "}" is parsed, so a reply
 * wrapped in a markdown fence or surrounded by stray prose is still accepted.
 *
 * @param content - Non-empty reply text from the classifier.
 * @returns The tag and language, defaulting to "read" / "none" when a field
 *          is missing, empty, or not a string.
 * @throws SyntaxError when the text is not valid JSON; Error when the JSON
 *         value is not an object. The caller treats both as a routing failure.
 */
function parseRoutingDecision(content: string): { tag: string; lang: string } {
  const start = content.indexOf("{");
  const end = content.lastIndexOf("}");
  const json = start >= 0 && end > start ? content.slice(start, end + 1) : content;

  const parsed: unknown = JSON.parse(json);
  if (typeof parsed !== "object" || parsed === null) {
    throw new Error("Router reply is not a JSON object");
  }

  const { tag, lang } = parsed as { tag?: unknown; lang?: unknown };
  return {
    tag: typeof tag === "string" && tag ? tag : "read",
    lang: typeof lang === "string" && lang ? lang : "none",
  };
}

/**
 * Deterministic routing tree: maps a classifier tag (and language) to a MODELS key.
 *
 * @param tag - Routing tag reported by the classifier.
 * @param lang - Language reported by the classifier; only "react" changes the outcome.
 * @returns The MODELS key for the turn. read / discuss / search /
 *          code-analysis-low and any unrecognized tag map to "free-core".
 */
function pickModelKey(tag: string, lang: string): string {
  switch (tag) {
    case "devops-low":
      return "economy-devops";
    case "devops-high":
      return "precision-devops";
    case "code-analysis-high":
      return "context-heavy";
    case "codewrite-low":
      return "economy-code";
    case "codewrite-high":
      return lang === "react" ? "precision-react" : "precision-code-high";
    default:
      return "free-core";
  }
}

/**
 * Extension entry point. Registers:
 *  1. `/lock-model <provider/model-id>` — pins a model and disables routing.
 *  2. `/unlock-model` — re-enables routing.
 *  3. a `before_agent_start` handler that classifies the prompt through
 *     OpenRouter and selects a model (and thinking level) for the turn.
 *  4. a `context` handler that compresses large message lists through Headroom
 *     when the turn's tag is in COMPRESS_TAGS.
 *
 * @param pi - Extension API handle supplied by the host.
 */
export default function (pi: ExtensionAPI) {
  // 1. Register /lock-model command
  pi.registerCommand("lock-model", {
    description: "Lock to a specific model. Disables dynamic routing until /unlock-model.",
    handler: async (args: string, ctx: ExtensionCommandContext) => {
      const modelId = (args || "").trim();
      if (!modelId) {
        ctx.ui.notify(
          "Usage: /lock-model <provider/model-id>\nExample: /lock-model openrouter/anthropic/claude-3.5-sonnet",
          "error"
        );
        return;
      }

      // Split on the FIRST slash only: the model id itself may contain slashes.
      const slashIdx = modelId.indexOf("/");
      if (slashIdx < 0) {
        ctx.ui.notify("Invalid model ID format. Use: provider/model-id", "error");
        return;
      }
      const provider = modelId.substring(0, slashIdx);
      const id = modelId.substring(slashIdx + 1);

      let model = ctx.modelRegistry.find(provider, id);
      if (!model) {
        // OpenRouter model IDs include provider prefix (e.g. "openrouter/owl-alpha")
        // so try with the full provider/model-id format
        model = ctx.modelRegistry.find(provider, `${provider}/${id}`);
      }
      if (!model) {
        ctx.ui.notify(`Model not found: ${modelId}`, "error");
        return;
      }

      isLocked = true;
      lockedModel = model;
      pi.setModel(model);
      ctx.ui.notify(`🔒 Router disabled. Model locked to: ${modelId}`, "info");
    },
  });

  // 2. Register /unlock-model command
  pi.registerCommand("unlock-model", {
    description: "Re-enable dynamic prompt routing.",
    handler: async (_args: string, ctx: ExtensionCommandContext) => {
      isLocked = false;
      lockedModel = null;
      ctx.ui.notify("🔓 Router enabled. Prompts will be dynamically analyzed and routed.", "info");
    },
  });

  // 3. Intercept prompts before agent starts
  pi.on("before_agent_start", async (event: BeforeAgentStartEvent, ctx: ExtensionContext): Promise<BeforeAgentStartEventResult | undefined> => {
    const prompt = event.prompt;

    // The tag describes the active turn only. Clear it up front so that a
    // locked, short-prompt or fallback turn never reuses the previous turn's
    // tag in the "context" handler.
    currentTag = null;

    // Pass-through if user locked the model manually
    if (isLocked && lockedModel) {
      pi.setModel(lockedModel);
      ctx.ui.setStatus("router", `🔒 ${lockedModel.id || "unknown"}`);
      return;
    }

    // Skip routing for short prompts / simple greetings
    if (prompt.trim().length < 15) {
      const model = getModel(ctx, "router-eval");
      if (model) pi.setModel(model);
      ctx.ui.setStatus("router", `⚡ ${modelLabel("router-eval")}`);
      return;
    }

    // Show analyzing indicator in status bar
    ctx.ui.setStatus("router", "🤔 Analyzing...");

    // Shared fallback: select the free-core model and show the given status.
    const useFallback = (status: string): void => {
      const fallback = getModel(ctx, "free-core");
      if (fallback) pi.setModel(fallback);
      ctx.ui.setStatus("router", status);
    };

    try {
      const apiKey = process.env["OPENROUTER_API_KEY"] || "";
      if (!apiKey) {
        // Without a key the request can only be rejected; skip the round-trip.
        useFallback(`⚠️ ${modelLabel("free-core")}`);
        return;
      }

      const response = await fetch("https://openrouter.ai/api/v1/chat/completions", {
        method: "POST",
        headers: {
          "Authorization": `Bearer ${apiKey}`,
          "Content-Type": "application/json",
        },
        body: JSON.stringify({
          model: "openrouter/owl-alpha",
          messages: [
            { role: "system", content: ROUTER_SYSTEM_PROMPT },
            { role: "user", content: prompt },
          ],
          max_tokens: 35,
          temperature: 0,
        }),
        // Abort a stalled request; the rejection lands in the catch below.
        signal: AbortSignal.timeout(ROUTER_TIMEOUT_MS),
      });

      if (!response.ok) {
        useFallback(`⚠️ ${modelLabel("free-core")}`);
        return;
      }

      const payload = await response.json();
      const rawContent: unknown = payload?.choices?.[0]?.message?.content;
      const content = typeof rawContent === "string" ? rawContent.trim() : "";
      if (!content) {
        useFallback(`⚠️ ${modelLabel("free-core")}`);
        return;
      }

      // A malformed reply throws here and is handled by the catch below.
      const { tag, lang } = parseRoutingDecision(content);
      const modelKey = pickModelKey(tag, lang);

      const model = getModel(ctx, modelKey);
      if (model) {
        pi.setModel(model);
      }

      // Set thinking level based on task complexity
      const thinkingLevel = THINKING[modelKey];
      if (thinkingLevel) {
        pi.setThinkingLevel(thinkingLevel as any);
      }

      // Store tag for compression check in context event
      currentTag = tag;

      // Show routing decision in status bar (shortened to avoid truncation)
      ctx.ui.setStatus("router", `${modelLabel(modelKey)}`);
    } catch (_error) {
      // Fallback gracefully on network drops, timeouts and unparseable replies
      useFallback(`⚠️ fallback ${modelLabel("free-core")}`);
    }
  });

  // 4. Compress large contexts before LLM turns (Headroom)
  pi.on("context", async (event, ctx) => {
    // Only compress for analysis/coding/devops tags
    if (!currentTag || !COMPRESS_TAGS.has(currentTag)) return;

    // Quick size check before calling the proxy
    const totalChars = JSON.stringify(event.messages).length;
    if (totalChars < COMPRESS_MIN_CHARS) return;

    try {
      const result = await compress(event.messages, {
        baseUrl: "http://192.168.20.13:8787",
        fallback: true,
        timeout: 15_000,
      });
      if (result.messages && result.messages.length > 0) {
        const { tokensBefore, tokensAfter } = result;
        // Guard the division: a zero baseline would otherwise render as "NaN%".
        const savedPct = tokensBefore > 0 ? ((tokensBefore - tokensAfter) / tokensBefore) * 100 : 0;
        ctx.ui.setStatus("compression", `📦${savedPct.toFixed(0)}%`);
        return { messages: result.messages };
      }
    } catch {
      // Proxy down — pass through (fallback: true already handles transport errors)
      ctx.ui.setStatus("compression", "⚠️");
    }
  });
}
|
|
||||||
45
extensions/smart-router/package-lock.json
generated
45
extensions/smart-router/package-lock.json
generated
@@ -1,45 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "smart-router",
|
|
||||||
"version": "1.0.0",
|
|
||||||
"lockfileVersion": 3,
|
|
||||||
"requires": true,
|
|
||||||
"packages": {
|
|
||||||
"": {
|
|
||||||
"name": "smart-router",
|
|
||||||
"version": "1.0.0",
|
|
||||||
"license": "ISC",
|
|
||||||
"dependencies": {
|
|
||||||
"headroom-ai": "^0.22.4"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"node_modules/headroom-ai": {
|
|
||||||
"version": "0.22.4",
|
|
||||||
"resolved": "https://registry.npmjs.org/headroom-ai/-/headroom-ai-0.22.4.tgz",
|
|
||||||
"integrity": "sha512-9a0rgB/jsWe8gs/ggyUwe6E8DYwKAuBvlUml2ApwlUjb5EfJ611X6X+WG0SiXw3nO6sdyV1/+Ah5uw9P7ecnjw==",
|
|
||||||
"license": "Apache-2.0",
|
|
||||||
"engines": {
|
|
||||||
"node": ">=18.0.0"
|
|
||||||
},
|
|
||||||
"peerDependencies": {
|
|
||||||
"@ai-sdk/provider": ">=1.0.0",
|
|
||||||
"@anthropic-ai/sdk": ">=0.30.0",
|
|
||||||
"ai": ">=6.0.0",
|
|
||||||
"openai": ">=4.0.0"
|
|
||||||
},
|
|
||||||
"peerDependenciesMeta": {
|
|
||||||
"@ai-sdk/provider": {
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"@anthropic-ai/sdk": {
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"ai": {
|
|
||||||
"optional": true
|
|
||||||
},
|
|
||||||
"openai": {
|
|
||||||
"optional": true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
{
|
|
||||||
"name": "smart-router",
|
|
||||||
"version": "1.0.0",
|
|
||||||
"description": "",
|
|
||||||
"main": "index.js",
|
|
||||||
"scripts": {
|
|
||||||
"test": "echo \"Error: no test specified\" && exit 1"
|
|
||||||
},
|
|
||||||
"keywords": [],
|
|
||||||
"author": "",
|
|
||||||
"license": "ISC",
|
|
||||||
"type": "commonjs",
|
|
||||||
"dependencies": {
|
|
||||||
"headroom-ai": "^0.22.4"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Reference in New Issue
Block a user