/**
 * Smart-router extension: chooses a model for each prompt before the agent starts.
 *
 * File: extensions/smart-router/index.ts
 * Provenance: commit cdd121cd62ae967d8d53763404c2eb8bcde9dea2,
 * "feat: add router-status output to smart-router extension".
 *
 * Behaviour:
 *  - `/lock-model <provider/model-id>` pins one model and disables routing.
 *  - `/unlock-model` re-enables routing.
 *  - On `before_agent_start`, short prompts go straight to the "router-eval"
 *    model; longer prompts are classified by a small LLM call and mapped to a
 *    model through a fixed routing table. Every outcome is reported as a hidden
 *    `router-status` message.
 */
import type {
  ExtensionAPI,
  ExtensionContext,
  BeforeAgentStartEvent,
  BeforeAgentStartEventResult,
  ExtensionCommandContext,
} from "@earendil-works/pi-coding-agent";

/** Provider / model-id pair understood by `ctx.modelRegistry.find`. */
interface ModelMapping {
  provider: string;
  id: string;
}

/** Classifier output after validation and normalisation. */
interface RouterDecision {
  tag: string;
  lang: string;
}

/** Logical routing slots; each one maps to a concrete model in `MODELS`. */
type ModelKey =
  | "free-core"
  | "router-eval"
  | "economy-devops"
  | "economy-code"
  | "precision-devops"
  | "precision-code-high"
  | "precision-react"
  | "context-heavy";

/** Whatever the registry lookup returns (the model type is owned by the host package). */
type FoundModel = ReturnType<ExtensionContext["modelRegistry"]["find"]>;
type RegistryModel = NonNullable<FoundModel>;

/** Model currently pinned by `/lock-model`; `null` means dynamic routing is active. */
let lockedModel: RegistryModel | null = null;

// Model ID mappings for routing
const MODELS: Record<ModelKey, ModelMapping> = {
  "free-core": { provider: "openrouter", id: "openrouter/owl-alpha" },
  "router-eval": { provider: "openrouter", id: "openrouter/owl-alpha" },
  "economy-devops": { provider: "openrouter", id: "qwen/qwen3.6-flash" },
  "economy-code": { provider: "openrouter", id: "deepseek/deepseek-v4-flash" },
  "precision-devops": { provider: "openrouter", id: "qwen/qwen-2.5-72b-instruct" },
  "precision-code-high": { provider: "openrouter", id: "deepseek/deepseek-v4-pro" },
  "precision-react": { provider: "openrouter", id: "qwen/qwen3-coder-plus" },
  "context-heavy": { provider: "openrouter", id: "moonshotai/kimi-k2.6" },
};

/** Tags whose route does not depend on the language. Unknown tags use "free-core". */
const TAG_ROUTES = new Map<string, ModelKey>([
  ["read", "free-core"],
  ["discuss", "free-core"],
  ["search", "free-core"],
  ["devops-low", "economy-devops"],
  ["devops-high", "precision-devops"],
  ["code-analysis-low", "free-core"],
  ["code-analysis-high", "context-heavy"],
  ["codewrite-low", "economy-code"],
]);

/** Prompts shorter than this (after trimming) skip the classifier call. */
const SHORT_PROMPT_CHARS = 15;

/** Upper bound on the classifier request so a stalled network cannot block the agent. */
const ROUTER_TIMEOUT_MS = 10000;

const ROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions";

/** System prompt for the classifier; lines are joined with "\n". */
const ROUTER_SYSTEM_PROMPT = [
  "You are a minimal command interceptor router. Analyze the prompt and output ONLY a minified JSON object containing the tag and language. No markdown, no conversation.",
  "",
  "Valid Tags:",
  '- "read" (reading/ingesting logs, docs, read-only code)',
  '- "discuss" (general chat, architectural concepts)',
  '- "search" (requires web search/external lookups)',
  '- "devops-low" (editing yaml, single dockerfile, basic bash, env adjustments)',
  '- "devops-high" (complex multi-container network layers, server crashes)',
  '- "code-analysis-low" (finding a bug in a short file)',
  '- "code-analysis-high" (refactoring multiple complex files, tracking syntax errors)',
  '- "codewrite-low" (generating simple syntax boilerplate or standard functions)',
  '- "codewrite-high" (complex features, heavy React state, complex PHP architectures)',
  "",
  'Respond exactly like this: {"tag":"tag-name","lang":"react|php|python|rust|bash|none"}',
].join("\n");

/** Looks up the registry model configured for a routing slot. */
function getModel(ctx: ExtensionContext, key: ModelKey): FoundModel {
  const { provider, id } = MODELS[key];
  return ctx.modelRegistry.find(provider, id);
}

/** Human-readable `provider/id` label for a routing slot. */
function modelLabel(key: ModelKey): string {
  const { provider, id } = MODELS[key];
  return `${provider}/${id}`;
}

/**
 * Splits `provider/model-id` at the FIRST slash, so the id may itself contain
 * slashes (e.g. `openrouter/anthropic/claude-3.5-sonnet`).
 *
 * @returns the two parts, or `undefined` when either part would be empty.
 */
function splitModelId(modelId: string): ModelMapping | undefined {
  const slashIdx = modelId.indexOf("/");
  if (slashIdx <= 0 || slashIdx === modelId.length - 1) return undefined;
  return { provider: modelId.slice(0, slashIdx), id: modelId.slice(slashIdx + 1) };
}

/**
 * Extracts the routing decision from the classifier's raw text.
 *
 * Only the span from the first `{` to the last `}` is parsed, so code fences or
 * stray prose around the JSON do not break routing. Missing or non-string
 * fields default to tag "read" and lang "none".
 *
 * @returns the normalised decision, or `undefined` when no JSON object is found.
 */
function parseDecision(raw: string): RouterDecision | undefined {
  const start = raw.indexOf("{");
  const end = raw.lastIndexOf("}");
  if (start < 0 || end <= start) return undefined;

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw.slice(start, end + 1));
  } catch {
    return undefined;
  }
  if (typeof parsed !== "object" || parsed === null) return undefined;

  const { tag, lang } = parsed as { tag?: unknown; lang?: unknown };
  const clean = (value: unknown, fallback: string): string => {
    const text = typeof value === "string" ? value.trim().toLowerCase() : "";
    return text === "" ? fallback : text;
  };
  return { tag: clean(tag, "read"), lang: clean(lang, "none") };
}

/** Deterministic routing tree: maps a classifier tag (and language) to a routing slot. */
function selectModelKey(tag: string, lang: string): ModelKey {
  if (tag === "codewrite-high") {
    return lang === "react" ? "precision-react" : "precision-code-high";
  }
  return TAG_ROUTES.get(tag) ?? "free-core";
}

/** Emits a hidden `router-status` message. */
function sendStatus(pi: ExtensionAPI, content: string): void {
  pi.sendMessage({ customType: "router-status", content, display: false });
}

/**
 * Switches to the model behind `key` and reports `status`.
 *
 * If the model is not in the registry, or the switch fails, the status says so
 * rather than claiming a route that did not happen. Never throws.
 */
async function routeTo(
  pi: ExtensionAPI,
  ctx: ExtensionContext,
  key: ModelKey,
  status: string,
): Promise<void> {
  const model = getModel(ctx, key);
  let applied = false;
  if (model) {
    try {
      // NOTE(review): pi's extension docs describe setModel as resolving to
      // `false` when no API key is available — confirm against the installed version.
      applied = (await pi.setModel(model)) !== false;
    } catch {
      applied = false;
    }
  }
  sendStatus(pi, applied ? status : `${status} (unavailable — keeping current model)`);
}

/**
 * Asks the "router-eval" model to classify `prompt`.
 *
 * @returns the decision, or `undefined` when the HTTP status is not OK or the
 *          reply holds no usable JSON.
 * @throws on network failure, timeout (abort), or a non-JSON response body.
 */
async function classifyPrompt(prompt: string, apiKey: string): Promise<RouterDecision | undefined> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), ROUTER_TIMEOUT_MS);
  try {
    const response = await fetch(ROUTER_ENDPOINT, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: MODELS["router-eval"].id,
        messages: [
          { role: "system", content: ROUTER_SYSTEM_PROMPT },
          { role: "user", content: prompt },
        ],
        max_tokens: 35,
        temperature: 0,
      }),
      signal: controller.signal,
    });
    if (!response.ok) return undefined;

    const payload = (await response.json()) as
      | { choices?: Array<{ message?: { content?: unknown } }> }
      | null;
    const content = payload?.choices?.[0]?.message?.content;
    return typeof content === "string" ? parseDecision(content) : undefined;
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Extension entry point: registers `/lock-model`, `/unlock-model` and the
 * `before_agent_start` routing hook on the given API.
 */
export default function smartRouterExtension(pi: ExtensionAPI) {
  // 1. Register /lock-model command
  pi.registerCommand("lock-model", {
    description: "Lock to a specific model. Disables dynamic routing until /unlock-model.",
    handler: async (args: string, ctx: ExtensionCommandContext) => {
      const modelId = (args || "").trim();
      if (!modelId) {
        ctx.ui.notify(
          "Usage: /lock-model <provider/model-id>\nExample: /lock-model openrouter/anthropic/claude-3.5-sonnet",
          "error"
        );
        return;
      }

      const target = splitModelId(modelId);
      if (!target) {
        ctx.ui.notify("Invalid model ID format. Use: provider/model-id", "error");
        return;
      }

      const model = ctx.modelRegistry.find(target.provider, target.id);
      if (!model) {
        ctx.ui.notify(`Model not found: ${modelId}`, "error");
        return;
      }

      // Only record the lock once the switch has actually succeeded.
      if ((await pi.setModel(model)) === false) {
        ctx.ui.notify(`Could not switch to ${modelId} (no API key available?)`, "error");
        return;
      }
      lockedModel = model;
      ctx.ui.notify(`🔒 Router disabled. Model locked to: ${modelId}`, "info");
    },
  });

  // 2. Register /unlock-model command
  pi.registerCommand("unlock-model", {
    description: "Re-enable dynamic prompt routing.",
    handler: async (_args: string, ctx: ExtensionCommandContext) => {
      lockedModel = null;
      ctx.ui.notify("🔓 Router enabled. Prompts will be dynamically analyzed and routed.", "info");
    },
  });

  // 3. Intercept prompts before agent starts
  pi.on(
    "before_agent_start",
    async (
      event: BeforeAgentStartEvent,
      ctx: ExtensionContext,
    ): Promise<BeforeAgentStartEventResult | void> => {
      // Pass-through if user locked the model manually
      if (lockedModel) {
        await pi.setModel(lockedModel);
        sendStatus(pi, `🔒 Locked → ${lockedModel.id || "unknown"}`);
        return;
      }

      // Skip routing for short prompts / simple greetings
      const prompt = event.prompt;
      if (prompt.trim().length < SHORT_PROMPT_CHARS) {
        await routeTo(pi, ctx, "router-eval", `⚡ Short prompt → ${modelLabel("router-eval")}`);
        return;
      }

      // Without a key the classifier call cannot succeed; skip the round trip.
      const apiKey = (process.env["OPENROUTER_API_KEY"] ?? "").trim();
      if (!apiKey) {
        await routeTo(
          pi,
          ctx,
          "free-core",
          `⚠️ OPENROUTER_API_KEY not set → fallback ${modelLabel("free-core")}`,
        );
        return;
      }

      // Show analyzing indicator
      sendStatus(pi, "🤔 Analyzing prompt intent...");

      let decision: RouterDecision | undefined;
      try {
        decision = await classifyPrompt(prompt, apiKey);
      } catch (error) {
        // Fallback gracefully on network drops, but keep the reason visible.
        const reason = error instanceof Error ? error.message : String(error);
        await routeTo(
          pi,
          ctx,
          "free-core",
          `⚠️ Error (${reason}) → fallback ${modelLabel("free-core")}`,
        );
        return;
      }

      if (!decision) {
        await routeTo(
          pi,
          ctx,
          "free-core",
          `⚠️ Analysis failed → fallback ${modelLabel("free-core")}`,
        );
        return;
      }

      // Show routing decision
      const modelKey = selectModelKey(decision.tag, decision.lang);
      await routeTo(
        pi,
        ctx,
        modelKey,
        `🎯 ${decision.tag} (${decision.lang}) → ${modelLabel(modelKey)}`,
      );
    },
  );
}