feat: add router-status output to smart-router extension
This commit is contained in:
207
extensions/smart-router/index.ts
Normal file
207
extensions/smart-router/index.ts
Normal file
@@ -0,0 +1,207 @@
|
|||||||
|
import type {
|
||||||
|
ExtensionAPI,
|
||||||
|
ExtensionContext,
|
||||||
|
BeforeAgentStartEvent,
|
||||||
|
BeforeAgentStartEventResult,
|
||||||
|
ExtensionCommandContext,
|
||||||
|
} from "@earendil-works/pi-coding-agent";
|
||||||
|
|
||||||
|
// Module-level state for the manual model lock. It is written by the
// /lock-model and /unlock-model command handlers and read by the
// before_agent_start hook, all of which live in this file.

/** True while a manual lock is active; the routing hook then skips prompt analysis. */
let isLocked = false;

/**
 * The model object returned by `ctx.modelRegistry.find(...)` for the locked
 * model, or `null` when no lock is set. Kept as `any` because the registry's
 * model type is not among the types this file imports.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
let lockedModel: any = null;
|
||||||
|
|
||||||
|
/**
 * Routing table: logical route name -> provider / model id pair.
 *
 * The route names are the keys produced by the routing switch in the
 * before_agent_start hook; `provider` and `id` are the two arguments passed
 * to `ctx.modelRegistry.find(...)`. The table is never mutated at runtime,
 * hence the `Readonly` wrapper.
 */
const MODELS: Readonly<Record<string, { provider: string; id: string }>> = {
  "free-core": { provider: "openrouter", id: "openrouter/owl-alpha" },
  "router-eval": { provider: "openrouter", id: "openrouter/owl-alpha" },
  "economy-devops": { provider: "openrouter", id: "qwen/qwen3.6-flash" },
  "economy-code": { provider: "openrouter", id: "deepseek/deepseek-v4-flash" },
  "precision-devops": { provider: "openrouter", id: "qwen/qwen-2.5-72b-instruct" },
  "precision-code-high": { provider: "openrouter", id: "deepseek/deepseek-v4-pro" },
  "precision-react": { provider: "openrouter", id: "qwen/qwen3-coder-plus" },
  "context-heavy": { provider: "openrouter", id: "moonshotai/kimi-k2.6" },
};
|
||||||
|
|
||||||
|
/**
 * Resolve a route name from `MODELS` to a model object from the registry.
 *
 * @param ctx - Extension context whose `modelRegistry` performs the lookup.
 * @param key - Route name, e.g. `"free-core"`.
 * @returns Whatever `ctx.modelRegistry.find(provider, id)` returns for the
 *   mapped pair, or `undefined` when `key` is not a route in `MODELS`.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
function getModel(ctx: ExtensionContext, key: string): any {
  // Own-property check: keys inherited from Object.prototype (e.g.
  // "constructor") must not be mistaken for a route entry.
  const route = Object.prototype.hasOwnProperty.call(MODELS, key) ? MODELS[key] : undefined;
  if (!route) return undefined;
  return ctx.modelRegistry.find(route.provider, route.id);
}
|
||||||
|
|
||||||
|
/**
 * Human-readable label for a route, used in router-status messages.
 *
 * @param key - Route name from `MODELS`.
 * @returns `"<provider>/<id>"` for a known route; the key itself otherwise.
 */
function modelLabel(key: string): string {
  // Own-property check so an inherited key such as "toString" falls back to
  // the key instead of rendering "undefined/undefined".
  const route = Object.prototype.hasOwnProperty.call(MODELS, key) ? MODELS[key] : undefined;
  return route ? `${route.provider}/${route.id}` : key;
}
|
||||||
|
|
||||||
|
/** OpenRouter chat-completions endpoint used for the intent-classification call. */
const ROUTER_ENDPOINT = "https://openrouter.ai/api/v1/chat/completions";

/** Model asked to classify the prompt. */
const ROUTER_CLASSIFIER_MODEL = "openrouter/owl-alpha";

/** Abort the classification request after this many milliseconds so routing cannot stall the agent start. */
const ROUTER_TIMEOUT_MS = 15000;

/** Prompts whose trimmed length is below this skip classification entirely. */
const SHORT_PROMPT_LENGTH = 15;

/** System prompt instructing the classifier to answer with a `{"tag", "lang"}` JSON object. */
const ROUTER_SYSTEM_PROMPT =
  'You are a minimal command interceptor router. Analyze the prompt and output ONLY a minified JSON object containing the tag and language. No markdown, no conversation.\n\nValid Tags:\n- "read" (reading/ingesting logs, docs, read-only code)\n- "discuss" (general chat, architectural concepts)\n- "search" (requires web search/external lookups)\n- "devops-low" (editing yaml, single dockerfile, basic bash, env adjustments)\n- "devops-high" (complex multi-container network layers, server crashes)\n- "code-analysis-low" (finding a bug in a short file)\n- "code-analysis-high" (refactoring multiple complex files, tracking syntax errors)\n- "codewrite-low" (generating simple syntax boilerplate or standard functions)\n- "codewrite-high" (complex features, heavy React state, complex PHP architectures)\n\nRespond exactly like this: {"tag":"tag-name","lang":"react|php|python|rust|bash|none"}';

/** Classifier verdict after validation. */
type RouteDecision = { tag: string; lang: string };

/** Anything that can be handed to `pi.setModel`, plus the "no model" cases. */
type RoutableModel = Parameters<ExtensionAPI["setModel"]>[0] | null | undefined;

/** Emit one router-status message. */
function reportRouterStatus(pi: ExtensionAPI, content: string): void {
  // NOTE(review): `display: false` is kept from the original; if these status
  // lines are meant to be visible to the user, confirm the flag's meaning
  // against the sendMessage documentation.
  pi.sendMessage({
    customType: "router-status",
    content,
    display: false,
  });
}

/**
 * Switch the active model and report whether the switch took effect.
 *
 * NOTE(review): the call is awaited on the assumption that `pi.setModel` is
 * asynchronous and may resolve to `false` when the model cannot be used -
 * confirm against the ExtensionAPI typings. Awaiting is harmless if it is
 * synchronous.
 *
 * @returns `false` when `model` is missing, `setModel` resolves to `false`,
 *   or `setModel` throws; `true` otherwise.
 */
async function switchModel(pi: ExtensionAPI, model: RoutableModel): Promise<boolean> {
  if (!model) return false;
  try {
    const outcome: unknown = await pi.setModel(model);
    return outcome !== false;
  } catch {
    return false;
  }
}

/**
 * Extract the routing decision from the classifier's reply.
 *
 * The outermost `{...}` span is parsed, so a reply wrapped in markdown fences
 * or surrounded by stray text is still accepted.
 *
 * @throws SyntaxError when the reply holds no parseable JSON object.
 */
function parseRouteDecision(reply: string): RouteDecision {
  const open = reply.indexOf("{");
  const close = reply.lastIndexOf("}");
  if (open < 0 || close <= open) {
    throw new SyntaxError("Router reply does not contain a JSON object");
  }
  const parsed: unknown = JSON.parse(reply.slice(open, close + 1));
  const fields: { tag?: unknown; lang?: unknown } =
    typeof parsed === "object" && parsed !== null ? parsed : {};
  const clean = (value: unknown, fallback: string): string => {
    const text = typeof value === "string" ? value.trim().toLowerCase() : "";
    return text || fallback;
  };
  // Same defaults as before: a missing tag means "read", a missing language "none".
  return { tag: clean(fields.tag, "read"), lang: clean(fields.lang, "none") };
}

/** Deterministic routing tree: classifier tag (and language) -> route name in `MODELS`. */
function pickModelKey(tag: string, lang: string): string {
  switch (tag) {
    case "devops-low":
      return "economy-devops";
    case "devops-high":
      return "precision-devops";
    case "code-analysis-high":
      return "context-heavy";
    case "codewrite-low":
      return "economy-code";
    case "codewrite-high":
      return lang === "react" ? "precision-react" : "precision-code-high";
    default:
      // "read", "discuss", "search", "code-analysis-low" and any unknown tag.
      return "free-core";
  }
}

/**
 * Ask the classifier model for a routing decision.
 *
 * @returns The trimmed reply text, or `""` when the HTTP status is not OK or
 *   the payload carries no text content.
 * @throws When the request fails, times out, or the body is not JSON.
 */
async function requestClassification(prompt: string, apiKey: string): Promise<string> {
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), ROUTER_TIMEOUT_MS);
  try {
    const response = await fetch(ROUTER_ENDPOINT, {
      method: "POST",
      headers: {
        "Authorization": `Bearer ${apiKey}`,
        "Content-Type": "application/json",
      },
      body: JSON.stringify({
        model: ROUTER_CLASSIFIER_MODEL,
        messages: [
          { role: "system", content: ROUTER_SYSTEM_PROMPT },
          { role: "user", content: prompt },
        ],
        max_tokens: 35,
        temperature: 0,
      }),
      signal: controller.signal,
    });
    if (!response.ok) return "";

    const payload = (await response.json()) as
      | { choices?: Array<{ message?: { content?: unknown } }> }
      | null;
    const text = payload?.choices?.[0]?.message?.content;
    return typeof text === "string" ? text.trim() : "";
  } finally {
    clearTimeout(timer);
  }
}

/**
 * Smart-router extension entry point.
 *
 * Registers:
 *  - `/lock-model <provider/model-id>`: pins a model and disables routing;
 *  - `/unlock-model`: clears the pin;
 *  - a `before_agent_start` hook that, unless a lock is active, classifies the
 *    prompt via OpenRouter, switches to the route's model from `MODELS`, and
 *    emits a `router-status` message describing the outcome.
 *
 * Reads the `OPENROUTER_API_KEY` environment variable for the classification
 * request.
 */
export default function (pi: ExtensionAPI) {
  // 1. Register /lock-model command
  pi.registerCommand("lock-model", {
    description: "Lock to a specific model. Disables dynamic routing until /unlock-model.",
    handler: async (args: string, ctx: ExtensionCommandContext) => {
      const modelId = (args || "").trim();
      if (!modelId) {
        ctx.ui.notify(
          "Usage: /lock-model <provider/model-id>\nExample: /lock-model openrouter/anthropic/claude-3.5-sonnet",
          "error"
        );
        return;
      }

      // Only the first slash separates provider from id; model ids may
      // themselves contain slashes. Both halves must be non-empty.
      const slashIdx = modelId.indexOf("/");
      if (slashIdx <= 0 || slashIdx === modelId.length - 1) {
        ctx.ui.notify("Invalid model ID format. Use: provider/model-id", "error");
        return;
      }

      const provider = modelId.substring(0, slashIdx);
      const id = modelId.substring(slashIdx + 1);
      const model = ctx.modelRegistry.find(provider, id);
      if (!model) {
        ctx.ui.notify(`Model not found: ${modelId}`, "error");
        return;
      }

      // Record the lock only once the switch actually succeeded, so a failed
      // switch does not leave routing disabled.
      if (!(await switchModel(pi, model))) {
        ctx.ui.notify(`Could not switch to model: ${modelId}`, "error");
        return;
      }

      isLocked = true;
      lockedModel = model;
      ctx.ui.notify(`🔒 Router disabled. Model locked to: ${modelId}`, "info");
    },
  });

  // 2. Register /unlock-model command
  pi.registerCommand("unlock-model", {
    description: "Re-enable dynamic prompt routing.",
    handler: async (_args: string, ctx: ExtensionCommandContext) => {
      isLocked = false;
      lockedModel = null;
      ctx.ui.notify("🔓 Router enabled. Prompts will be dynamically analyzed and routed.", "info");
    },
  });

  // 3. Intercept prompts before agent starts
  pi.on(
    "before_agent_start",
    async (
      event: BeforeAgentStartEvent,
      ctx: ExtensionContext
    ): Promise<BeforeAgentStartEventResult | undefined> => {
      const prompt = event.prompt;

      // Switch to the model behind `key`, then report it. The report says so
      // when the switch did not happen instead of claiming a route that was
      // never applied.
      const routeTo = async (key: string, prefix: string): Promise<void> => {
        const switched = await switchModel(pi, getModel(ctx, key));
        const note = switched ? "" : " (model unavailable, keeping current model)";
        reportRouterStatus(pi, `${prefix} ${modelLabel(key)}${note}`);
      };

      // Pass-through if user locked the model manually
      if (isLocked && lockedModel) {
        await switchModel(pi, lockedModel);
        reportRouterStatus(pi, `🔒 Locked → ${lockedModel.id || "unknown"}`);
        return undefined;
      }

      // Skip routing for short prompts / simple greetings
      if (prompt.trim().length < SHORT_PROMPT_LENGTH) {
        await routeTo("router-eval", "⚡ Short prompt →");
        return undefined;
      }

      // Show analyzing indicator
      reportRouterStatus(pi, "🤔 Analyzing prompt intent...");

      try {
        // Without a key the request cannot succeed, so skip the round trip
        // and take the same "analysis failed" fallback.
        const apiKey = process.env["OPENROUTER_API_KEY"] || "";
        const reply = apiKey ? await requestClassification(prompt, apiKey) : "";
        if (!reply) {
          await routeTo("free-core", "⚠️ Analysis failed → fallback");
          return undefined;
        }

        const { tag, lang } = parseRouteDecision(reply);

        // Show routing decision
        await routeTo(pickModelKey(tag, lang), `🎯 ${tag} (${lang}) →`);
      } catch {
        // Network failure, timeout, or an unparseable classifier reply:
        // fall back gracefully rather than blocking the agent.
        await routeTo("free-core", "⚠️ Error → fallback");
      }

      return undefined;
    }
  );
}
|
||||||
Reference in New Issue
Block a user