fix: cost reduction - switch most routes to v4-flash/free, only use v4-pro + high thinking for genuinely complex code. Shorten footer status to avoid truncation.

This commit is contained in:
2026-06-10 21:29:40 +10:00
parent f6a4fc03b1
commit 0e3837dfc3

View File

@@ -30,22 +30,21 @@ const COMPRESS_MIN_CHARS = 20_000;
// Routing tag most recently chosen by the router; null until one is stored.
// It is written after each routing decision so the compression check in the
// context event can read it.
let currentTag: string | null = null;
// Model ID mappings for routing.
// Cost-conscious: flash/free models for most tasks, Pro only for genuinely
// complex work.
//
// Every routing key is listed exactly once. A repeated key in an object
// literal is rejected by the TypeScript compiler, and at runtime the last
// entry silently overrides the earlier one, which hides the real routing.
const MODELS: Record<string, { provider: string; id: string }> = {
  // Free tier: fallback and router self-evaluation.
  "free-core": { provider: "openrouter", id: "free" },
  "router-eval": { provider: "openrouter", id: "free" },
  // Flash tier: routine devops and code tasks.
  "economy-devops": { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },
  "economy-code": { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },
  "precision-devops": { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },
  // Pro tier: reserved for genuinely complex code.
  "precision-code-high": { provider: "opencode-go", id: "deepseek/deepseek-v4-pro" },
  "precision-react": { provider: "opencode-go", id: "deepseek/deepseek-v4-pro" },
  // Large-context requests also go to the free model.
  "context-heavy": { provider: "openrouter", id: "free" },
};
// Thinking level mappings for routing, keyed by the same routing keys as
// MODELS. Levels are meant to be set only for tasks that truly benefit;
// keys not listed here keep the default thinking level (described as "off",
// for speed and cost).
// NOTE(review): the default level itself is set outside this view — confirm
// it really is "off".
// NOTE(review): confirm the "low" / "medium" entries for economy-code and
// precision-devops are still intended under the "only where it truly
// benefits" policy.
const THINKING: Record<string, string> = {
"economy-code": "low",
"precision-devops": "medium",
"precision-code-high":"high",
"precision-react": "high",
};
@@ -59,9 +58,10 @@ function getModel(ctx: ExtensionContext, key: string): any {
/**
 * Builds the short display label for a routing key, as shown in the
 * status bar.
 *
 * Only the final "/"-separated segment of the model id is returned so the
 * status text stays short enough not to be truncated, e.g.
 * "deepseek/deepseek-v4-flash" becomes "deepseek-v4-flash" and "free"
 * stays "free".
 *
 * @param key - Routing key to look up in MODELS.
 * @returns The short model name, or `key` itself when it has no entry in
 *   MODELS.
 */
function modelLabel(key: string): string {
  const m = MODELS[key];
  if (!m) return key;
  // lastIndexOf yields -1 when the id has no "/", so slice(0) keeps the whole
  // id; otherwise everything up to and including the last "/" is dropped.
  return m.id.slice(m.id.lastIndexOf("/") + 1);
}
export default function (pi: ExtensionAPI) {
@@ -163,7 +163,7 @@ export default function (pi: ExtensionAPI) {
if (!content) {
const fallback = getModel(ctx, "free-core");
if (fallback) pi.setModel(fallback);
ctx.ui.setStatus("router", `⚠️ fallback ${modelLabel("free-core")}`);
ctx.ui.setStatus("router", `⚠️ ${modelLabel("free-core")}`);
return;
}
@@ -215,8 +215,8 @@ export default function (pi: ExtensionAPI) {
// Store tag for compression check in context event
currentTag = tag;
// Show routing decision in status bar
ctx.ui.setStatus("router", `🎯 ${tag}${modelLabel(modelKey)}`);
// Show routing decision in status bar (shortened to avoid truncation)
ctx.ui.setStatus("router", `${modelLabel(modelKey)}`);
} catch (_error) {
// Fallback gracefully on network drops
@@ -248,7 +248,7 @@ export default function (pi: ExtensionAPI) {
}
} catch {
// Proxy down — pass through (fallback: true already handles transport errors)
ctx.ui.setStatus("compression", "⚠️ offline");
ctx.ui.setStatus("compression", "⚠️");
}
});
}