fix: cost reduction - switch most routes to v4-flash/free, only use v4-pro + high thinking for genuinely complex code. Shorten footer status to avoid truncation.
This commit is contained in:
@@ -30,22 +30,21 @@ const COMPRESS_MIN_CHARS = 20_000;
|
||||
let currentTag: string | null = null;
|
||||
|
||||
// Model ID mappings for routing
|
||||
// Cost-conscious: flash/economy for most tasks, Pro only for genuinely complex work
|
||||
const MODELS: Record<string, { provider: string; id: string }> = {
|
||||
"free-core": { provider: "openrouter", id: "free" },
|
||||
"router-eval": { provider: "openrouter", id: "free" },
|
||||
"economy-devops": { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },
|
||||
"economy-code": { provider: "opencode-go", id: "deepseek/deepseek-v4-pro" },
|
||||
"precision-devops": { provider: "openrouter", id: "deepseek/deepseek-v4-pro" },
|
||||
"precision-code-high":{ provider: "openrouter", id: "deepseek/deepseek-v4-pro" },
|
||||
"precision-react": { provider: "openrouter", id: "deepseek/deepseek-v4-pro" },
|
||||
"context-heavy": { provider: "openrouter", id: "moonshotai/kimi-k2.6" },
|
||||
"economy-code": { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },
|
||||
"precision-devops": { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },
|
||||
"precision-code-high":{ provider: "opencode-go", id: "deepseek/deepseek-v4-pro" },
|
||||
"precision-react": { provider: "opencode-go", id: "deepseek/deepseek-v4-pro" },
|
||||
"context-heavy": { provider: "openrouter", id: "free" },
|
||||
};
|
||||
|
||||
// Thinking level mappings for routing (per tag complexity)
|
||||
// Keys not listed keep the current default thinking level.
|
||||
// Thinking level mappings — only set for tasks that truly benefit.
|
||||
// Everything else keeps the default (off) for speed and cost.
|
||||
const THINKING: Record<string, string> = {
|
||||
"economy-code": "low",
|
||||
"precision-devops": "medium",
|
||||
"precision-code-high":"high",
|
||||
"precision-react": "high",
|
||||
};
|
||||
@@ -59,9 +58,10 @@ function getModel(ctx: ExtensionContext, key: string): any {
|
||||
function modelLabel(key: string): string {
|
||||
const m = MODELS[key];
|
||||
if (!m) return key;
|
||||
// Model IDs already include provider prefix (e.g. "openrouter/owl-alpha")
|
||||
// so just use the ID directly to avoid double-prefixing
|
||||
return m.id.includes("/") ? m.id : `${m.provider}/${m.id}`;
|
||||
// Return only the last "/"-separated segment of the model ID for short display (e.g. "deepseek-v4-flash", "deepseek-v4-pro", "free")
|
||||
const id = m.id.includes("/") ? m.id : `${m.provider}/${m.id}`;
|
||||
const parts = id.split("/");
|
||||
return parts[parts.length - 1];
|
||||
}
|
||||
|
||||
export default function (pi: ExtensionAPI) {
|
||||
@@ -163,7 +163,7 @@ export default function (pi: ExtensionAPI) {
|
||||
if (!content) {
|
||||
const fallback = getModel(ctx, "free-core");
|
||||
if (fallback) pi.setModel(fallback);
|
||||
ctx.ui.setStatus("router", `⚠️ fallback ${modelLabel("free-core")}`);
|
||||
ctx.ui.setStatus("router", `⚠️ ${modelLabel("free-core")}`);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -215,8 +215,8 @@ export default function (pi: ExtensionAPI) {
|
||||
// Store tag for compression check in context event
|
||||
currentTag = tag;
|
||||
|
||||
// Show routing decision in status bar
|
||||
ctx.ui.setStatus("router", `🎯 ${tag} → ${modelLabel(modelKey)}`);
|
||||
// Show routing decision in status bar (shortened to avoid truncation)
|
||||
ctx.ui.setStatus("router", `${modelLabel(modelKey)}`);
|
||||
|
||||
} catch (_error) {
|
||||
// Fallback gracefully on network drops
|
||||
@@ -243,12 +243,12 @@ export default function (pi: ExtensionAPI) {
|
||||
});
|
||||
if (result.messages && result.messages.length > 0) {
|
||||
const saved = ((result.tokensBefore - result.tokensAfter) / result.tokensBefore * 100).toFixed(0);
|
||||
ctx.ui.setStatus("compression", `📦 ${saved}%`);
|
||||
ctx.ui.setStatus("compression", `📦${saved}%`);
|
||||
return { messages: result.messages };
|
||||
}
|
||||
} catch {
|
||||
// Proxy down — pass through (fallback: true already handles transport errors)
|
||||
ctx.ui.setStatus("compression", "⚠️ offline");
|
||||
ctx.ui.setStatus("compression", "⚠️");
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user