fix: cost reduction - switch most routes to v4-flash/free and use v4-pro + high thinking only for genuinely complex code. Shorten the footer status to avoid truncation.
This commit is contained in:
@@ -30,22 +30,21 @@ const COMPRESS_MIN_CHARS = 20_000;
|
|||||||
let currentTag: string | null = null;
|
let currentTag: string | null = null;
|
||||||
|
|
||||||
// Model ID mappings for routing
|
// Model ID mappings for routing
|
||||||
|
// Cost-conscious: v4-flash or free for most routes; v4-pro only for genuinely complex work (precision-code-high, precision-react)
|
||||||
const MODELS: Record<string, { provider: string; id: string }> = {
|
const MODELS: Record<string, { provider: string; id: string }> = {
|
||||||
"free-core": { provider: "openrouter", id: "free" },
|
"free-core": { provider: "openrouter", id: "free" },
|
||||||
"router-eval": { provider: "openrouter", id: "free" },
|
"router-eval": { provider: "openrouter", id: "free" },
|
||||||
"economy-devops": { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },
|
"economy-devops": { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },
|
||||||
"economy-code": { provider: "opencode-go", id: "deepseek/deepseek-v4-pro" },
|
"economy-code": { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },
|
||||||
"precision-devops": { provider: "openrouter", id: "deepseek/deepseek-v4-pro" },
|
"precision-devops": { provider: "opencode-go", id: "deepseek/deepseek-v4-flash" },
|
||||||
"precision-code-high":{ provider: "openrouter", id: "deepseek/deepseek-v4-pro" },
|
"precision-code-high":{ provider: "opencode-go", id: "deepseek/deepseek-v4-pro" },
|
||||||
"precision-react": { provider: "openrouter", id: "deepseek/deepseek-v4-pro" },
|
"precision-react": { provider: "opencode-go", id: "deepseek/deepseek-v4-pro" },
|
||||||
"context-heavy": { provider: "openrouter", id: "moonshotai/kimi-k2.6" },
|
"context-heavy": { provider: "openrouter", id: "free" },
|
||||||
};
|
};
|
||||||
|
|
||||||
// Thinking level mappings for routing (per tag complexity)
|
// Thinking level mappings — only set for tasks that truly benefit.
|
||||||
// Keys not listed keep the current default thinking level.
|
// Everything else keeps the default (off) for speed and cost.
|
||||||
const THINKING: Record<string, string> = {
|
const THINKING: Record<string, string> = {
|
||||||
"economy-code": "low",
|
|
||||||
"precision-devops": "medium",
|
|
||||||
"precision-code-high":"high",
|
"precision-code-high":"high",
|
||||||
"precision-react": "high",
|
"precision-react": "high",
|
||||||
};
|
};
|
||||||
@@ -59,9 +58,10 @@ function getModel(ctx: ExtensionContext, key: string): any {
|
|||||||
function modelLabel(key: string): string {
|
function modelLabel(key: string): string {
|
||||||
const m = MODELS[key];
|
const m = MODELS[key];
|
||||||
if (!m) return key;
|
if (!m) return key;
|
||||||
// Model IDs already include provider prefix (e.g. "openrouter/owl-alpha")
|
// Return just the last "/"-separated segment of the model ID for short display (e.g. "deepseek-v4-flash", "deepseek-v4-pro", "free")
|
||||||
// so just use the ID directly to avoid double-prefixing
|
const id = m.id.includes("/") ? m.id : `${m.provider}/${m.id}`;
|
||||||
return m.id.includes("/") ? m.id : `${m.provider}/${m.id}`;
|
const parts = id.split("/");
|
||||||
|
return parts[parts.length - 1];
|
||||||
}
|
}
|
||||||
|
|
||||||
export default function (pi: ExtensionAPI) {
|
export default function (pi: ExtensionAPI) {
|
||||||
@@ -163,7 +163,7 @@ export default function (pi: ExtensionAPI) {
|
|||||||
if (!content) {
|
if (!content) {
|
||||||
const fallback = getModel(ctx, "free-core");
|
const fallback = getModel(ctx, "free-core");
|
||||||
if (fallback) pi.setModel(fallback);
|
if (fallback) pi.setModel(fallback);
|
||||||
ctx.ui.setStatus("router", `⚠️ fallback ${modelLabel("free-core")}`);
|
ctx.ui.setStatus("router", `⚠️ ${modelLabel("free-core")}`);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -215,8 +215,8 @@ export default function (pi: ExtensionAPI) {
|
|||||||
// Store tag for compression check in context event
|
// Store tag for compression check in context event
|
||||||
currentTag = tag;
|
currentTag = tag;
|
||||||
|
|
||||||
// Show routing decision in status bar
|
// Show only the routed model's short label in the status bar (tag and arrow omitted to avoid truncation)
|
||||||
ctx.ui.setStatus("router", `🎯 ${tag} → ${modelLabel(modelKey)}`);
|
ctx.ui.setStatus("router", `${modelLabel(modelKey)}`);
|
||||||
|
|
||||||
} catch (_error) {
|
} catch (_error) {
|
||||||
// Fallback gracefully on network drops
|
// Fallback gracefully on network drops
|
||||||
@@ -248,7 +248,7 @@ export default function (pi: ExtensionAPI) {
|
|||||||
}
|
}
|
||||||
} catch {
|
} catch {
|
||||||
// Proxy down — pass through (fallback: true already handles transport errors)
|
// Proxy down — pass through (fallback: true already handles transport errors)
|
||||||
ctx.ui.setStatus("compression", "⚠️ offline");
|
ctx.ui.setStatus("compression", "⚠️");
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user