Add 5 pi extensions: pi-subagents, pi-crew, rpiv-pi, pi-interactive-shell, pi-intercom
This commit is contained in:
63
extensions/pi-crew/src/runtime/agent-control.ts
Normal file
63
extensions/pi-crew/src/runtime/agent-control.ts
Normal file
@@ -0,0 +1,63 @@
|
||||
import type { PiTeamsConfig } from "../config/config.ts";
|
||||
import type { TeamRunManifest } from "../state/types.ts";
|
||||
import { appendTaskAttentionEvent } from "./attention-events.ts";
|
||||
import type { CrewAgentRecord } from "./crew-agent-runtime.ts";
|
||||
import { upsertCrewAgent } from "./crew-agent-records.ts";
|
||||
|
||||
/** Resolved settings for idle-agent attention tracking. */
export interface CrewControlConfig {
  /** When false, `applyAttentionState` returns agent records unchanged. */
  enabled: boolean;

  /** Idle time in milliseconds that a running agent may exceed before it is flagged. */
  needsAttentionAfterMs: number;
}
|
||||
|
||||
const DEFAULT_NEEDS_ATTENTION_MS = 60_000;
|
||||
|
||||
/**
 * Accepts only strictly positive integers.
 *
 * @returns `value` unchanged when it is a positive integer, otherwise `undefined`.
 */
function positiveInt(value: unknown): number | undefined {
  if (typeof value !== "number") return undefined;
  if (!Number.isInteger(value) || value <= 0) return undefined;
  return value;
}
|
||||
|
||||
/**
 * Derives the effective control settings from an optional team config.
 *
 * Control is enabled unless `control.enabled` is exactly `false`; the idle
 * threshold falls back to the module default unless a positive integer is given.
 */
export function resolveCrewControlConfig(config: PiTeamsConfig | undefined): CrewControlConfig {
  type ControlSection = { enabled?: unknown; needsAttentionAfterMs?: unknown };
  const control = (config as (PiTeamsConfig & { control?: ControlSection }) | undefined)?.control;
  const threshold = positiveInt(control?.needsAttentionAfterMs);
  return {
    enabled: control?.enabled !== false,
    needsAttentionAfterMs: threshold ?? DEFAULT_NEEDS_ATTENTION_MS,
  };
}
|
||||
|
||||
/**
 * Milliseconds elapsed since the agent's last recorded activity, falling back
 * to its start time.
 *
 * @returns A non-negative age, or `undefined` when no timestamp exists or it cannot be parsed.
 */
export function activityAgeMs(agent: CrewAgentRecord, now = Date.now()): number | undefined {
  const lastSeen = agent.progress?.lastActivityAt ?? agent.startedAt;
  if (!lastSeen) return undefined;
  const elapsed = now - new Date(lastSeen).getTime();
  if (!Number.isFinite(elapsed)) return undefined;
  // Timestamps in the future are clamped rather than reported as negative ages.
  return Math.max(0, elapsed);
}
|
||||
|
||||
/**
 * Renders the agent's activity age as a short human-readable phrase.
 *
 * Ages under one second read "active now"; otherwise the age is shown in whole
 * seconds (below a minute) or whole minutes, worded as inactivity when the
 * agent is already flagged `needs_attention`.
 */
export function formatActivityAge(agent: CrewAgentRecord, now = Date.now()): string | undefined {
  const idleMs = activityAgeMs(agent, now);
  if (idleMs === undefined) return undefined;
  if (idleMs < 1000) return "active now";

  const totalSeconds = Math.floor(idleMs / 1000);
  const [amount, unit] = totalSeconds < 60 ? [totalSeconds, "s"] : [Math.floor(totalSeconds / 60), "m"];
  const flagged = agent.progress?.activityState === "needs_attention";
  return flagged ? `no activity for ${amount}${unit}` : `active ${amount}${unit} ago`;
}
|
||||
|
||||
/**
 * Flags a running agent as `needs_attention` once it has been idle for longer
 * than the configured threshold.
 *
 * When the agent is flagged, the updated record is persisted via
 * `upsertCrewAgent` and a task attention event is appended. In every other
 * case (control disabled, agent not running, age unknown or within the
 * threshold, already flagged) the input record is returned untouched.
 *
 * @returns The updated record when newly flagged, otherwise `agent` itself.
 */
export function applyAttentionState(manifest: TeamRunManifest, agent: CrewAgentRecord, config: CrewControlConfig, now = Date.now()): CrewAgentRecord {
  const eligible = config.enabled && agent.status === "running";
  if (!eligible) return agent;

  const idleMs = activityAgeMs(agent, now);
  if (idleMs === undefined) return agent;
  if (idleMs <= config.needsAttentionAfterMs) return agent;
  if (agent.progress?.activityState === "needs_attention") return agent;

  // Agents without any progress record get a minimal one seeded from toolUses.
  const baseProgress = agent.progress ?? { recentTools: [], recentOutput: [], toolCount: agent.toolUses ?? 0 };
  const flagged: CrewAgentRecord = {
    ...agent,
    progress: { ...baseProgress, activityState: "needs_attention" },
  };
  upsertCrewAgent(manifest, flagged);

  const idleSeconds = Math.floor(idleMs / 1000);
  appendTaskAttentionEvent({
    manifest,
    taskId: agent.taskId,
    message: `${agent.agent} needs attention (no observed activity for ${idleSeconds}s).`,
    data: {
      activityState: "needs_attention",
      reason: "idle",
      elapsedMs: idleMs,
      taskId: agent.taskId,
      agentName: agent.agent,
      suggestedAction: "Check worker status, wait, steer, or cancel if needed.",
    },
  });
  return flagged;
}
|
||||
72
extensions/pi-crew/src/runtime/agent-memory.ts
Normal file
72
extensions/pi-crew/src/runtime/agent-memory.ts
Normal file
@@ -0,0 +1,72 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
|
||||
export type AgentMemoryScope = "user" | "project" | "local";
|
||||
const MAX_MEMORY_LINES = 200;
|
||||
|
||||
/**
 * Reports whether `name` is unsuitable as a memory directory segment.
 *
 * A safe name is 1-128 characters, starts with an ASCII letter or digit, and
 * otherwise contains only letters, digits, `.`, `_` and `-`.
 */
export function isUnsafeMemoryName(name: string): boolean {
  if (!name) return true;
  if (name.length > 128) return true;
  const matchesSafePattern = /^[a-zA-Z0-9][a-zA-Z0-9._-]*$/.test(name);
  return !matchesSafePattern;
}
|
||||
|
||||
/**
 * Reports whether `filePath` itself is a symbolic link.
 *
 * Any failure to stat the path (for example, it does not exist) yields `false`.
 */
export function isSymlink(filePath: string): boolean {
  let stats: fs.Stats;
  try {
    stats = fs.lstatSync(filePath);
  } catch {
    return false;
  }
  return stats.isSymbolicLink();
}
|
||||
|
||||
/**
 * Reads a memory file as UTF-8 text, refusing symlinks.
 *
 * @returns The file contents, or `undefined` when the path is missing, is a
 * symbolic link, or cannot be read.
 */
export function safeReadMemoryFile(filePath: string): string | undefined {
  const readable = fs.existsSync(filePath) && !isSymlink(filePath);
  if (!readable) return undefined;
  try {
    return fs.readFileSync(filePath, "utf-8");
  } catch {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Computes the memory directory for an agent in the given scope.
 *
 * - `user`: `<home>/.pi/agent-memory/<agent>`
 * - `project`: `<cwd>/.pi/agent-memory/<agent>`
 * - `local`: `<cwd>/.pi/agent-memory-local/<agent>`
 *
 * @throws Error when `agentName` fails `isUnsafeMemoryName` validation.
 */
export function resolveMemoryDir(agentName: string, scope: AgentMemoryScope, cwd: string): string {
  if (isUnsafeMemoryName(agentName)) throw new Error(`Unsafe agent name for memory directory: ${agentName}`);
  const root = scope === "user" ? os.homedir() : cwd;
  const folder = scope === "user" || scope === "project" ? "agent-memory" : "agent-memory-local";
  return path.join(root, ".pi", folder, agentName);
}
|
||||
|
||||
/**
 * Makes sure the memory directory exists, creating it (and parents) if needed.
 *
 * @throws Error when the path already exists and is a symbolic link.
 */
export function ensureMemoryDir(memoryDir: string): void {
  if (!fs.existsSync(memoryDir)) {
    fs.mkdirSync(memoryDir, { recursive: true });
    return;
  }
  if (isSymlink(memoryDir)) throw new Error(`Refusing to use symlinked memory directory: ${memoryDir}`);
}
|
||||
|
||||
/**
 * Reads `MEMORY.md` from a memory directory, capped at `MAX_MEMORY_LINES` lines.
 *
 * @param memoryDir Directory expected to contain `MEMORY.md`.
 * @returns The file content (truncated with a trailing notice when it exceeds
 * the line cap), or `undefined` when the directory is a symlink or the file
 * cannot be read safely.
 */
export function readMemoryIndex(memoryDir: string): string | undefined {
  if (isSymlink(memoryDir)) return undefined;
  const content = safeReadMemoryFile(path.join(memoryDir, "MEMORY.md"));
  if (content === undefined) return undefined;

  const lines = content.split(/\r?\n/);
  // A terminating newline produces one empty trailing element that is not a
  // real line; without this adjustment a file of exactly MAX_MEMORY_LINES lines
  // would be reported as truncated.
  const endsWithNewline = lines.length > 0 && lines[lines.length - 1] === "";
  const lineCount = endsWithNewline ? lines.length - 1 : lines.length;
  if (lineCount <= MAX_MEMORY_LINES) return content;

  const kept = lines.slice(0, MAX_MEMORY_LINES).join("\n");
  return `${kept}\n... (truncated at ${MAX_MEMORY_LINES} lines)`;
}
|
||||
|
||||
/**
 * Builds the prompt section describing an agent's persistent memory.
 *
 * In writable mode the memory directory is created first and usage
 * instructions are appended; in read-only mode the block only reports what
 * exists. The current `MEMORY.md` (if readable) is embedded in the output.
 */
export function buildMemoryBlock(agentName: string, scope: AgentMemoryScope, cwd: string, writable: boolean): string {
  const memoryDir = resolveMemoryDir(agentName, scope, cwd);
  if (writable) ensureMemoryDir(memoryDir);
  const existing = readMemoryIndex(memoryDir);

  const sections: string[] = [
    `# Agent Memory (${writable ? "read-write" : "read-only"})`,
    `Memory scope: ${scope}`,
    `Memory directory: ${memoryDir}`,
    writable ? "Use this persistent directory to maintain useful long-term notes for this agent." : "You may reference existing memory, but do not create or modify memory files.",
    // NOTE(review): the original list contained a bare "" separator here that its
    // `.filter(Boolean)` removed again, so no blank line is emitted at this point.
    existing ? `## Current MEMORY.md\n${existing}` : "No MEMORY.md exists yet.",
  ];

  if (writable) {
    // The leading empty element yields the blank line before the heading.
    sections.push(
      [
        "",
        "## Memory Instructions",
        "- Keep MEMORY.md concise (under 200 lines); store details in separate linked files.",
        "- Reject stale memories; update or remove outdated notes.",
        "- Use safe relative filenames inside the memory directory only.",
      ].join("\n"),
    );
  }
  return sections.join("\n");
}
|
||||
114
extensions/pi-crew/src/runtime/agent-observability.ts
Normal file
114
extensions/pi-crew/src/runtime/agent-observability.ts
Normal file
@@ -0,0 +1,114 @@
|
||||
import * as fs from "node:fs";
|
||||
import type { TeamRunManifest } from "../state/types.ts";
|
||||
import { agentOutputPath, readCrewAgents } from "./crew-agent-records.ts";
|
||||
import type { CrewAgentRecord } from "./crew-agent-runtime.ts";
|
||||
|
||||
/** Human-friendly activity phrases for known tool names, used when rendering agent activity. */
const TOOL_LABELS: Record<string, string> = {
  // File access
  read: "reading",
  edit: "editing",
  write: "writing",
  // Shell
  bash: "running command",
  // Discovery
  grep: "searching",
  find: "finding files",
  ls: "listing",
};
|
||||
|
||||
/** Result of reading the trailing portion of a text file. */
export interface TextTailResult {
  /** Path that was read. */
  path: string;
  /** Decoded UTF-8 text of the tail (empty when the file is missing). */
  text: string;
  /** Total size of the file in bytes, not the number of bytes returned. */
  bytes: number;
  /** True when the file is larger than the portion returned. */
  truncated: boolean;
}

/**
 * Reads at most `maxBytes` bytes from the end of a file and decodes them as UTF-8.
 *
 * @param filePath File to read.
 * @param maxBytes Upper bound on bytes read from the end of the file; negative,
 * NaN and fractional values are normalised to a non-negative integer.
 * @returns The decoded tail plus size/truncation metadata. A missing file
 * yields an empty, non-truncated result.
 * @throws Any error from opening, stat-ing or reading an existing file.
 */
export function readTextTail(filePath: string, maxBytes = 64_000): TextTailResult {
  if (!fs.existsSync(filePath)) return { path: filePath, text: "", bytes: 0, truncated: false };
  const fd = fs.openSync(filePath, "r");
  try {
    // Stat through the descriptor so size and contents refer to the same file
    // even if the path is replaced concurrently.
    const size = fs.fstatSync(fd).size;
    const limit = Math.max(0, Math.floor(maxBytes) || 0);
    const wanted = Math.min(size, limit);
    const buffer = Buffer.alloc(wanted);
    const start = size - wanted;

    // readSync may return fewer bytes than requested; loop until filled or EOF.
    let filled = 0;
    while (filled < wanted) {
      const got = fs.readSync(fd, buffer, filled, wanted - filled, start + filled);
      if (got === 0) break;
      filled += got;
    }

    const truncated = size > wanted;
    // When the tail starts mid-file the cut may land inside a multi-byte UTF-8
    // sequence; skip up to three leading continuation bytes (10xxxxxx) so the
    // decoded text does not begin with U+FFFD replacement characters.
    let offset = 0;
    if (truncated) {
      while (offset < filled && offset < 3 && ((buffer[offset] ?? 0) & 0xc0) === 0x80) offset++;
    }
    return { path: filePath, text: buffer.toString("utf-8", offset, filled), bytes: size, truncated };
  } finally {
    fs.closeSync(fd);
  }
}
|
||||
|
||||
/**
 * Formats a millisecond duration compactly: `123ms`, `4.5s`, or `2m5s`.
 *
 * Unit selection is based on the rounded value, so a duration never renders
 * as `1000ms` or `60.0s`; it rolls over to the next unit instead.
 *
 * @param ms Duration in milliseconds.
 * @returns The formatted duration, or `undefined` for `undefined`/non-finite input.
 */
function compactDuration(ms: number | undefined): string | undefined {
  if (ms === undefined || !Number.isFinite(ms)) return undefined;

  const wholeMs = Math.round(ms);
  if (wholeMs < 1000) return `${wholeMs}ms`;

  // Work in tenths of a second so the one-decimal rendering and the
  // seconds/minutes boundary agree with each other.
  const tenths = Math.round(ms / 100);
  if (tenths < 600) return `${(tenths / 10).toFixed(1)}s`;

  // Values that only reach a minute through rounding are shown as 1m0s.
  const totalSeconds = Math.max(60, Math.floor(ms / 1000));
  return `${Math.floor(totalSeconds / 60)}m${totalSeconds % 60}s`;
}
|
||||
|
||||
/**
 * Formats the elapsed time from `start` to `end` (or to now when `end` is
 * absent) using `compactDuration`. Negative spans are clamped to zero.
 *
 * @returns The formatted span, or `undefined` when `start` is missing.
 */
function ageBetween(start: string | undefined, end: string | undefined): string | undefined {
  if (!start) return undefined;
  const finishedMs = end ? new Date(end).getTime() : Date.now();
  const startedMs = new Date(start).getTime();
  const span = finishedMs - startedMs;
  return compactDuration(Math.max(0, span));
}
|
||||
|
||||
/**
 * Summarises an agent's current activity as a single space-separated string.
 *
 * Segments appear in a fixed order (state, current tool, tool count, tokens,
 * turns, duration, failed tool, last output line) and are omitted when the
 * underlying value is absent. Returns "idle" when nothing is available.
 */
function activityText(agent: CrewAgentRecord): string {
  const progress = agent.progress;
  const segments: string[] = [];
  const add = (segment: string | undefined | false): void => {
    if (segment) segments.push(segment);
  };

  add(progress?.activityState);
  add(progress?.currentTool && (TOOL_LABELS[progress.currentTool] ?? `tool=${progress.currentTool}`));
  add(agent.toolUses !== undefined && `tools=${agent.toolUses}`);
  add(progress?.tokens !== undefined && `tokens=${progress.tokens}`);
  add(progress?.turns !== undefined && `turns=${progress.turns}`);
  // Prefer the recorded duration; fall back to wall-clock time since start.
  add(compactDuration(progress?.durationMs) ?? ageBetween(agent.startedAt, agent.completedAt));
  add(progress?.failedTool && `failedTool=${progress.failedTool}`);
  add(progress?.recentOutput?.length ? `last=${progress.recentOutput.at(-1)}` : undefined);

  return segments.join(" ") || "idle";
}
|
||||
|
||||
/** Maps an agent status to the single-character marker shown in the dashboard. */
function statusGlyph(status: CrewAgentRecord["status"]): string {
  switch (status) {
    case "completed":
      return "✓";
    case "failed":
      return "✗";
    case "running":
      return "▶";
    case "cancelled":
    case "stopped":
      return "■";
    default:
      return "·";
  }
}
|
||||
|
||||
/**
 * Returns the suffix " no-output" for a completed agent whose output file is
 * missing, empty, or cannot be inspected; otherwise an empty string. Agents in
 * any other status never produce a warning.
 */
function outputWarning(manifest: TeamRunManifest, agent: CrewAgentRecord): string {
  if (agent.status !== "completed") return "";
  const warning = " no-output";
  try {
    const outputPath = agentOutputPath(manifest, agent.taskId);
    const hasContent = fs.existsSync(outputPath) && fs.statSync(outputPath).size !== 0;
    return hasContent ? "" : warning;
  } catch {
    return warning;
  }
}
|
||||
|
||||
/**
 * Renders one dashboard bullet for an agent: glyph, task, role → agent name,
 * status, runtime, activity summary, optional output warning and error.
 */
function agentLine(manifest: TeamRunManifest, agent: CrewAgentRecord): string {
  const identity = `- ${statusGlyph(agent.status)} ${agent.taskId} ${agent.role} → ${agent.agent}`;
  const activity = `${activityText(agent)}${outputWarning(manifest, agent)}`;
  const errorSuffix = agent.error ? ` · error=${agent.error}` : "";
  return `${identity} · ${agent.status} · ${agent.runtime} · ${activity}${errorSuffix}`;
}
|
||||
|
||||
/**
 * Builds a textual dashboard of all crew agents for a run.
 *
 * Agents are split into `running`, `queued` and `recent` (every other status).
 * The text lists every running and queued agent but only the last ten recent
 * ones; the returned `groups` always contain the full lists.
 */
export function buildAgentDashboard(manifest: TeamRunManifest): { text: string; groups: Record<string, CrewAgentRecord[]> } {
  const agents = readCrewAgents(manifest);

  const running: CrewAgentRecord[] = [];
  const queued: CrewAgentRecord[] = [];
  const recent: CrewAgentRecord[] = [];
  for (const agent of agents) {
    if (agent.status === "running") running.push(agent);
    else if (agent.status === "queued") queued.push(agent);
    else recent.push(agent);
  }
  const groups: Record<string, CrewAgentRecord[]> = { running, queued, recent };

  // A section is its heading followed by one bullet per agent, or a placeholder.
  const section = (heading: string, members: CrewAgentRecord[]): string[] => [
    heading,
    ...(members.length ? members.map((agent) => agentLine(manifest, agent)) : ["- (none)"]),
  ];

  const lines = [
    `Crew agents for ${manifest.runId}`,
    `Run: ${manifest.status} · ${manifest.team}/${manifest.workflow ?? "none"} · agents=${agents.length}`,
    `Counts: running=${running.length}, queued=${queued.length}, recent=${recent.length}`,
    "",
    ...section("## Running", running),
    "",
    ...section("## Queued", queued),
    "",
    ...section("## Recent", recent.slice(-10)),
  ];
  return { text: lines.join("\n"), groups };
}
|
||||
|
||||
/**
 * Reads the tail of a task's agent output file.
 *
 * @param maxBytes Optional byte cap forwarded to `readTextTail`; its default applies when omitted.
 */
export function readAgentOutput(manifest: TeamRunManifest, taskId: string, maxBytes?: number): TextTailResult {
  const outputFile = agentOutputPath(manifest, taskId);
  return readTextTail(outputFile, maxBytes);
}
|
||||
26
extensions/pi-crew/src/runtime/async-marker.ts
Normal file
26
extensions/pi-crew/src/runtime/async-marker.ts
Normal file
@@ -0,0 +1,26 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import { atomicWriteJson } from "../state/atomic-write.ts";
|
||||
import type { TeamRunManifest } from "../state/types.ts";
|
||||
|
||||
/** Contents of the on-disk marker recording that a background run has started. */
export interface AsyncStartMarker {
  /** Process id of the background runner; validated as a positive integer when read back. */
  pid: number;

  /** Start timestamp string; validated as non-empty when read back. */
  startedAt: string;
}
|
||||
|
||||
/** Location of the async start marker (`async.pid`) inside the run's state root. */
export function asyncStartMarkerPath(manifest: Pick<TeamRunManifest, "stateRoot">): string {
  const { stateRoot } = manifest;
  return path.join(stateRoot, "async.pid");
}
|
||||
|
||||
/** Persists the async start marker as JSON at `asyncStartMarkerPath(manifest)` via `atomicWriteJson`. */
export function writeAsyncStartMarker(manifest: Pick<TeamRunManifest, "stateRoot">, marker: AsyncStartMarker): void {
  const markerFile = asyncStartMarkerPath(manifest);
  atomicWriteJson(markerFile, marker);
}
|
||||
|
||||
/**
 * Reports whether a well-formed async start marker exists for the run.
 *
 * The marker must parse as JSON and carry a positive integer `pid` and a
 * non-empty string `startedAt`. Any read or parse failure yields `false`.
 */
export function hasAsyncStartMarker(manifest: Pick<TeamRunManifest, "stateRoot">): boolean {
  try {
    const text = fs.readFileSync(asyncStartMarkerPath(manifest), "utf-8");
    // Destructuring stays inside the try block: a literal `null` payload throws here.
    const { pid, startedAt } = JSON.parse(text) as Partial<AsyncStartMarker>;
    const pidValid = typeof pid === "number" && Number.isInteger(pid) && pid > 0;
    const startedAtValid = typeof startedAt === "string" && startedAt.length > 0;
    return pidValid && startedAtValid;
  } catch {
    return false;
  }
}
|
||||
77
extensions/pi-crew/src/runtime/async-runner.ts
Normal file
77
extensions/pi-crew/src/runtime/async-runner.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
import { spawn, type SpawnOptions } from "node:child_process";
|
||||
import { createRequire } from "node:module";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import { fileURLToPath, pathToFileURL } from "node:url";
|
||||
import { appendEvent } from "../state/event-log.ts";
|
||||
import type { TeamRunManifest } from "../state/types.ts";
|
||||
|
||||
export type FileExists = (filePath: string) => boolean;
|
||||
|
||||
const requireFromHere = createRequire(import.meta.url);
|
||||
|
||||
/** Resolves the directory two levels above this module's own directory. */
function packageRootFromRuntime(): string {
  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  return path.resolve(moduleDir, "..", "..");
}
|
||||
|
||||
/** Given the path to jiti's `package.json`, returns the sibling `lib/jiti-register.mjs` path. */
function jitiRegisterPathFromPackageJson(packageJsonPath: string): string {
  const packageDir = path.dirname(packageJsonPath);
  return path.join(packageDir, "lib", "jiti-register.mjs");
}
|
||||
|
||||
/**
 * Locates jiti's register script.
 *
 * Candidates are checked in order: the package's own `node_modules`, the
 * `node_modules` two directories above the package root, and finally the
 * location Node's resolver reports for `jiti/package.json` (if resolvable).
 *
 * @param packageRoot Root directory to search from.
 * @param exists Existence predicate, injectable for tests.
 * @returns The first existing candidate, or `undefined` when none exists.
 */
export function resolveJitiRegisterPath(packageRoot = packageRootFromRuntime(), exists: FileExists = fs.existsSync): string | undefined {
  const registerSegments = ["node_modules", "jiti", "lib", "jiti-register.mjs"];
  // A Set keeps insertion order while dropping duplicate candidates.
  const candidates = new Set<string>([
    path.join(packageRoot, ...registerSegments),
    path.join(packageRoot, "..", "..", ...registerSegments),
  ]);
  try {
    candidates.add(jitiRegisterPathFromPackageJson(requireFromHere.resolve("jiti/package.json")));
  } catch {
    // Module resolution failed; rely on the explicit candidates above.
  }
  for (const candidate of candidates) {
    if (exists(candidate)) return candidate;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Builds the Node argument list that launches the background runner through
 * the jiti loader.
 *
 * @param jitiRegisterPath Path to jiti's register script; resolved automatically when omitted.
 * @throws Error when no jiti register script is available.
 */
export function getBackgroundRunnerCommand(runnerPath: string, cwd: string, runId: string, jitiRegisterPath: string | false | undefined = resolveJitiRegisterPath()): { args: string[]; loader: "jiti" } {
  if (!jitiRegisterPath) throw new Error("pi-crew background runner cannot start: jiti loader not found. Reinstall pi-crew (`pi install npm:pi-crew`) or ensure node_modules/jiti is present.");
  // `--import` expects a URL, so the filesystem path is converted first.
  const loaderUrl = pathToFileURL(jitiRegisterPath).href;
  const args = ["--import", loaderUrl, runnerPath, "--cwd", cwd, "--run-id", runId];
  return { args, loader: "jiti" };
}
|
||||
|
||||
/** Outcome of launching a detached background team run. */
export interface SpawnBackgroundTeamRunResult {
  /** Process id reported by the spawned child, if one was assigned. */
  pid?: number;

  /** Path of the log file that receives the child's stdout and stderr. */
  logPath: string;
}
|
||||
|
||||
/**
 * Spawn options for the detached background runner: runs in the manifest's
 * cwd, ignores stdin, sends stdout and stderr to `logFd`, and inherits a copy
 * of the current environment.
 */
export function buildBackgroundSpawnOptions(manifest: TeamRunManifest, logFd: number): SpawnOptions {
  const inheritedEnv = { ...process.env };
  const options: SpawnOptions = {
    cwd: manifest.cwd,
    detached: true,
    stdio: ["ignore", logFd, logFd],
    env: inheritedEnv,
    windowsHide: true,
  };
  return options;
}
|
||||
|
||||
/**
 * Launches the background runner for a team run as a detached Node process.
 *
 * The child's stdout and stderr are appended to `background.log` inside the
 * run's state root. The parent closes its copy of the log descriptor before
 * returning and unrefs the child so it does not keep the parent alive.
 *
 * @param manifest Run manifest supplying cwd, state root, run id and event log path.
 * @returns The child's pid (if assigned) and the log file path.
 * @throws Error when the jiti loader cannot be located (an `async.failed`
 * event is recorded first), or when preparing the log file fails.
 */
export function spawnBackgroundTeamRun(manifest: TeamRunManifest): SpawnBackgroundTeamRunResult {
  const runnerPath = path.join(path.dirname(fileURLToPath(import.meta.url)), "background-runner.ts");
  const logPath = path.join(manifest.stateRoot, "background.log");
  fs.mkdirSync(manifest.stateRoot, { recursive: true });
  const logFd = fs.openSync(logPath, "a");
  try {
    const jitiRegisterPath = resolveJitiRegisterPath();
    if (!jitiRegisterPath) {
      const message = "pi-crew background runner cannot start: jiti loader not found. Reinstall pi-crew (`pi install npm:pi-crew`) or ensure node_modules/jiti is present.";
      appendEvent(manifest.eventsPath, { type: "async.failed", runId: manifest.runId, message });
      throw new Error(message);
    }
    const command = getBackgroundRunnerCommand(runnerPath, manifest.cwd, manifest.runId, jitiRegisterPath);
    fs.appendFileSync(logPath, `[pi-crew] background loader=${command.loader}\n`, "utf-8");
    const child = spawn(process.execPath, command.args, buildBackgroundSpawnOptions(manifest, logFd));
    // Spawn failures (for example a cwd that no longer exists) are reported
    // asynchronously through the "error" event. Without a listener Node raises
    // them as an uncaught exception in the parent, so record the failure instead.
    child.once("error", (error: Error) => {
      const message = `pi-crew background runner failed to start: ${error.message}`;
      try {
        appendEvent(manifest.eventsPath, { type: "async.failed", runId: manifest.runId, message });
      } catch {
        // Best effort: the event log itself may be unavailable.
      }
    });
    child.unref();
    return { pid: child.pid, logPath };
  } finally {
    // The child holds its own duplicate of the descriptor once spawned.
    fs.closeSync(logFd);
  }
}
|
||||
28
extensions/pi-crew/src/runtime/attention-events.ts
Normal file
28
extensions/pi-crew/src/runtime/attention-events.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import { appendEvent, readEvents } from "../state/event-log.ts";
|
||||
import type { CrewAttentionEventData, TeamRunManifest } from "../state/types.ts";
|
||||
|
||||
/** Arguments for `appendTaskAttentionEvent`. */
export interface AppendTaskAttentionInput {
  /** Run whose event log (`eventsPath`) and `runId` are used. */
  manifest: TeamRunManifest;

  /** Task the event refers to; part of the de-duplication key. */
  taskId?: string;

  /** Human-readable message stored on the event. */
  message: string;

  /** Event payload; its `reason` and `activityState` are part of the de-duplication key. */
  data: CrewAttentionEventData;
}
|
||||
|
||||
/**
 * Appends a `task.attention` event unless an equivalent one already appears
 * among the 200 most recent events of the run.
 *
 * Two events are considered equivalent when task id, reason and activity
 * state all match.
 *
 * @returns `true` when an event was appended, `false` when it was suppressed as a duplicate.
 */
export function appendTaskAttentionEvent(input: AppendTaskAttentionInput): boolean {
  const { manifest, taskId, message, data } = input;
  const recentEvents = readEvents(manifest.eventsPath).slice(-200);
  const wantedKey = `${taskId ?? ""}:${data.reason}:${data.activityState}`;

  for (const event of recentEvents) {
    if (event.type !== "task.attention") continue;
    const eventKey = `${event.taskId ?? ""}:${event.data?.reason ?? ""}:${event.data?.activityState ?? ""}`;
    if (eventKey === wantedKey) return false;
  }

  appendEvent(manifest.eventsPath, {
    type: "task.attention",
    runId: manifest.runId,
    taskId,
    message,
    // Shallow copy so the stored event does not alias the caller's object.
    data: { ...data },
  });
  return true;
}
|
||||
59
extensions/pi-crew/src/runtime/background-runner.ts
Normal file
59
extensions/pi-crew/src/runtime/background-runner.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
import { allAgents, discoverAgents } from "../agents/discover-agents.ts";
|
||||
import { allTeams, discoverTeams } from "../teams/discover-teams.ts";
|
||||
import { appendEvent } from "../state/event-log.ts";
|
||||
import { loadRunManifestById, saveRunManifest, updateRunStatus } from "../state/state-store.ts";
|
||||
import { allWorkflows, discoverWorkflows } from "../workflows/discover-workflows.ts";
|
||||
import { loadConfig } from "../config/config.ts";
|
||||
import { executeTeamRun } from "./team-runner.ts";
|
||||
import { resolveCrewRuntime, runtimeResolutionState } from "./runtime-resolver.ts";
|
||||
import { directTeamAndWorkflowFromRun } from "./direct-run.ts";
|
||||
import { expandParallelResearchWorkflow } from "./parallel-research.ts";
|
||||
import { writeAsyncStartMarker } from "./async-marker.ts";
|
||||
|
||||
/**
 * Returns the command-line token that follows the first occurrence of `name`
 * in `process.argv`, or `undefined` when the flag is absent or is the last token.
 */
function argValue(name: string): string | undefined {
  const position = process.argv.indexOf(name);
  return position === -1 ? undefined : process.argv[position + 1];
}
|
||||
|
||||
/**
 * Entry point of the detached background runner.
 *
 * Loads the run identified by `--cwd` / `--run-id`, records the async start
 * (event plus marker file), resolves team, workflow, configuration and
 * runtime, executes the team run, and records completion. Any failure after
 * the start marker is written marks the run as failed, records an
 * `async.failed` event and sets a non-zero exit code.
 *
 * @throws Error when required arguments are missing or the run cannot be loaded.
 */
async function main(): Promise<void> {
  const cwd = argValue("--cwd");
  const runId = argValue("--run-id");
  if (!cwd || !runId) throw new Error("Usage: background-runner.ts --cwd <cwd> --run-id <runId>");

  const loaded = loadRunManifestById(cwd, runId);
  if (!loaded) throw new Error(`Run '${runId}' not found.`);
  let manifest = loaded.manifest;
  let tasks = loaded.tasks;

  appendEvent(manifest.eventsPath, { type: "async.started", runId: manifest.runId, data: { pid: process.pid } });
  writeAsyncStartMarker(manifest, { pid: process.pid, startedAt: new Date().toISOString() });

  try {
    const agents = allAgents(discoverAgents(cwd));

    // A direct run supplies its own team/workflow; otherwise look them up by name.
    const directRun = directTeamAndWorkflowFromRun(manifest, tasks, agents);
    const team = directRun?.team ?? allTeams(discoverTeams(cwd)).find((candidate) => candidate.name === manifest.team);
    if (!team) throw new Error(`Team '${manifest.team}' not found.`);
    const baseWorkflow = directRun?.workflow ?? allWorkflows(discoverWorkflows(cwd)).find((candidate) => candidate.name === manifest.workflow);
    if (!baseWorkflow) throw new Error(`Workflow '${manifest.workflow ?? ""}' not found.`);
    const workflow = expandParallelResearchWorkflow(baseWorkflow, cwd);

    // Prefer the configuration snapshot stored on the manifest over the one on disk.
    const fileConfig = loadConfig(cwd);
    const hasStoredConfig = manifest.runConfig && typeof manifest.runConfig === "object" && !Array.isArray(manifest.runConfig);
    const runConfig = hasStoredConfig ? (manifest.runConfig as typeof fileConfig.config) : fileConfig.config;

    // Reuse a previously stored runtime resolution when present; otherwise resolve now.
    const runtime = manifest.runtimeResolution
      ? {
          kind: manifest.runtimeResolution.kind,
          requestedMode: manifest.runtimeResolution.requestedMode,
          available: manifest.runtimeResolution.available,
          fallback: manifest.runtimeResolution.fallback,
          steer: manifest.runtimeResolution.kind === "live-session",
          resume: manifest.runtimeResolution.kind === "live-session",
          liveToolActivity: manifest.runtimeResolution.kind === "live-session",
          transcript: manifest.runtimeResolution.kind !== "scaffold",
          reason: manifest.runtimeResolution.reason,
          safety: manifest.runtimeResolution.safety,
        }
      : await resolveCrewRuntime(runConfig);
    const runtimeResolution = manifest.runtimeResolution ?? runtimeResolutionState(runtime);

    manifest = { ...manifest, runtimeResolution, runConfig, updatedAt: new Date().toISOString() };
    saveRunManifest(manifest);
    appendEvent(manifest.eventsPath, {
      type: "runtime.resolved",
      runId: manifest.runId,
      message: `Runtime resolved: ${runtime.kind} safety=${runtime.safety}`,
      data: { runtimeResolution, async: true },
    });
    if (runtime.safety === "blocked") throw new Error(runtime.reason ?? "Child worker execution is disabled; refusing to create no-op scaffold subagents.");

    const result = await executeTeamRun({
      manifest,
      tasks,
      team,
      workflow,
      agents,
      executeWorkers: runtime.kind !== "scaffold",
      limits: runConfig.limits,
      runtime,
      runtimeConfig: runConfig.runtime,
      skillOverride: manifest.skillOverride,
      reliability: runConfig.reliability,
    });
    manifest = result.manifest;
    tasks = result.tasks;
    appendEvent(manifest.eventsPath, { type: "async.completed", runId: manifest.runId, data: { status: manifest.status, tasks: tasks.length } });
  } catch (error) {
    const failure = error instanceof Error ? error.message : String(error);
    manifest = updateRunStatus(manifest, "failed", failure);
    appendEvent(manifest.eventsPath, { type: "async.failed", runId: manifest.runId, message: failure });
    process.exitCode = 1;
  }
}
|
||||
|
||||
await main();
|
||||
51
extensions/pi-crew/src/runtime/cancellation.ts
Normal file
51
extensions/pi-crew/src/runtime/cancellation.ts
Normal file
@@ -0,0 +1,51 @@
|
||||
export type CancellationReasonCode = "caller_cancelled" | "leader_interrupted" | "provider_timeout" | "worker_timeout" | "tool_timeout" | "shutdown" | "unknown";
|
||||
|
||||
/** Structured description of why work was cancelled. */
export interface CancellationReason {
  /** Machine-readable category of the cancellation. */
  code: CancellationReasonCode;

  /** Human-readable explanation; also used as the error message of `CrewCancellationError`. */
  message: string;

  /** Original value that triggered the cancellation, when one is available. */
  cause?: unknown;
}
|
||||
|
||||
const KNOWN_CODES: ReadonlySet<string> = new Set(["caller_cancelled", "leader_interrupted", "provider_timeout", "worker_timeout", "tool_timeout", "shutdown", "unknown"]);
|
||||
|
||||
/**
 * Error thrown when crew work is cancelled. Carries the structured
 * {@link CancellationReason}; the error message mirrors `reason.message`.
 */
export class CrewCancellationError extends Error {
  /** Structured reason this error was created from. */
  readonly reason: CancellationReason;

  /** @param reason Structured cancellation reason to attach to the error. */
  constructor(reason: CancellationReason) {
    super(reason.message);
    this.name = "CrewCancellationError";
    this.reason = reason;
  }
}
|
||||
|
||||
/**
 * Interprets a string abort reason.
 *
 * A trimmed value that matches a known code becomes that code with a
 * generated message; any other text is treated as a caller cancellation whose
 * message is the text itself (or a default when blank).
 */
function reasonFromString(value: string): CancellationReason {
  const text = value.trim();
  if (!KNOWN_CODES.has(text)) {
    return { code: "caller_cancelled", message: text || "Cancelled by caller." };
  }
  return { code: text as CancellationReasonCode, message: `Cancelled: ${text}` };
}
|
||||
|
||||
/**
 * Normalises an arbitrary abort reason into a {@link CancellationReason}.
 *
 * - `CrewCancellationError`: its stored reason is returned as-is.
 * - Other `Error`: caller cancellation carrying the error as `cause`.
 * - String: delegated to `reasonFromString`.
 * - Non-array object: `code` (or `reason`) selects a known code, `message` is
 *   trimmed, and `cause` defaults to the object itself.
 * - Anything else: a generic caller cancellation.
 */
export function cancellationReasonFromUnknown(value: unknown): CancellationReason {
  const defaultMessage = "Cancelled by caller.";

  if (value instanceof CrewCancellationError) return value.reason;
  if (value instanceof Error) return { code: "caller_cancelled", message: value.message || defaultMessage, cause: value };
  if (typeof value === "string") return reasonFromString(value);

  const isRecord = typeof value === "object" && value !== null && !Array.isArray(value);
  if (!isRecord) return { code: "caller_cancelled", message: defaultMessage };

  const record = value as { code?: unknown; reason?: unknown; message?: unknown; cause?: unknown };
  let rawCode: string | undefined;
  if (typeof record.code === "string") rawCode = record.code;
  else if (typeof record.reason === "string") rawCode = record.reason;

  // Unknown or missing codes degrade to a caller cancellation.
  const code: CancellationReasonCode = rawCode && KNOWN_CODES.has(rawCode) ? (rawCode as CancellationReasonCode) : "caller_cancelled";
  const trimmedMessage = typeof record.message === "string" ? record.message.trim() : "";
  const message = trimmedMessage || `Cancelled: ${code}`;
  return { code, message, cause: record.cause ?? value };
}
|
||||
|
||||
/** Normalises the `reason` of an optional abort signal into a {@link CancellationReason}. */
export function cancellationReasonFromSignal(signal: AbortSignal | undefined): CancellationReason {
  const abortReason: unknown = signal?.reason;
  return cancellationReasonFromUnknown(abortReason);
}
|
||||
|
||||
/** Creates a {@link CrewCancellationError} describing why the given signal was aborted. */
export function cancellationErrorFromSignal(signal: AbortSignal | undefined): CrewCancellationError {
  const reason = cancellationReasonFromSignal(signal);
  return new CrewCancellationError(reason);
}
|
||||
|
||||
/**
 * Throws a cancellation error when the signal has already been aborted; a
 * missing or still-active signal is a no-op.
 *
 * @throws CrewCancellationError when `signal.aborted` is true.
 */
export function throwIfCancelled(signal: AbortSignal | undefined): void {
  if (!signal || !signal.aborted) return;
  throw cancellationErrorFromSignal(signal);
}
|
||||
457
extensions/pi-crew/src/runtime/child-pi.ts
Normal file
457
extensions/pi-crew/src/runtime/child-pi.ts
Normal file
@@ -0,0 +1,457 @@
|
||||
import { spawn, type ChildProcess, type SpawnOptions } from "node:child_process";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import type { AgentConfig } from "../agents/agent-config.ts";
|
||||
import type { WorkerExitStatus } from "../state/types.ts";
|
||||
import { buildPiWorkerArgs, checkCrewDepth, cleanupTempDir } from "./pi-args.ts";
|
||||
import { getPiSpawnCommand } from "./pi-spawn.ts";
|
||||
import { DEFAULT_CHILD_PI } from "../config/defaults.ts";
|
||||
import { logInternalError } from "../utils/internal-error.ts";
|
||||
import { attachPostExitStdioGuard, trySignalChild } from "./post-exit-stdio-guard.ts";
|
||||
import { redactJsonLine } from "../utils/redaction.ts";
|
||||
|
||||
// Limits and timeouts for child pi processes, taken from the shared defaults.
const {
  postExitStdioGuardMs: POST_EXIT_STDIO_GUARD_MS,
  finalDrainMs: FINAL_DRAIN_MS,
  hardKillMs: HARD_KILL_MS,
  responseTimeoutMs: RESPONSE_TIMEOUT_MS,
  maxCaptureBytes: MAX_CAPTURE_BYTES,
  maxAssistantTextChars: MAX_ASSISTANT_TEXT_CHARS,
  maxToolResultChars: MAX_TOOL_RESULT_CHARS,
  maxToolInputChars: MAX_TOOL_INPUT_CHARS,
  maxCompactContentChars: MAX_COMPACT_CONTENT_CHARS,
} = DEFAULT_CHILD_PI;

/** Child processes tracked by pid; iterated by `terminateActiveChildPiProcesses`. */
const activeChildProcesses = new Map<number, ChildProcess>();

/** Pending SIGKILL escalation timers, keyed by the pid they target. */
const childHardKillTimers = new Map<number, NodeJS.Timeout>();
|
||||
|
||||
/**
 * Appends `chunk` to `current` while keeping only the last `maxBytes` bytes
 * (UTF-8) of the captured output.
 *
 * Once output has been dropped, the result is prefixed with a single
 * truncation notice. The notice is not counted toward `maxBytes`, and a notice
 * left by an earlier call is stripped before re-trimming so it never leaks
 * into the captured text.
 *
 * @param current Output captured so far (possibly already carrying the notice).
 * @param chunk Newly received output.
 * @param maxBytes Maximum number of UTF-8 bytes of output to retain.
 * @returns The bounded output, prefixed with the notice when truncated.
 */
function appendBoundedTail(current: string, chunk: string, maxBytes = MAX_CAPTURE_BYTES): string {
  const noticePattern = /^\[pi-crew captured output truncated to last \d+ KiB\]\n/;
  const previous = current.replace(noticePattern, "");
  const wasTruncated = previous.length !== current.length;
  const combined = previous + chunk;
  const notice = `[pi-crew captured output truncated to last ${Math.round(maxBytes / 1024)} KiB]\n`;

  if (Buffer.byteLength(combined, "utf-8") <= maxBytes) return wasTruncated ? notice + combined : combined;

  // Trim by bytes rather than UTF-16 indices so multi-byte text is bounded
  // precisely instead of being dropped in coarse character steps.
  const bytes = Buffer.from(combined, "utf-8");
  let start = bytes.length - Math.max(0, maxBytes);
  // Never start inside a multi-byte sequence: skip continuation bytes (10xxxxxx).
  while (start < bytes.length && ((bytes[start] ?? 0) & 0xc0) === 0x80) start++;
  return notice + bytes.subarray(start).toString("utf-8");
}
|
||||
|
||||
/** Cancels and forgets the pending SIGKILL escalation timer for `pid`, if any. */
function clearHardKillTimer(pid: number | undefined): void {
  const pending = pid ? childHardKillTimers.get(pid) : undefined;
  if (!pid || !pending) return;
  clearTimeout(pending);
  childHardKillTimers.delete(pid);
}
|
||||
|
||||
/**
 * Terminates a child process and, where possible, its descendants.
 *
 * On Windows this runs `taskkill /t /f`. Elsewhere it sends SIGTERM to the
 * process group (falling back to the single pid) and schedules a SIGKILL
 * escalation after `HARD_KILL_MS`; the escalation is cancelled if the child
 * emits "exit" first. Signalling failures are logged, never thrown.
 *
 * @param pid Process id to terminate; invalid or non-positive values are ignored.
 * @param child Optional handle; nothing is done when it has already exited.
 */
function killProcessTree(pid: number | undefined, child?: ChildProcess): void {
  if (!pid || !Number.isInteger(pid) || pid <= 0) return;
  if (child && child.exitCode !== null) return;

  const targetPid: number = pid;
  const context = `pid=${targetPid}`;

  // Signal the whole process group first; if that fails, signal the pid alone.
  const signalGroupThenPid = (signal: "SIGTERM" | "SIGKILL", groupLabel: string, pidLabel: string): void => {
    try {
      process.kill(-targetPid, signal);
    } catch (error) {
      logInternalError(groupLabel, error, context);
      try {
        process.kill(targetPid, signal);
      } catch (fallbackError) {
        logInternalError(pidLabel, fallbackError, context);
      }
    }
  };

  try {
    if (process.platform === "win32") {
      spawn("taskkill", ["/pid", String(targetPid), "/t", "/f"], { stdio: "ignore", windowsHide: true });
      return;
    }

    signalGroupThenPid("SIGTERM", "child-pi.sigterm", "child-pi.sigterm-absolute");

    // Replace any earlier escalation timer for this pid with a fresh one.
    clearHardKillTimer(targetPid);
    const escalation = setTimeout(() => {
      signalGroupThenPid("SIGKILL", "child-pi.sigkill", "child-pi.sigkill-absolute");
      childHardKillTimers.delete(targetPid);
    }, HARD_KILL_MS);
    // The timer must not keep the host process alive on its own.
    escalation.unref();
    child?.once("exit", () => clearHardKillTimer(targetPid));
    childHardKillTimers.set(targetPid, escalation);
  } catch (error) {
    logInternalError("child-pi.kill-process-tree", error, context);
  }
}
|
||||
|
||||
/**
 * Requests termination of every tracked child process.
 *
 * @returns The number of tracked processes a termination was requested for.
 */
export function terminateActiveChildPiProcesses(): number {
  // Snapshot first so concurrent map changes during termination do not affect iteration.
  const snapshot = Array.from(activeChildProcesses);
  snapshot.forEach(([pid, child]) => killProcessTree(pid, child));
  return snapshot.length;
}
|
||||
|
||||
/**
 * Options for running a child pi worker.
 *
 * NOTE(review): the function consuming this input lies outside the reviewed
 * range; field notes below are limited to what the names and types state.
 */
export interface ChildPiRunInput {
  // Required inputs
  cwd: string;
  task: string;
  agent: AgentConfig;

  // Optional execution settings
  model?: string;
  skillPaths?: string[];
  signal?: AbortSignal;
  /** When set, `appendTranscript` appends redacted lines to this file. */
  transcriptPath?: string;

  // Optional callbacks
  onStdoutLine?: (line: string) => void;
  onJsonEvent?: (event: unknown) => void;
  onSpawn?: (pid: number) => void;

  // Optional limits and timeouts (presumably override module defaults — confirm at the call site)
  maxDepth?: number;
  finalDrainMs?: number;
  hardKillMs?: number;
  responseTimeoutMs?: number;
}
|
||||
|
||||
/**
 * Outcome of a child pi worker run.
 *
 * NOTE(review): the producer of this result lies outside the reviewed range.
 */
export interface ChildPiRunResult {
  /** Exit code of the child, or `null` when none is available. */
  exitCode: number | null;
  /** Captured standard output. */
  stdout: string;
  /** Captured standard error. */
  stderr: string;
  /** Error description, when one was recorded. */
  error?: string;
  /** Detailed exit status, when one was recorded. */
  exitStatus?: WorkerExitStatus;
}
|
||||
|
||||
/**
 * Spawn options for a child pi worker: stdin ignored, stdout and stderr piped,
 * and detached on every platform except Windows.
 */
export function buildChildPiSpawnOptions(cwd: string, env: NodeJS.ProcessEnv): SpawnOptions {
  const onWindows = process.platform === "win32";
  const options: SpawnOptions = {
    cwd,
    env,
    stdio: ["ignore", "pipe", "pipe"],
    detached: !onWindows,
    windowsHide: true,
  };
  return options;
}
|
||||
|
||||
/**
 * Appends one line to the transcript file, if a transcript path is configured.
 *
 * The parent directory is created on demand and the line is passed through
 * `redactJsonLine` before being written with a trailing newline.
 */
function appendTranscript(input: ChildPiRunInput, line: string): void {
	const target = input.transcriptPath;
	if (!target) return;
	const parentDir = path.dirname(target);
	fs.mkdirSync(parentDir, { recursive: true });
	const entry = `${redactJsonLine(line)}\n`;
	fs.appendFileSync(target, entry, "utf-8");
}
|
||||
|
||||
/**
 * Truncates `value` to at most `maxChars` characters, appending a marker line
 * that states how many characters were dropped. Shorter strings are returned
 * unchanged.
 */
function compactString(value: string, maxChars = MAX_COMPACT_CONTENT_CHARS): string {
	const dropped = value.length - maxChars;
	if (dropped <= 0) return value;
	const kept = value.slice(0, maxChars);
	return `${kept}\n[pi-crew compacted ${dropped} chars]`;
}
|
||||
|
||||
/**
 * Recursively shrinks an arbitrary JSON-like value: strings go through
 * `compactString`, arrays keep their first 20 items, plain objects keep their
 * first 20 entries, and everything else is returned untouched.
 */
function compactValue(value: unknown): unknown {
	if (typeof value === "string") return compactString(value);
	if (Array.isArray(value)) {
		return value.slice(0, 20).map((item) => compactValue(item));
	}
	const record = asRecord(value);
	if (!record) return value;
	const result: Record<string, unknown> = {};
	Object.entries(record)
		.slice(0, 20)
		.forEach(([key, entry]) => {
			result[key] = compactValue(entry);
		});
	return result;
}
|
||||
|
||||
/**
 * Compacts a single message content part.
 *
 * Only `text`, `toolCall` and `toolResult` parts are kept (with their payloads
 * truncated); any other part, or a non-object value, yields `undefined`.
 */
function compactContentPart(part: unknown): unknown | undefined {
	const record = asRecord(part);
	if (!record) return undefined;
	switch (record.type) {
		case "text": {
			const text = typeof record.text === "string" ? compactString(record.text, MAX_ASSISTANT_TEXT_CHARS) : "";
			return { type: "text", text };
		}
		case "toolCall": {
			const rawInput = typeof record.input === "string" ? compactString(record.input, MAX_TOOL_INPUT_CHARS) : record.input;
			return { type: "toolCall", name: record.name, input: compactValue(rawInput) };
		}
		case "toolResult": {
			const rawContent = typeof record.content === "string" ? compactString(record.content, MAX_TOOL_RESULT_CHARS) : record.content;
			return { type: "toolResult", name: record.name, content: compactValue(rawContent) };
		}
		default:
			return undefined;
	}
}
|
||||
|
||||
/**
 * Reduces a parsed child Pi JSON event to the fields worth persisting.
 *
 * - `message_update` events and non-object values are dropped (`undefined`).
 * - Tool execution start/end events keep only the tool name and args.
 * - `tool_result_end`, `message_end` and `message` events keep a compacted
 *   message plus top-level usage/model/provider/stopReason; messages whose
 *   role is `user` or `system` are dropped.
 * - Any other event with a truthy `type` is reduced to `{ type }`.
 */
function compactChildPiEvent(event: unknown): unknown | undefined {
	const record = asRecord(event);
	if (!record) return undefined;
	const kind = record.type;
	switch (kind) {
		case "message_update":
			return undefined;
		case "tool_execution_start":
		case "tool_execution_end":
			return { type: kind, toolName: record.toolName, args: record.args };
		case "tool_result_end":
		case "message_end":
		case "message":
			break;
		default:
			return kind ? { type: kind } : undefined;
	}

	const message = asRecord(record.message);
	const role = message?.role;
	if (role === "user" || role === "system") return undefined;

	const rawContent = message?.content;
	const content = Array.isArray(rawContent)
		? rawContent.map(compactContentPart).filter((part) => part !== undefined)
		: undefined;
	const textField = typeof record.text === "string" ? { text: record.text } : {};
	const messageField = message
		? {
			message: {
				role: message.role,
				...(content ? { content } : {}),
				usage: message.usage,
				model: message.model,
				errorMessage: message.errorMessage,
				stopReason: message.stopReason,
			},
		}
		: {};
	// Key order is kept stable because the result is serialised into the transcript.
	return {
		type: kind,
		...textField,
		...messageField,
		usage: record.usage,
		model: record.model,
		provider: record.provider,
		stopReason: record.stopReason,
	};
}
|
||||
|
||||
/**
 * Derives the human-readable line for a compacted event, if it has one.
 *
 * Tool execution starts render as `tool: <name>` (or `tool started`).
 * `message` / `message_end` events from the assistant (or with no role) render
 * as their joined text parts, falling back to the event's own `text` field.
 * Everything else has no display text.
 */
function displayTextFromCompactEvent(event: unknown): string | undefined {
	const record = asRecord(event);
	if (!record) return undefined;
	if (record.type === "tool_execution_start") {
		if (typeof record.toolName === "string") return `tool: ${record.toolName}`;
		return "tool started";
	}
	const isMessageEvent = record.type === "message" || record.type === "message_end";
	if (!isMessageEvent) return undefined;

	const message = asRecord(record.message);
	const role = message?.role;
	if (role !== undefined && role !== "assistant") return undefined;

	const rawContent = message?.content;
	const parts: unknown[] = Array.isArray(rawContent) ? rawContent : [];
	const pieces: string[] = [];
	for (const part of parts) {
		const item = asRecord(part);
		if (item?.type === "text" && typeof item.text === "string") pieces.push(item.text);
	}
	const joined = pieces.join("\n").trim();
	if (joined) return joined;
	return typeof record.text === "string" ? record.text : undefined;
}
|
||||
|
||||
/**
 * Classifies one stdout line.
 *
 * A line that parses as JSON is compacted: `event` is the compacted event,
 * `persistedLine` its JSON form (empty when the event was dropped) and
 * `displayLine` its display text. If parsing or compaction throws, the line is
 * treated as plain text and is both persisted and displayed verbatim.
 */
function compactChildPiLine(line: string): { persistedLine: string; event?: unknown; displayLine?: string; json: boolean } {
	try {
		const event = compactChildPiEvent(JSON.parse(line));
		const persistedLine = event ? JSON.stringify(event) : "";
		const displayLine = displayTextFromCompactEvent(event);
		return { json: true, event, persistedLine, displayLine };
	} catch {
		return { json: false, persistedLine: line, displayLine: line };
	}
}
|
||||
|
||||
/**
 * Splits streamed child Pi stdout into lines and dispatches each one.
 *
 * For every non-blank line, in order: the compacted JSON event (if any) goes
 * to `onJsonEvent`, the persisted form is appended to the transcript, and the
 * display text (if any) goes to `onStdoutLine`. Exceptions thrown by either
 * callback are logged and do not interrupt processing.
 */
export class ChildPiLineObserver {
	/** Text received since the last newline; held until the line completes or `flush` is called. */
	private buffer = "";
	private readonly input: ChildPiRunInput;

	constructor(input: ChildPiRunInput) {
		this.input = input;
	}

	/** Feeds a chunk of stdout text; every completed line is emitted immediately. */
	observe(text: string): void {
		const segments = (this.buffer + text).split(/\r?\n/);
		// The last segment has no terminating newline yet, so it stays buffered.
		this.buffer = segments.pop() ?? "";
		segments.forEach((segment) => this.emitLine(segment));
	}

	/** Emits whatever is still buffered as a final line, if anything. */
	flush(): void {
		const pending = this.buffer;
		if (!pending) return;
		this.buffer = "";
		this.emitLine(pending);
	}

	/** Processes a single complete line; blank lines are ignored. */
	private emitLine(line: string): void {
		if (line.trim() === "") return;
		const { event, persistedLine, displayLine } = compactChildPiLine(line);

		if (event !== undefined) {
			try {
				this.input.onJsonEvent?.(event);
			} catch (error) {
				logInternalError("child-pi.on-json-event", error, `line=${persistedLine ?? displayLine ?? ""}`);
			}
		}

		if (persistedLine) appendTranscript(this.input, persistedLine);

		if (displayLine === undefined || !displayLine.trim()) return;
		try {
			this.input.onStdoutLine?.(displayLine);
		} catch (error) {
			logInternalError("child-pi.on-stdout-line", error, `line=${displayLine}`);
		}
	}
}
|
||||
|
||||
/**
 * Runs a complete block of stdout text through a fresh `ChildPiLineObserver`
 * and flushes it, so a trailing line without a newline is emitted as well.
 */
function observeStdoutChunk(input: ChildPiRunInput, text: string): void {
	const oneShot = new ChildPiLineObserver(input);
	oneShot.observe(text);
	oneShot.flush();
}
|
||||
|
||||
/** Returns `value` typed as a string-keyed record when it is a non-null, non-array object; otherwise `undefined`. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
	if (!value) return undefined;
	if (typeof value !== "object") return undefined;
	if (Array.isArray(value)) return undefined;
	return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Reports whether an event is the assistant's final `message_end`.
 *
 * It must be a `message_end` whose role is absent or `assistant`, whose stop
 * reason (on the message, else on the event) is absent or `stop`, and whose
 * content contains no `toolCall` part.
 */
function isFinalAssistantEvent(event: unknown): boolean {
	const record = asRecord(event);
	if (!record || record.type !== "message_end") return false;

	const message = asRecord(record.message);
	const role = message?.role;
	if (role !== undefined && role !== "assistant") return false;

	let stopReason: string | undefined;
	if (typeof message?.stopReason === "string") stopReason = message.stopReason;
	else if (typeof record.stopReason === "string") stopReason = record.stopReason;
	if (stopReason !== undefined && stopReason !== "stop") return false;

	const rawContent = message?.content;
	const parts: unknown[] = Array.isArray(rawContent) ? rawContent : [];
	return parts.every((part) => asRecord(part)?.type !== "toolCall");
}
|
||||
|
||||
/**
 * Runs one child Pi worker process and resolves with its collected output.
 *
 * Flow:
 * 1. The crew depth guard may block the run outright (exit code 1).
 * 2. When `PI_TEAMS_MOCK_CHILD_PI` is set, a canned result is returned without
 *    spawning anything.
 * 3. Otherwise the worker is spawned and its stdout is streamed through a
 *    `ChildPiLineObserver`. Three mechanisms can terminate the child:
 *    - the no-response timer (no stdout/stderr activity for `responseTimeoutMs`),
 *    - the final-drain timer (child lingers after its final assistant event),
 *    - the caller's `AbortSignal`.
 *
 * The promise never rejects from process events: spawn errors and non-zero
 * exits are reported through the returned result.
 *
 * @param input Run configuration and callbacks.
 * @returns Exit code, collected stdout/stderr, optional error text and exit status.
 */
export async function runChildPi(input: ChildPiRunInput): Promise<ChildPiRunResult> {
	const depth = checkCrewDepth(input.maxDepth);
	if (depth.blocked) return { exitCode: 1, stdout: "", stderr: `pi-crew depth guard blocked child worker: depth ${depth.depth} >= max ${depth.maxDepth}` };
	const mock = process.env.PI_TEAMS_MOCK_CHILD_PI;
	if (mock) {
		if (mock === "success") {
			const stdout = `Mock child Pi success for ${input.agent.name}\n`;
			observeStdoutChunk(input, stdout);
			return { exitCode: 0, stdout, stderr: "" };
		}
		if (mock === "json-success" || mock === "adaptive-plan") {
			const text = mock === "adaptive-plan" && input.task.includes("ADAPTIVE_PLAN_JSON_START")
				? `Adaptive mock plan\nADAPTIVE_PLAN_JSON_START\n${JSON.stringify({ phases: [{ name: "research", tasks: [{ role: "explorer", task: "Explore adaptive target" }, { role: "analyst", task: "Analyze adaptive target" }, { role: "planner", task: "Plan adaptive target" }] }, { name: "build", tasks: [{ role: "executor", task: "Implement adaptive target" }] }, { name: "check", tasks: [{ role: "reviewer", task: "Review adaptive target" }, { role: "test-engineer", task: "Test adaptive target" }, { role: "writer", task: "Summarize adaptive target" }] }] })}\nADAPTIVE_PLAN_JSON_END`
				: `Mock JSON success for ${input.agent.name}`;
			const stdout = `${JSON.stringify({ type: "message", message: { role: "assistant", content: [{ type: "text", text }] } })}\n${JSON.stringify({ type: "message_end", usage: { input: 10, output: 5, cost: 0.001, turns: 1 } })}\n`;
			observeStdoutChunk(input, stdout);
			return { exitCode: 0, stdout, stderr: "" };
		}
		if (mock === "retryable-failure") return { exitCode: 1, stdout: "", stderr: "rate limit: mock failure" };
		return { exitCode: 1, stdout: "", stderr: `mock failure: ${mock}` };
	}
	const built = buildPiWorkerArgs({ task: input.task, agent: input.agent, model: input.model, sessionEnabled: false, maxDepth: input.maxDepth, skillPaths: input.skillPaths });
	const spawnSpec = getPiSpawnCommand(built.args);
	try {
		return await new Promise<ChildPiRunResult>((resolve) => {
			const child = spawn(spawnSpec.command, spawnSpec.args, buildChildPiSpawnOptions(input.cwd, { ...process.env, ...built.env }));
			if (child.pid) {
				activeChildProcesses.set(child.pid, child);
				input.onSpawn?.(child.pid);
			}
			let stdout = "";
			let stderr = "";
			let settled = false;
			let childExited = false;
			let postExitGuardCleanup: (() => void) | undefined;
			let finalDrainTimer: NodeJS.Timeout | undefined;
			let hardKillTimer: NodeJS.Timeout | undefined;
			let noResponseTimer: NodeJS.Timeout | undefined;
			const finalDrainMs = input.finalDrainMs ?? FINAL_DRAIN_MS;
			const hardKillMs = input.hardKillMs ?? HARD_KILL_MS;
			// A valid non-negative environment override wins over the per-call value.
			const responseTimeoutEnv = Number.parseInt(process.env.PI_TEAMS_CHILD_RESPONSE_TIMEOUT_MS ?? "", 10);
			const responseTimeoutMs = Number.isFinite(responseTimeoutEnv) && responseTimeoutEnv >= 0 ? responseTimeoutEnv : input.responseTimeoutMs ?? RESPONSE_TIMEOUT_MS;
			let responseTimeoutHit = false;
			let forcedFinalDrain = false;
			let abortRequested = input.signal?.aborted === true;
			let hardKilled = false;
			const cleanupErrors: string[] = [];

			// (Re)arms the inactivity watchdog; a timeout of 0 or less disables it.
			const restartNoResponseTimer = (): void => {
				if (responseTimeoutMs <= 0) return;
				if (noResponseTimer) clearTimeout(noResponseTimer);
				noResponseTimer = setTimeout(() => {
					responseTimeoutHit = true;
					killProcessTree(child.pid, child);
					try {
						child.kill(process.platform === "win32" ? undefined : "SIGTERM");
					} catch (error) {
						logInternalError("child-pi.response-timeout-term", error, `pid=${child.pid}`);
					}
				}, responseTimeoutMs);
				noResponseTimer.unref();
			};
			const clearNoResponseTimer = (): void => {
				if (noResponseTimer) clearTimeout(noResponseTimer);
				noResponseTimer = undefined;
			};
			restartNoResponseTimer();

			const lineObserver = new ChildPiLineObserver({
				...input,
				onStdoutLine: (line) => {
					restartNoResponseTimer();
					stdout = appendBoundedTail(stdout, `${line}\n`);
					input.onStdoutLine?.(line);
				},
				onJsonEvent: (event) => {
					restartNoResponseTimer();
					input.onJsonEvent?.(event);
					if (!isFinalAssistantEvent(event) || childExited || settled || finalDrainTimer) return;
					// The worker has produced its final answer: give it finalDrainMs to exit
					// by itself, then SIGTERM, then SIGKILL after a further hardKillMs.
					finalDrainTimer = setTimeout(() => {
						if (settled || childExited) return;
						forcedFinalDrain = true;
						try {
							child.kill(process.platform === "win32" ? undefined : "SIGTERM");
						} catch (error) {
							logInternalError("child-pi.final-drain-term", error, `pid=${child.pid}`);
						}
						hardKillTimer = setTimeout(() => {
							if (settled || childExited) return;
							try {
								hardKilled = true;
								child.kill(process.platform === "win32" ? undefined : "SIGKILL");
							} catch (error) {
								logInternalError("child-pi.final-drain-kill", error, `pid=${child.pid}`);
							}
						}, hardKillMs);
						hardKillTimer.unref();
					}, finalDrainMs);
					finalDrainTimer.unref();
				},
			});

			const clearFinalDrainTimers = (): void => {
				if (finalDrainTimer) clearTimeout(finalDrainTimer);
				if (hardKillTimer) clearTimeout(hardKillTimer);
				finalDrainTimer = undefined;
				hardKillTimer = undefined;
			};
			const clearPostExitGuard = (): void => {
				if (postExitGuardCleanup) {
					postExitGuardCleanup();
					postExitGuardCleanup = undefined;
				}
			};
			const clearChildPiTimeouts = (): void => {
				clearNoResponseTimer();
				clearFinalDrainTimers();
				clearPostExitGuard();
			};

			// Resolves the promise exactly once and releases timers, listeners and the temp dir.
			const settle = (result: ChildPiRunResult): void => {
				if (settled) return;
				settled = true;
				clearChildPiTimeouts();
				lineObserver.flush();
				input.signal?.removeEventListener("abort", abort);
				try {
					cleanupTempDir(built.tempDir);
				} catch (error) {
					cleanupErrors.push(error instanceof Error ? error.message : String(error));
				}
				resolve({ ...result, exitStatus: result.exitStatus ?? { exitCode: result.exitCode, cancelled: abortRequested, timedOut: responseTimeoutHit, killed: hardKilled, cleanupErrors, finalDrainMs } });
			};

			const abort = (): void => {
				abortRequested = true;
				killProcessTree(child.pid, child);
				if (process.platform !== "win32") {
					trySignalChild(child, "SIGTERM");
				}
				try {
					child.kill(process.platform === "win32" ? undefined : "SIGTERM");
				} catch {
					// Ignore kill races.
				}
			};

			input.signal?.addEventListener("abort", abort, { once: true });
			child.stdout?.on("data", (chunk: Buffer) => {
				restartNoResponseTimer();
				lineObserver.observe(chunk.toString("utf-8"));
			});
			child.stderr?.on("data", (chunk: Buffer) => {
				restartNoResponseTimer();
				stderr = appendBoundedTail(stderr, chunk.toString("utf-8"));
			});
			child.on("error", (error) => {
				settle({ exitCode: null, stdout, stderr, error: error.message });
			});
			child.on("exit", () => {
				if (child.pid) {
					activeChildProcesses.delete(child.pid);
					clearHardKillTimer(child.pid);
				}
				childExited = true;
				clearNoResponseTimer();
				clearFinalDrainTimers();
				if (!postExitGuardCleanup) {
					postExitGuardCleanup = attachPostExitStdioGuard(child, {
						idleMs: POST_EXIT_STDIO_GUARD_MS,
						hardMs: HARD_KILL_MS,
					});
				}
			});
			child.on("close", (exitCode) => {
				if (child.pid) {
					activeChildProcesses.delete(child.pid);
					clearHardKillTimer(child.pid);
				}
				const timeoutError = responseTimeoutHit && !stderr.trim() ? { error: `Child Pi produced no new output for ${responseTimeoutMs}ms; process was terminated as unresponsive.` } : undefined;
				const finalExitCode = forcedFinalDrain && !timeoutError ? 0 : exitCode;
				// A final assistant event is the child Pi contract for "the worker produced its answer".
				// Some Pi processes can linger during post-final cleanup/stdio shutdown; finalDrain terminates
				// that lingering process so the parent can continue, but it must not turn a completed
				// subagent answer into a failed task. Real pre-final response timeouts still report errors.
				settle({ exitCode: finalExitCode, stdout, stderr, ...(timeoutError ? { error: timeoutError.error } : {}), exitStatus: { exitCode: finalExitCode, cancelled: abortRequested, timedOut: responseTimeoutHit, killed: hardKilled, cleanupErrors, finalDrainMs } });
			});

			// An AbortSignal that was already aborted before the listener was attached
			// never dispatches "abort" again, so trigger the same termination path by
			// hand. Without this the child would run to completion even though the
			// result is reported as cancelled.
			if (input.signal?.aborted) abort();
		});
	} finally {
		cleanupTempDir(built.tempDir);
	}
}
|
||||
190
extensions/pi-crew/src/runtime/completion-guard.ts
Normal file
190
extensions/pi-crew/src/runtime/completion-guard.ts
Normal file
@@ -0,0 +1,190 @@
|
||||
import * as fs from "node:fs";
|
||||
import type { TeamTaskState, TeamRunManifest } from "../state/types.ts";
|
||||
|
||||
// ============================================================================
|
||||
// Phase 1.2: Completion Mutation Guard — detects tasks that claim success but
|
||||
// made no observable mutations. Used by task-runner.ts.
|
||||
// ============================================================================
|
||||
|
||||
/** Input for the completion mutation guard. */
export interface CompletionMutationGuardInput {
	/** Role of the agent that ran the task; only roles in `MUTATING_ROLES` are expected to mutate. */
	role: string;
	/** Task description; a read-only hint in this text waives the mutation expectation. */
	taskText?: string;
	/** Path of a JSON-lines transcript; when the file exists it is used instead of `stdout`. */
	transcriptPath?: string;
	/** Captured stdout, used when no transcript file is available. */
	stdout?: string;
}
|
||||
|
||||
/** Verdict produced by `evaluateCompletionMutationGuard`. */
export interface CompletionMutationGuardResult {
	/** Whether the role and task text imply that the task should have changed something. */
	expectedMutation: boolean;
	/** Whether at least one observed tool call was classified as mutating. */
	observedMutation: boolean;
	/** Present only when a mutation was expected but none was observed. */
	reason?: "no_mutation_observed";
	/** Names of all tool calls found, in encounter order (duplicates included). */
	observedTools: string[];
}
|
||||
|
||||
/** Roles whose tasks are expected to change files unless the task text says otherwise. */
const MUTATING_ROLES = new Set(["executor", "test-engineer"]);

/** Tool names (lower-case) that always count as a mutation. */
const MUTATING_TOOLS = new Set(["edit", "write", "multi_edit", "apply_patch", "replace_in_file", "insert", "delete_files", "create_file", "overwrite", "patch"]);

/** Shell commands that are read-only when they appear at the START of the command line. */
const READ_ONLY_COMMANDS = /^(pwd|ls|dir|cat|type|sed|grep|rg|find|git\s+(status|diff|log|show|branch|remote|rev-parse|ls-files)|npm\s+(test|run\s+(typecheck|check|lint|test|ci))|node\s+--test)\b/i;

/**
 * Shell fragments that indicate a mutation anywhere in the command line.
 *
 * The pattern has two groups because a single trailing `\b` cannot serve both:
 * - Alternatives that end in a non-word character (`rm -`, `mv `, `cp `, a
 *   `>` redirect, `dd ... of=`). A trailing `\b` there would demand a word
 *   character next, which silently rejects path arguments such as `./a`,
 *   `../x` or `/tmp/out` and redirects written as `> file`. The whitespace-ending
 *   commands instead require some argument, and redirects require a target that
 *   is neither an fd duplication (`>&`) nor `/dev/null`.
 * - Alternatives that end in a word character keep the trailing `\b` so that,
 *   for example, `npm i` does not match `npm init`.
 */
const MUTATING_COMMANDS = /\b(?:(?:rm\s+-|(?:del|erase|mv|move|cp|copy)\s+(?=\S)|(?:python|node|echo)\b.*>\s*(?!&|\/dev\/null\b)\S|dd\b.*of=)|(?:mkdir|touch|git\s+(?:add|commit|push|reset|clean|checkout|switch|merge|rebase|stash)|npm\s+(?:install|i|uninstall|publish|version)|pnpm\s+(?:add|install|remove)|yarn\s+(?:add|install|remove)|Set-Content|Out-File|sed\s+-i|tee|wget\b.*-O|curl\b.*-o)\b)/i;

/** Phrases (English and Vietnamese) in the task text that mark a task as read-only. */
const READ_ONLY_HINTS = /\b(read-only|no edits?|do not edit|không sửa|khong sua|chỉ đọc|chi doc|plan only|chỉ lập plan|review only|audit only)\b/i;
|
||||
|
||||
/** Narrows `value` to a string-keyed record; arrays, primitives and nullish values give `undefined`. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
	const isPlainObjectLike = Boolean(value) && typeof value === "object" && !Array.isArray(value);
	if (!isPlainObjectLike) return undefined;
	return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Extracts the shell command text from tool-call arguments.
 *
 * A string is returned as-is; any other non-record value gives `""`. For a
 * record, the first string found under `command`, `cmd`, `script` or `input`
 * (in that order) is returned, otherwise the JSON form of the whole record.
 */
function commandText(value: unknown): string {
	const record = asRecord(value);
	if (!record) {
		return typeof value === "string" ? value : "";
	}
	const candidateKeys = ["command", "cmd", "script", "input"];
	const found = candidateKeys.map((key) => record[key]).find((candidate) => typeof candidate === "string");
	return typeof found === "string" ? found : JSON.stringify(record);
}
|
||||
|
||||
/**
 * Decides whether a tool call counts as a mutation.
 *
 * Tools listed in `MUTATING_TOOLS` always do. For `bash` / `shell` /
 * `powershell` the command text decides: an empty command does not count, a
 * command matching `MUTATING_COMMANDS` does, one matching only
 * `READ_ONLY_COMMANDS` does not, and an unrecognised command is treated as
 * potentially mutating. Every other tool is considered non-mutating.
 */
function isMutatingTool(tool: string, args: unknown): boolean {
	const name = tool.toLowerCase();
	if (MUTATING_TOOLS.has(name)) return true;

	const isShellTool = ["bash", "shell", "powershell"].includes(name);
	if (!isShellTool) return false;

	const command = commandText(args).trim();
	if (command === "") return false;

	// Mutating patterns take priority: `sed -i` mutates even though plain `sed` is read-only.
	// Anything that is not recognisably read-only is assumed to mutate.
	return MUTATING_COMMANDS.test(command) || !READ_ONLY_COMMANDS.test(command);
}
|
||||
|
||||
/**
 * Extracts tool calls from one transcript event.
 *
 * Two places are inspected: the event itself (when its type is
 * `tool_execution_start`, `toolCall` or `tool_call`) and each part of its
 * content array (`event.content`, else `event.message.content`) with one of
 * those same types. The event-level call, if any, comes first in the result.
 */
function collectToolCallsFromEvent(event: unknown): Array<{ tool: string; args?: unknown }> {
	const record = asRecord(event);
	if (!record) return [];

	const isToolCallType = (type: unknown): boolean =>
		type === "tool_execution_start" || type === "toolCall" || type === "tool_call";
	const found: Array<{ tool: string; args?: unknown }> = [];

	// Event-level call: name precedence is toolName > name > tool, args precedence is args > input.
	const eventTool = record.toolName ?? record.name ?? record.tool;
	if (typeof eventTool === "string" && isToolCallType(record.type)) {
		found.push({ tool: eventTool, args: record.args ?? record.input });
	}

	const parts = Array.isArray(record.content) ? record.content : asRecord(record.message)?.content;
	if (!Array.isArray(parts)) return found;

	for (const part of parts) {
		const item = asRecord(part);
		if (!item) continue;
		// Part-level call: name precedence is name > toolName > tool, args precedence is input > args.
		const partTool = item.name ?? item.toolName ?? item.tool;
		if (typeof partTool !== "string" || !isToolCallType(item.type)) continue;
		found.push({ tool: partTool, args: item.input ?? item.args });
	}
	return found;
}
|
||||
|
||||
/**
 * Returns the text to scan for tool calls: the transcript file's contents when
 * a path is given and the file exists, otherwise the captured stdout (or `""`).
 */
function transcriptText(input: CompletionMutationGuardInput): string {
	const file = input.transcriptPath;
	if (file) {
		if (fs.existsSync(file)) return fs.readFileSync(file, "utf-8");
	}
	return input.stdout ?? "";
}
|
||||
|
||||
/**
 * Reports whether a task is expected to produce mutations: its role must be in
 * `MUTATING_ROLES` and its task text must not contain a read-only hint.
 */
export function expectsImplementationMutation(input: Pick<CompletionMutationGuardInput, "role" | "taskText">): boolean {
	const roleMutates = MUTATING_ROLES.has(input.role);
	return roleMutates && !READ_ONLY_HINTS.test(input.taskText ?? "");
}
|
||||
|
||||
/**
 * Scans a task's transcript (or stdout) for tool calls and reports whether an
 * expected mutation was actually observed.
 *
 * The text is read as JSON lines; blank or unparseable lines are ignored.
 * `reason` is set to `"no_mutation_observed"` only when a mutation was
 * expected and no observed tool call was classified as mutating.
 */
export function evaluateCompletionMutationGuard(input: CompletionMutationGuardInput): CompletionMutationGuardResult {
	const expectedMutation = expectsImplementationMutation(input);
	const observedTools: string[] = [];
	let observedMutation = false;

	const parseLine = (raw: string): { ok: boolean; event?: unknown } => {
		try {
			return { ok: true, event: JSON.parse(raw) };
		} catch {
			return { ok: false };
		}
	};

	const candidates = transcriptText(input)
		.split("\n")
		.map((line) => line.trim())
		.filter((line) => line.length > 0);
	for (const candidate of candidates) {
		const parsed = parseLine(candidate);
		if (!parsed.ok) continue;
		collectToolCallsFromEvent(parsed.event).forEach((call) => {
			observedTools.push(call.tool);
			if (isMutatingTool(call.tool, call.args)) observedMutation = true;
		});
	}

	const missing = expectedMutation && !observedMutation;
	return {
		expectedMutation,
		observedMutation,
		observedTools,
		...(missing ? { reason: "no_mutation_observed" as const } : {}),
	};
}
|
||||
|
||||
// ============================================================================
|
||||
// Phase 11a: Artifact-based Completion Verification — a second layer that
|
||||
// checks whether a completed task actually produced meaningful artifacts.
|
||||
// ============================================================================
|
||||
|
||||
/**
 * Result of `verifyTaskCompletion`: how much evidence backs a task's claimed
 * success, plus any warnings about potentially incomplete work.
 */
export interface CompletionVerifyResult {
	/**
	 * Evidence score from 0 to 3 (0 = no output, 1 = minimal, 2 = moderate,
	 * 3 = strong). One point each for a result artifact, a transcript artifact
	 * and at least one run-level artifact produced by the task.
	 */
	greenLevel: number;
	/** Warnings about potentially incomplete work. */
	warnings: string[];
}
|
||||
|
||||
/** Maximum number of characters `formatOutputPreview` keeps before appending "...". */
const MAX_OUTPUT_PREVIEW = 200;
|
||||
|
||||
/**
 * Reports whether a non-empty error string consists solely of whitespace.
 * A missing or empty error is not considered "trivial" and yields `false`.
 */
function isTrivialError(error: string | undefined): boolean {
	return error ? error.trim() === "" : false;
}
|
||||
|
||||
/**
 * Guards against false-positive task completions by scoring the evidence a
 * task left behind.
 *
 * A task with a non-trivial error scores 0 immediately. Otherwise one point is
 * awarded for each of: a result artifact, a transcript artifact, and at least
 * one manifest artifact whose producer is the task id or its agent. Warnings
 * are added when no run-level artifact exists or when usage reports zero
 * tokens.
 *
 * @param task The task state to verify.
 * @param manifest The run manifest whose artifacts are searched.
 * @returns The green level (0-3) and collected warnings.
 */
export function verifyTaskCompletion(
	task: TeamTaskState,
	manifest: TeamRunManifest,
): CompletionVerifyResult {
	// Check 1: a real error overrides everything else.
	if (task.error && !isTrivialError(task.error)) {
		return { greenLevel: 0, warnings: [`Task has error: ${task.error}`] };
	}

	const warnings: string[] = [];

	// Checks 2 and 3: result artifact and transcript each contribute one point.
	let greenLevel = (task.resultArtifact ? 1 : 0) + (task.transcriptArtifact ? 1 : 0);

	// Check 4: did this task (or its agent) produce any run-level artifact?
	const producedRunArtifact = manifest.artifacts.some(
		(artifact) => artifact.producer === task.id || artifact.producer === task.agent,
	);
	if (producedRunArtifact) {
		greenLevel += 1;
	} else if (greenLevel < 3) {
		warnings.push("No run-level artifacts produced by this task");
	}

	// Check 5: zero token usage suggests the task never really ran.
	if (task.usage) {
		const totalTokens = (task.usage.input ?? 0) + (task.usage.output ?? 0);
		const suspicious = totalTokens === 0 && greenLevel < 3;
		if (suspicious) warnings.push("Task reports zero token usage — may not have executed");
	}

	return { greenLevel: Math.min(greenLevel, 3), warnings };
}
|
||||
|
||||
/**
 * Formats a short preview of task output for diagnostic display.
 *
 * Missing or empty output becomes `"(no output)"`. Otherwise the output is
 * trimmed and, when longer than `MAX_OUTPUT_PREVIEW` characters, cut to that
 * length with `"..."` appended.
 */
export function formatOutputPreview(output: string | undefined): string {
	if (!output) return "(no output)";
	const text = output.trim();
	return text.length > MAX_OUTPUT_PREVIEW ? `${text.slice(0, MAX_OUTPUT_PREVIEW)}...` : text;
}
|
||||
56
extensions/pi-crew/src/runtime/concurrency.ts
Normal file
56
extensions/pi-crew/src/runtime/concurrency.ts
Normal file
@@ -0,0 +1,56 @@
|
||||
import { DEFAULT_CONCURRENCY } from "../config/defaults.ts";
|
||||
|
||||
/** Inputs for `resolveBatchConcurrency`. */
export interface ResolveBatchConcurrencyInput {
	/** Workflow name; selects the default concurrency when nothing else is configured. */
	workflowName: string;
	/** Workflow-level limit; lowest-priority explicit setting. */
	workflowMaxConcurrency?: number;
	/** Team-level limit; overrides the workflow limit. */
	teamMaxConcurrency?: number;
	/** Limit override; takes priority over the team and workflow limits. */
	limitMaxConcurrentWorkers?: number;
	/** When true, the requested value is not clamped to the hard cap. */
	allowUnboundedConcurrency?: boolean;
	/** Upper bound for the result; defaults to `DEFAULT_CONCURRENCY.hardCap`. */
	hardCap?: number;
	/** Number of tasks ready to run. */
	readyCount: number;
	/** Workspace mode (not read by `resolveBatchConcurrency`). */
	workspaceMode?: "single" | "worktree";
	/** Roles of the ready tasks (not read by `resolveBatchConcurrency`). */
	readyRoles?: string[];
}
|
||||
|
||||
/** Outcome of `resolveBatchConcurrency`. */
export interface BatchConcurrencyDecision {
	/** Maximum number of workers allowed to run concurrently. */
	maxConcurrent: number;
	/** Number of ready tasks to start now: `min(readyCount, maxConcurrent)`, or 0 when nothing is ready. */
	selectedCount: number;
	/** The workflow's own concurrency (explicit workflow limit, else the named default). */
	defaultConcurrency: number;
	/** Trace string of the form `source:requested[;capped:N][;unbounded:N];ready:N`. */
	reason: string;
}
|
||||
|
||||
/**
 * Returns the concurrency a workflow runs with by default.
 *
 * An explicit `workflowMaxConcurrency` is returned unchanged. Otherwise the
 * value comes from `DEFAULT_CONCURRENCY`, keyed by workflow name, with
 * `DEFAULT_CONCURRENCY.fallback` for unknown names.
 */
export function defaultWorkflowConcurrency(workflowName: string, workflowMaxConcurrency?: number): number {
	if (workflowMaxConcurrency !== undefined) return workflowMaxConcurrency;
	switch (workflowName) {
		case "parallel-research":
			return DEFAULT_CONCURRENCY.workflow.parallelResearch;
		case "research":
			return DEFAULT_CONCURRENCY.workflow.research;
		case "implementation":
		case "review":
		case "default":
			return DEFAULT_CONCURRENCY.workflow.implementation;
		default:
			return DEFAULT_CONCURRENCY.fallback;
	}
}
|
||||
|
||||
/**
 * Normalises an optional numeric setting to an integer of at least 1.
 * `undefined`, `NaN` and infinite values give `undefined`; finite values are
 * truncated toward zero and then raised to 1 if smaller.
 */
function positiveInteger(value: number | undefined): number | undefined {
	if (value === undefined) return undefined;
	const whole = Math.trunc(value);
	return Number.isFinite(whole) ? Math.max(1, whole) : undefined;
}
|
||||
|
||||
/**
 * Resolves how many ready tasks may start in the current batch.
 *
 * The requested concurrency comes from the first defined setting in priority
 * order: limit override, team limit, workflow limit, workflow default. Unless
 * `allowUnboundedConcurrency` is set, it is clamped to the hard cap. The
 * `reason` string records the source, the requested value, whether it was
 * capped (or exceeded the cap while unbounded) and the ready count.
 */
export function resolveBatchConcurrency(input: ResolveBatchConcurrencyInput): BatchConcurrencyDecision {
	const workflowMax = positiveInteger(input.workflowMaxConcurrency);
	const defaultConcurrency = defaultWorkflowConcurrency(input.workflowName, workflowMax);
	const limitMax = positiveInteger(input.limitMaxConcurrentWorkers);
	const teamMax = positiveInteger(input.teamMaxConcurrency);

	let source: "limit" | "team" | "workflow";
	let requested: number;
	if (limitMax !== undefined) {
		source = "limit";
		requested = limitMax;
	} else if (teamMax !== undefined) {
		source = "team";
		requested = teamMax;
	} else {
		source = "workflow";
		requested = workflowMax ?? defaultWorkflowConcurrency(input.workflowName);
	}

	const hardCap = positiveInteger(input.hardCap) ?? DEFAULT_CONCURRENCY.hardCap;
	const unbounded = Boolean(input.allowUnboundedConcurrency);
	const maxConcurrent = unbounded ? requested : Math.min(requested, hardCap);

	const rawReady = Number.isFinite(input.readyCount) ? input.readyCount : 0;
	const readyCount = Math.max(0, Math.trunc(rawReady));
	const selectedCount = readyCount === 0 ? 0 : Math.min(readyCount, maxConcurrent);

	const reasonParts = [`${source}:${requested}`];
	if (maxConcurrent < requested) reasonParts.push(`capped:${hardCap}`);
	if (unbounded && requested > hardCap) reasonParts.push(`unbounded:${hardCap}`);
	reasonParts.push(`ready:${readyCount}`);

	return {
		maxConcurrent,
		selectedCount,
		defaultConcurrency,
		reason: reasonParts.join(";"),
	};
}
|
||||
88
extensions/pi-crew/src/runtime/crash-recovery.ts
Normal file
88
extensions/pi-crew/src/runtime/crash-recovery.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
|
||||
import type { MetricRegistry } from "../observability/metric-registry.ts";
|
||||
import { appendEvent, scanSequence } from "../state/event-log.ts";
|
||||
import { withRunLockSync } from "../state/locks.ts";
|
||||
import { loadRunManifestById, saveRunTasks, updateRunStatus } from "../state/state-store.ts";
|
||||
import type { TeamTaskState } from "../state/types.ts";
|
||||
import { isWorkerHeartbeatStale } from "./worker-heartbeat.ts";
|
||||
import type { ManifestCache } from "./manifest-cache.ts";
|
||||
import { checkProcessLiveness } from "./process-status.ts";
|
||||
import { reconcileStaleRun, type ReconcileResult } from "./stale-reconciler.ts";
|
||||
|
||||
/** Describes how an interrupted run could be resumed. */
export interface RecoveryPlan {
	/** Id of the interrupted run. */
	runId: string;
	/** Ids of running tasks with a missing, dead or stale heartbeat; these are re-queued on recovery. */
	resumableTasks: string[];
	/** Ids of tasks already in a terminal state (completed, failed, cancelled, skipped). */
	preservedTasks: string[];
	/** Value returned by `scanSequence` for the run's event log when the plan was built. */
	lastEventSeq: number;
}
|
||||
|
||||
/** Reports whether a task has reached a terminal status (completed, failed, cancelled or skipped). */
function isTerminalTask(task: TeamTaskState): boolean {
	switch (task.status) {
		case "completed":
		case "failed":
		case "cancelled":
		case "skipped":
			return true;
		default:
			return false;
	}
}
|
||||
|
||||
/**
 * Decides whether a task should be re-queued during crash recovery.
 *
 * Only `running` tasks qualify, and only when they have no heartbeat, a
 * heartbeat explicitly marked not alive, or a heartbeat that
 * `isWorkerHeartbeatStale` reports as stale for `deadMs`.
 */
function shouldRecoverTask(task: TeamTaskState, deadMs: number): boolean {
	if (task.status !== "running") return false;
	const heartbeat = task.heartbeat;
	if (!heartbeat) return true;
	if (heartbeat.alive === false) return true;
	return isWorkerHeartbeatStale(heartbeat, deadMs);
}
|
||||
|
||||
/**
 * Finds runs that are still marked `running` but appear to have been interrupted.
 *
 * Up to 50 manifests from the cache are examined. A run is skipped when it is
 * not `running`, when its recorded async pid is still alive, when it cannot be
 * loaded, or when none of its tasks qualifies for recovery.
 *
 * @param cwd Directory used to load run manifests.
 * @param manifestCache Source of candidate manifests.
 * @param deadMs Heartbeat age beyond which a running task is considered dead.
 * @returns One recovery plan per interrupted run.
 */
export function detectInterruptedRuns(cwd: string, manifestCache: ManifestCache, deadMs = 300_000): RecoveryPlan[] {
	const plans: RecoveryPlan[] = [];
	for (const manifest of manifestCache.list(50)) {
		if (manifest.status !== "running") continue;

		// A live background process still owns the run; leave it alone.
		const ownerPid = manifest.async?.pid;
		if (ownerPid !== undefined && checkProcessLiveness(ownerPid).alive) continue;

		const loaded = loadRunManifestById(cwd, manifest.runId);
		if (!loaded) continue;

		const resumableTasks = loaded.tasks
			.filter((task) => shouldRecoverTask(task, deadMs))
			.map((task) => task.id);
		if (resumableTasks.length === 0) continue;

		const preservedTasks = loaded.tasks.filter(isTerminalTask).map((task) => task.id);
		const lastEventSeq = scanSequence(loaded.manifest.eventsPath);
		plans.push({ runId: manifest.runId, resumableTasks, preservedTasks, lastEventSeq });
	}
	return plans;
}
|
||||
|
||||
/**
 * Applies a recovery plan: every resumable task is reset to `queued` (start and
 * finish times, error and heartbeat cleared), the task list is saved, a
 * `crew.run.resumed` event is appended and, if a registry is given, the
 * `crew.run.count` counter is incremented with status `resumed`.
 *
 * @throws Error (as a rejected promise) when the run cannot be loaded.
 */
export async function applyRecoveryPlan(plan: RecoveryPlan, ctx: Pick<ExtensionContext, "cwd">, registry?: MetricRegistry): Promise<void> {
	const loaded = loadRunManifestById(ctx.cwd, plan.runId);
	if (!loaded) throw new Error(`Run '${plan.runId}' not found.`);

	const toRequeue = new Set(plan.resumableTasks);
	const tasks = loaded.tasks.map((task) => {
		if (!toRequeue.has(task.id)) return task;
		return { ...task, status: "queued" as const, startedAt: undefined, finishedAt: undefined, error: undefined, heartbeat: undefined };
	});
	saveRunTasks(loaded.manifest, tasks);

	appendEvent(loaded.manifest.eventsPath, {
		type: "crew.run.resumed",
		runId: plan.runId,
		message: `Recovered ${plan.resumableTasks.length} interrupted task(s).`,
		data: { recoveredFromSeq: plan.lastEventSeq, resumableTasks: plan.resumableTasks },
	});
	registry?.counter("crew.run.count", "Total runs by status").inc({ status: "resumed" });
}
|
||||
|
||||
/**
 * Records that an interrupted run will not be resumed: appends a
 * `crew.run.recovery_declined` event and then marks the run `cancelled`.
 *
 * @throws Error when the run cannot be loaded.
 */
export function declineRecoveryPlan(plan: RecoveryPlan, ctx: Pick<ExtensionContext, "cwd">): void {
	const loaded = loadRunManifestById(ctx.cwd, plan.runId);
	if (!loaded) throw new Error(`Run '${plan.runId}' not found.`);

	const { manifest } = loaded;
	// The event is written before the status change, so a failing appendEvent leaves the state untouched.
	appendEvent(manifest.eventsPath, {
		type: "crew.run.recovery_declined",
		runId: plan.runId,
		message: "Interrupted run was not resumed.",
		data: { recoveredFromSeq: plan.lastEventSeq },
	});
	updateRunStatus(manifest, "cancelled", "interrupted-not-resumed");
}
|
||||
|
||||
/**
 * Runs stale-run reconciliation (`reconcileStaleRun`) on every run still
 * marked `running`, examining up to 50 manifests from the cache.
 *
 * Each run is processed under its run lock and re-read inside the lock. A
 * repaired run has its repaired tasks saved (if any), is marked `failed` and
 * gets a `crew.run.reconciled_stale` event.
 *
 * @param cwd Directory used to load run manifests.
 * @param manifestCache Source of candidate manifests.
 * @param now Timestamp passed to the reconciler; defaults to the current time.
 * @returns The results whose verdict is anything other than `healthy`.
 */
export function reconcileAllStaleRuns(cwd: string, manifestCache: ManifestCache, now = Date.now()): ReconcileResult[] {
	const results: ReconcileResult[] = [];
	for (const manifest of manifestCache.list(50)) {
		if (manifest.status !== "running") continue;
		const loaded = loadRunManifestById(cwd, manifest.runId);
		if (!loaded) continue;

		// Hold the run lock so cancel/status handlers cannot modify the same run concurrently.
		withRunLockSync(loaded.manifest, () => {
			// Re-read inside the lock: the run may have changed since the unlocked load above.
			const fresh = loadRunManifestById(cwd, manifest.runId);
			if (!fresh) return;
			if (fresh.manifest.status !== "running") return;

			const outcome = reconcileStaleRun(fresh.manifest, fresh.tasks, now);
			if (outcome.repaired) {
				if (outcome.repairedTasks) saveRunTasks(fresh.manifest, outcome.repairedTasks);
				updateRunStatus(fresh.manifest, "failed", `Stale run reconciled: ${outcome.detail}`);
				appendEvent(fresh.manifest.eventsPath, {
					type: "crew.run.reconciled_stale",
					runId: manifest.runId,
					message: outcome.detail,
					data: { verdict: outcome.verdict },
				});
			}
			if (outcome.verdict !== "healthy") results.push(outcome);
		});
	}
	return results;
}
|
||||
253
extensions/pi-crew/src/runtime/crew-agent-records.ts
Normal file
253
extensions/pi-crew/src/runtime/crew-agent-records.ts
Normal file
@@ -0,0 +1,253 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
||||
import { atomicWriteJson, readJsonFile } from "../state/atomic-write.ts";
|
||||
import { readJsonFileCoalesced } from "../utils/file-coalescer.ts";
|
||||
import type { CrewAgentProgress, CrewAgentRecord, CrewRuntimeKind } from "./crew-agent-runtime.ts";
|
||||
import { taskStatusToAgentStatus } from "./crew-agent-runtime.ts";
|
||||
import { logInternalError } from "../utils/internal-error.ts";
|
||||
import { assertSafePathId, resolveRealContainedPath } from "../utils/safe-paths.ts";
|
||||
import { redactSecretString, redactSecrets } from "../utils/redaction.ts";
|
||||
|
||||
/** Path of the aggregate `agents.json` file inside the run's state root. */
export function agentsPath(manifest: TeamRunManifest): string {
	const { stateRoot } = manifest;
	return path.join(stateRoot, "agents.json");
}
|
||||
|
||||
/** Directory under the run's state root that holds one sub-directory per agent task. */
export function agentsRoot(manifest: TeamRunManifest): string {
	const { stateRoot } = manifest;
	return path.join(stateRoot, "agents");
}
|
||||
|
||||
/**
 * Reduce a task or agent id to its last `:`-separated segment and pass it
 * through `assertSafePathId`, returning that call's result.
 */
function safeAgentTaskId(taskId: string): string {
	// lastIndexOf yields -1 when there is no colon, so the slice keeps the whole id.
	const lastSegment = taskId.slice(taskId.lastIndexOf(":") + 1);
	return assertSafePathId("taskId", lastSegment);
}
|
||||
|
||||
/** State directory for one task: the agents root joined with the sanitized task id. */
export function agentStateDir(manifest: TeamRunManifest, taskId: string): string {
	const root = agentsRoot(manifest);
	const segment = safeAgentTaskId(taskId);
	return path.join(root, segment);
}
|
||||
|
||||
/**
 * Create the agents root and the task's state directory if needed, rejecting
 * either one when it is a symbolic link.
 *
 * @returns The task's state directory path.
 * @throws Error when the agents root or the task directory is a symlink; errors
 * from the task-id check or `resolveRealContainedPath` also propagate.
 */
export function ensureAgentStateDir(manifest: TeamRunManifest, taskId: string): string {
	// Create a directory, then refuse it if the final path component is a symlink.
	const makeRealDirectory = (target: string, label: string): void => {
		fs.mkdirSync(target, { recursive: true });
		if (fs.lstatSync(target).isSymbolicLink()) throw new Error(`${label}: ${target}`);
	};

	const agentsDir = agentsRoot(manifest);
	makeRealDirectory(agentsDir, "Invalid agents root");

	const taskDir = agentStateDir(manifest, taskId);
	makeRealDirectory(taskDir, "Invalid agent state directory");

	// Called for its check only; the returned path is not used here.
	resolveRealContainedPath(agentsDir, path.basename(taskDir));
	return taskDir;
}
|
||||
|
||||
/**
 * Resolve a file inside a task's state directory.
 *
 * A file that does not exist yet is returned as the plain joined path. An
 * existing file must not be a symlink and is resolved through
 * `resolveRealContainedPath` relative to the agents root.
 *
 * @throws Error when the existing file is a symbolic link.
 */
function safeExistingAgentFile(manifest: TeamRunManifest, taskId: string, fileName: string): string {
	const candidate = path.join(agentStateDir(manifest, taskId), fileName);
	if (fs.existsSync(candidate)) {
		if (fs.lstatSync(candidate).isSymbolicLink()) throw new Error(`Invalid agent state file: ${candidate}`);
		const relative = path.join(safeAgentTaskId(taskId), fileName);
		return resolveRealContainedPath(agentsRoot(manifest), relative);
	}
	return candidate;
}
|
||||
|
||||
/**
 * Ensure the task's state directory exists, then return the checked path of
 * `fileName` inside it.
 */
export function agentStateFile(manifest: TeamRunManifest, taskId: string, fileName: string): string {
	ensureAgentStateDir(manifest, taskId);
	const resolved = safeExistingAgentFile(manifest, taskId, fileName);
	return resolved;
}
|
||||
|
||||
/** Path of the task's `status.json`; creates nothing on disk. */
export function agentStatusPath(manifest: TeamRunManifest, taskId: string): string {
	const dir = agentStateDir(manifest, taskId);
	return path.join(dir, "status.json");
}
|
||||
|
||||
/** Path of the task's `events.jsonl`; creates nothing on disk. */
export function agentEventsPath(manifest: TeamRunManifest, taskId: string): string {
	const dir = agentStateDir(manifest, taskId);
	return path.join(dir, "events.jsonl");
}
|
||||
|
||||
/** Path of the task's `output.log`; creates nothing on disk. */
export function agentOutputPath(manifest: TeamRunManifest, taskId: string): string {
	const dir = agentStateDir(manifest, taskId);
	return path.join(dir, "output.log");
}
|
||||
|
||||
/** How long, in milliseconds, a cached agents read stays fresh. */
const AGENT_READER_TTL_MS = 200;

/** Upper bound on the number of entries kept in the async reader cache. */
const ASYNC_AGENT_READER_CACHE_MAX_ENTRIES = 128;

/** One cached `agents.json` read, optionally with the read that is still in progress. */
interface AsyncAgentReaderCacheEntry {
	expiresAt: number;
	records: CrewAgentRecord[];
	inFlight?: Promise<CrewAgentRecord[]>;
}

/** Async reader cache keyed by the `agents.json` file path. */
const asyncAgentReaderCache = new Map<string, AsyncAgentReaderCacheEntry>();
|
||||
|
||||
/**
 * Store `entry` for `filePath` as the most recently inserted cache entry.
 *
 * Expired entries with no read in flight are removed first. After insertion
 * the oldest entries are removed until the cache is within its size limit.
 */
function setAsyncAgentReaderCache(filePath: string, entry: { expiresAt: number; records: CrewAgentRecord[]; inFlight?: Promise<CrewAgentRecord[]> }): void {
	const sweepTime = Date.now();
	for (const [cachedPath, cachedEntry] of asyncAgentReaderCache) {
		const expired = cachedEntry.expiresAt <= sweepTime;
		if (expired && !cachedEntry.inFlight) asyncAgentReaderCache.delete(cachedPath);
	}

	// Delete before set so the key moves to the end of the Map's insertion order.
	asyncAgentReaderCache.delete(filePath);
	asyncAgentReaderCache.set(filePath, entry);

	while (asyncAgentReaderCache.size > ASYNC_AGENT_READER_CACHE_MAX_ENTRIES) {
		const [oldestKey] = asyncAgentReaderCache.keys();
		if (!oldestKey) break;
		asyncAgentReaderCache.delete(oldestKey);
	}
}
|
||||
|
||||
/**
 * Synchronously read the run's agent records through the coalescing reader.
 *
 * Best-effort: any failure while reading yields an empty list. A payload that
 * parses but is not an array is also treated as empty, so callers such as
 * `upsertCrewAgent` can rely on array methods.
 *
 * @returns The stored records, or `[]` when the file is missing, unreadable or not an array.
 */
export function readCrewAgents(manifest: TeamRunManifest): CrewAgentRecord[] {
	try {
		const filePath = agentsPath(manifest);
		const records: unknown = readJsonFileCoalesced(filePath, AGENT_READER_TTL_MS, () => readJsonFile<CrewAgentRecord[]>(filePath) ?? []);
		// readJsonFile's generic parameter is only a type assertion, so verify the shape at runtime.
		return Array.isArray(records) ? (records as CrewAgentRecord[]) : [];
	} catch {
		return [];
	}
}
|
||||
|
||||
/**
 * Asynchronously read the run's agent records with a short-lived cache and
 * de-duplication of concurrent reads.
 *
 * Best-effort: a missing, unreadable or non-array file yields `[]`, and that
 * result is cached for the same TTL as a successful read.
 *
 * @returns The redacted agent records.
 */
export async function readCrewAgentsAsync(manifest: TeamRunManifest): Promise<CrewAgentRecord[]> {
	const filePath = agentsPath(manifest);
	const now = Date.now();
	const cached = asyncAgentReaderCache.get(filePath);
	// Join a read that is already running BEFORE the freshness check. The
	// placeholder entry stored below carries a fresh expiresAt and, on a cold
	// cache, an empty `records` array; checking freshness first would hand
	// concurrent callers that empty array instead of the pending result.
	if (cached?.inFlight) return cached.inFlight;
	if (cached && cached.expiresAt > now) return cached.records;
	const inFlight = (async (): Promise<CrewAgentRecord[]> => {
		try {
			const parsed = JSON.parse(await fs.promises.readFile(filePath, "utf-8")) as unknown;
			const records = Array.isArray(parsed) ? redactSecrets(parsed) as CrewAgentRecord[] : [];
			// Replaces the placeholder entry, clearing `inFlight`.
			setAsyncAgentReaderCache(filePath, { expiresAt: Date.now() + AGENT_READER_TTL_MS, records });
			return records;
		} catch {
			setAsyncAgentReaderCache(filePath, { expiresAt: Date.now() + AGENT_READER_TTL_MS, records: [] });
			return [];
		}
	})();
	// Placeholder so later callers can find and await the pending read.
	setAsyncAgentReaderCache(filePath, { expiresAt: now + AGENT_READER_TTL_MS, records: cached?.records ?? [], inFlight });
	return inFlight;
}
|
||||
|
||||
/**
 * Persist the full list of agent records for a run.
 *
 * Writes the redacted list to `agents.json`, drops the async reader's cached
 * copy for that file, and rewrites the status file of every record.
 */
export function saveCrewAgents(manifest: TeamRunManifest, records: CrewAgentRecord[]): void {
	fs.mkdirSync(manifest.stateRoot, { recursive: true });
	const target = agentsPath(manifest);
	atomicWriteJson(target, redactSecrets(records));
	asyncAgentReaderCache.delete(target);
	records.forEach((record) => {
		writeCrewAgentStatus(manifest, record);
	});
}
|
||||
|
||||
/**
 * Insert `record`, replacing any stored record with the same `id`, and persist
 * the updated list.
 *
 * The record's status file is written by `saveCrewAgents`, which rewrites the
 * status file of every record in the list it saves. No separate status write
 * is needed here.
 */
export function upsertCrewAgent(manifest: TeamRunManifest, record: CrewAgentRecord): void {
	const records = readCrewAgents(manifest).filter((item) => item.id !== record.id);
	records.push(record);
	saveCrewAgents(manifest, records);
}
|
||||
|
||||
/** Write the redacted record to the `status.json` of its task, creating the task directory first. */
export function writeCrewAgentStatus(manifest: TeamRunManifest, record: CrewAgentRecord): void {
	const { taskId } = record;
	ensureAgentStateDir(manifest, taskId);
	const statusFile = agentStatusPath(manifest, taskId);
	atomicWriteJson(statusFile, redactSecrets(record));
}
|
||||
|
||||
/**
 * Read the stored status record for a task or agent id.
 *
 * @returns Whatever `readJsonFile` returns for the status file, or `undefined`
 * when the path check or the read throws.
 */
export function readCrewAgentStatus(manifest: TeamRunManifest, taskOrAgentId: string): CrewAgentRecord | undefined {
	try {
		const statusFile = safeExistingAgentFile(manifest, taskOrAgentId, "status.json");
		return readJsonFile<CrewAgentRecord>(statusFile);
	} catch {
		return undefined;
	}
}
|
||||
|
||||
/**
 * Last known sequence number per events file, valid only while the file's
 * size and mtime still match the values recorded here.
 */
const agentEventSeqCache = new Map<string, { size: number; mtimeMs: number; seq: number }>();

/**
 * Compute the sequence number for the next event appended to `filePath`.
 *
 * Returns 1 when the file does not exist. Otherwise the cached value is used
 * if the file is unchanged; failing that, the file is scanned. Lines with a
 * finite numeric `seq` raise the running maximum, and every other non-blank
 * line (unparsable, or lacking a numeric `seq`) counts as one more event.
 *
 * @param filePath - Path of the JSONL events file.
 * @returns The highest sequence number found plus one.
 */
function nextAgentEventSeq(filePath: string): number {
	// One stat call replaces exists-then-stat, so a file removed between the
	// two calls can no longer make the stat throw.
	const stat = fs.statSync(filePath, { throwIfNoEntry: false });
	if (!stat) return 1;
	const cached = agentEventSeqCache.get(filePath);
	if (cached && cached.size === stat.size && cached.mtimeMs === stat.mtimeMs) return cached.seq + 1;
	let max = 0;
	for (const line of fs.readFileSync(filePath, "utf-8").split(/\r?\n/)) {
		if (!line.trim()) continue;
		try {
			// JSON.parse may return null or a primitive; optional chaining treats
			// those like an object without `seq`.
			const seq = (JSON.parse(line) as { seq?: unknown } | null)?.seq;
			if (typeof seq === "number" && Number.isFinite(seq)) max = Math.max(max, seq);
			else max += 1;
		} catch {
			max += 1;
		}
	}
	agentEventSeqCache.set(filePath, { size: stat.size, mtimeMs: stat.mtimeMs, seq: max });
	return max + 1;
}
|
||||
|
||||
/**
 * Append one event to the task's `events.jsonl` as a redacted JSON line of the
 * form `{ seq, time, event }`.
 *
 * @param manifest - Run whose agent state directory receives the event.
 * @param taskId - Task (or agent) id selecting the state directory.
 * @param event - Payload stored under the `event` key.
 */
export function appendCrewAgentEvent(manifest: TeamRunManifest, taskId: string, event: unknown): void {
	// agentStateFile() creates and validates the state directory itself, so no
	// separate ensureAgentStateDir() call is needed.
	const filePath = agentStateFile(manifest, taskId, "events.jsonl");
	const seq = nextAgentEventSeq(filePath);
	fs.appendFileSync(filePath, `${JSON.stringify(redactSecrets({ seq, time: new Date().toISOString(), event }))}\n`, "utf-8");
	try {
		// Record the post-append size/mtime so the next call can skip rescanning the file.
		const stat = fs.statSync(filePath);
		agentEventSeqCache.set(filePath, { size: stat.size, mtimeMs: stat.mtimeMs, seq });
	} catch (error) {
		// The event is already written; a failed stat only costs a rescan later.
		logInternalError("crew-agent-records.stat", error, `filePath=${filePath}`);
	}
}
|
||||
|
||||
/** Paging options accepted by `readCrewAgentEventsCursor`. */
export interface CrewAgentEventCursorOptions {
	/** Only events with `seq` greater than this are returned; values that are not non-negative integers are treated as 0. */
	sinceSeq?: number;
	/** Maximum number of events to return; values that are not non-negative integers mean no limit. */
	limit?: number;
}
|
||||
|
||||
/** Return every stored event for a task, using the cursor reader with default options. */
export function readCrewAgentEvents(manifest: TeamRunManifest, taskId: string): unknown[] {
	const { events } = readCrewAgentEventsCursor(manifest, taskId);
	return events;
}
|
||||
|
||||
/**
 * Read a page of events from a task's `events.jsonl`.
 *
 * Each non-blank line becomes one event. Lines that parse to an object without
 * a numeric `seq` get their 1-based line position as `seq`. Lines that fail to
 * parse, or parse to a non-object, are returned as `{ seq, raw }`.
 *
 * @param manifest - Run whose agent state is read.
 * @param taskId - Task (or agent) id selecting the state directory.
 * @param options - `sinceSeq` / `limit` paging; invalid values fall back to 0 / no limit.
 * @returns `path` of the file read ("" when the id or file is rejected), the
 * page of `events`, `nextSeq` (highest `seq` returned, or the effective
 * `sinceSeq` when nothing was returned) and `total` events after `sinceSeq`.
 */
export function readCrewAgentEventsCursor(manifest: TeamRunManifest, taskId: string, options: CrewAgentEventCursorOptions = {}): { path: string; events: unknown[]; nextSeq: number; total: number } {
	// Validate once up front so every return path reports the same effective cursor.
	const sinceSeq = typeof options.sinceSeq === "number" && Number.isInteger(options.sinceSeq) && options.sinceSeq >= 0 ? options.sinceSeq : 0;
	const limit = typeof options.limit === "number" && Number.isInteger(options.limit) && options.limit >= 0 ? options.limit : undefined;
	const emptyPage = (pagePath: string): { path: string; events: unknown[]; nextSeq: number; total: number } => ({ path: pagePath, events: [], nextSeq: sinceSeq, total: 0 });

	let filePath: string;
	try {
		filePath = agentEventsPath(manifest, taskId);
	} catch {
		return emptyPage("");
	}
	if (!fs.existsSync(filePath)) return emptyPage(filePath);
	try {
		filePath = safeExistingAgentFile(manifest, taskId, "events.jsonl");
	} catch {
		return emptyPage("");
	}

	// Skip whitespace-only lines as well as empty ones, matching how the
	// sequence counter for this file treats blank lines.
	const lines = fs.readFileSync(filePath, "utf-8").split(/\r?\n/).filter((line) => line.trim().length > 0);
	const parsed = lines.map((line, index): Record<string, unknown> => {
		try {
			const value = JSON.parse(line) as unknown;
			// null and primitives cannot carry a `seq`; surface them as raw lines.
			if (typeof value !== "object" || value === null) return { seq: index + 1, raw: line };
			const event = value as Record<string, unknown>;
			if (typeof event.seq !== "number") event.seq = index + 1;
			return event;
		} catch {
			return { seq: index + 1, raw: line };
		}
	});
	const filtered = parsed.filter((event) => typeof event.seq === "number" && event.seq > sinceSeq);
	const events = limit !== undefined ? filtered.slice(0, limit) : filtered;
	const returnedMaxSeq = events.reduce((max, event) => typeof event.seq === "number" ? Math.max(max, event.seq) : max, sinceSeq);
	return { path: filePath, events, nextSeq: returnedMaxSeq, total: filtered.length };
}
|
||||
|
||||
/**
 * Append a chunk of agent output to the task's `output.log`, redacted and
 * terminated with a newline. Empty or whitespace-only text is ignored.
 */
export function appendCrewAgentOutput(manifest: TeamRunManifest, taskId: string, text: string): void {
	if (!text.trim()) return;
	// agentStateFile() creates and validates the state directory itself, so no
	// separate ensureAgentStateDir() call is needed.
	const outputFile = agentStateFile(manifest, taskId, "output.log");
	fs.appendFileSync(outputFile, `${redactSecretString(text)}\n`, "utf-8");
}
|
||||
|
||||
/** Build a fresh progress object with empty tool/output lists and a zero tool count. */
export function emptyCrewAgentProgress(): CrewAgentProgress {
	const progress: CrewAgentProgress = {
		recentTools: [],
		recentOutput: [],
		toolCount: 0,
	};
	return progress;
}
|
||||
|
||||
/**
 * Pick the model to report for a task: the model of the first successful
 * attempt, otherwise the model of the last attempt.
 *
 * @returns `undefined` when the task has no model attempts.
 */
function modelFromTask(task: TeamTaskState): string | undefined {
	const attempts = task.modelAttempts;
	if (!attempts || attempts.length === 0) return undefined;
	const firstSuccess = attempts.find((attempt) => attempt.success);
	const lastAttempt = attempts[attempts.length - 1];
	return firstSuccess?.model ?? lastAttempt?.model;
}
|
||||
|
||||
/**
 * Build the agent record that mirrors a task's current state.
 *
 * The record id is `<runId>:<taskId>`. `startedAt` falls back to the current
 * time when the task has none, and the transcript path falls back to the
 * task's log artifact.
 *
 * @param manifest - Run the task belongs to.
 * @param task - Task state to convert.
 * @param runtime - Runtime kind stored on the record.
 */
export function recordFromTask(manifest: TeamRunManifest, task: TeamTaskState, runtime: CrewRuntimeKind): CrewAgentRecord {
	const { runId } = manifest;
	const { id: taskId, agentProgress } = task;
	const startedAt = task.startedAt ?? new Date().toISOString();
	const transcriptPath = task.transcriptArtifact?.path ?? task.logArtifact?.path;
	const record: CrewAgentRecord = {
		id: `${runId}:${taskId}`,
		runId,
		taskId,
		agent: task.agent,
		role: task.role,
		runtime,
		status: taskStatusToAgentStatus(task.status),
		startedAt,
		completedAt: task.finishedAt,
		resultArtifactPath: task.resultArtifact?.path,
		transcriptPath,
		statusPath: agentStatusPath(manifest, taskId),
		eventsPath: agentEventsPath(manifest, taskId),
		outputPath: agentOutputPath(manifest, taskId),
		toolUses: agentProgress?.toolCount,
		jsonEvents: task.jsonEvents,
		model: modelFromTask(task),
		routing: task.modelRouting,
		usage: task.usage,
		progress: agentProgress,
		error: task.error,
	};
	return record;
}
|
||||
59
extensions/pi-crew/src/runtime/crew-agent-runtime.ts
Normal file
59
extensions/pi-crew/src/runtime/crew-agent-runtime.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
import type { TeamTaskStatus } from "../state/contracts.ts";
|
||||
import type { CrewActivityState, ModelRoutingState, UsageState } from "../state/types.ts";
|
||||
|
||||
/** Runtime kinds an agent record can carry. */
export type CrewRuntimeKind = "scaffold" | "child-process" | "live-session";

/** Status values an agent record can carry. */
export type CrewAgentStatus = "queued" | "running" | "waiting" | "completed" | "failed" | "cancelled" | "stopped";
|
||||
|
||||
/** One entry in an agent's list of recent tool calls. */
export interface CrewAgentRecentTool {
	tool: string;
	args?: string;
	/** End time of the call — presumably an ISO-8601 string; confirm against the producer. */
	endedAt: string;
}
|
||||
|
||||
/**
 * Progress snapshot for an agent. Only the two lists and `toolCount` are
 * required; every other field is optional.
 */
export interface CrewAgentProgress {
	currentTool?: string;
	currentToolArgs?: string;
	currentToolStartedAt?: string;
	recentTools: CrewAgentRecentTool[];
	recentOutput: string[];
	toolCount: number;
	tokens?: number;
	turns?: number;
	durationMs?: number;
	/** Timestamp string that is parsed with `new Date(...)` when the activity age is computed. */
	lastActivityAt?: string;
	activityState?: CrewActivityState;
	failedTool?: string;
}
|
||||
|
||||
/** Persisted description of one agent working on one task of a run. */
export interface CrewAgentRecord {
	id: string;
	runId: string;
	taskId: string;
	agent: string;
	role: string;
	runtime: CrewRuntimeKind;
	status: CrewAgentStatus;
	/** Timestamp string; used as the fallback when no `progress.lastActivityAt` is available. */
	startedAt: string;
	completedAt?: string;
	resultArtifactPath?: string;
	transcriptPath?: string;
	statusPath?: string;
	eventsPath?: string;
	outputPath?: string;
	toolUses?: number;
	jsonEvents?: number;
	model?: string;
	routing?: ModelRoutingState;
	usage?: UsageState;
	progress?: CrewAgentProgress;
	error?: string;
}
|
||||
|
||||
/**
 * Map a task status to the corresponding agent status.
 *
 * `completed`, `failed`, `running` and `waiting` map to themselves,
 * `cancelled` and `skipped` both map to `cancelled`, and anything else maps to
 * `queued`.
 */
export function taskStatusToAgentStatus(status: TeamTaskStatus): CrewAgentStatus {
	switch (status) {
		case "completed":
			return "completed";
		case "failed":
			return "failed";
		case "cancelled":
		case "skipped":
			return "cancelled";
		case "running":
			return "running";
		case "waiting":
			return "waiting";
		default:
			return "queued";
	}
}
|
||||
47
extensions/pi-crew/src/runtime/deadletter.ts
Normal file
47
extensions/pi-crew/src/runtime/deadletter.ts
Normal file
@@ -0,0 +1,47 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import type { TeamRunManifest } from "../state/types.ts";
|
||||
|
||||
import { logInternalError } from "../utils/internal-error.ts";
|
||||
|
||||
/** Reasons recorded on a dead-letter entry. */
export type DeadletterReason = "max-retries" | "heartbeat-dead" | "manual";
|
||||
|
||||
/** One line of a run's `deadletter.jsonl` file. */
export interface DeadletterEntry {
	taskId: string;
	runId: string;
	reason: DeadletterReason;
	attempts: number;
	lastError?: string;
	attemptId?: string;
	timestamp: string;
}
|
||||
|
||||
/** Path of the run's `deadletter.jsonl` file inside its state root. */
export function deadletterPath(manifest: TeamRunManifest): string {
	const { stateRoot } = manifest;
	return path.join(stateRoot, "deadletter.jsonl");
}
|
||||
|
||||
/**
 * Append one entry to the run's dead-letter file as a JSON line.
 *
 * Best-effort: failures are passed to `logInternalError` and not rethrown.
 */
export function appendDeadletter(manifest: TeamRunManifest, entry: DeadletterEntry): void {
	try {
		fs.mkdirSync(manifest.stateRoot, { recursive: true });
		const serialized = `${JSON.stringify(entry)}\n`;
		fs.appendFileSync(deadletterPath(manifest), serialized, "utf-8");
	} catch (error) {
		logInternalError("deadletter.append", error, `taskId=${entry.taskId}`);
	}
}
|
||||
|
||||
/**
 * Read the most recent entries from the run's dead-letter file.
 *
 * Lines that are not valid JSON, or lack string `taskId` / `runId` fields,
 * are skipped.
 *
 * @param manifest - Run whose dead-letter file is read.
 * @param maxEntries - Maximum number of trailing lines to consider; zero,
 * negative or NaN yields an empty result.
 * @returns The valid entries among the last `maxEntries` lines, oldest first.
 */
export function readDeadletter(manifest: TeamRunManifest, maxEntries = 1000): DeadletterEntry[] {
	// slice(-0) is slice(0), which would return every entry when none were
	// asked for; negative and NaN limits are rejected for the same reason.
	if (!(maxEntries > 0)) return [];
	const filePath = deadletterPath(manifest);
	if (!fs.existsSync(filePath)) return [];
	// The whole file is read; only the last maxEntries lines are parsed and returned.
	const raw = fs.readFileSync(filePath, "utf-8");
	const lines = raw.split(/\r?\n/).filter(Boolean);
	const tail = lines.slice(-maxEntries);
	return tail.flatMap((line) => {
		try {
			const parsed = JSON.parse(line) as DeadletterEntry;
			return parsed && typeof parsed.taskId === "string" && typeof parsed.runId === "string" ? [parsed] : [];
		} catch {
			return [];
		}
	});
}
|
||||
175
extensions/pi-crew/src/runtime/delivery-coordinator.ts
Normal file
175
extensions/pi-crew/src/runtime/delivery-coordinator.ts
Normal file
@@ -0,0 +1,175 @@
|
||||
import type { NotificationDescriptor } from "../extension/notification-router.ts";
|
||||
import { logInternalError } from "../utils/internal-error.ts";
|
||||
|
||||
/** A delivery held in the coordinator's queue until it can be sent. */
export interface PendingDelivery {
	runId: string;
	/** The run result, the notification descriptor, or the steer message, depending on `type`. */
	payload: unknown;
	/** Enqueue time in epoch milliseconds; compared against the pending TTL. */
	timestamp: number;
	type: "result" | "notification" | "steer";
	/** Coordinator generation stamped when the delivery is queued. */
	generation?: number;
}
|
||||
|
||||
/** Optional transport callbacks the delivery coordinator sends through. */
export interface DeliveryCoordinatorDeps {
	/** Emits an event on the active Pi event bus. */
	emit?: (event: string, data: unknown) => void;
	/** Sends a follow-up message to the active session; used for notifications. */
	sendFollowUp?: (title: string, body: string) => void;
	/** Sends a wake-up message to the active session; used for steer messages. */
	sendWakeUp?: (message: string) => void;
}
|
||||
|
||||
/** Maximum age of a queued delivery before the periodic sweep drops it. */
const PENDING_TTL_MS = 24 * 60 * 60 * 1000; // 24 hours

/**
 * Routes run results, notifications and steer messages to the active session.
 *
 * While no session is active, or when a live send throws, deliveries are
 * queued and replayed on the next `activate()`. A timer removes queued entries
 * older than the pending TTL once a minute.
 */
export class DeliveryCoordinator {
	private ownerSessionId: string | undefined;
	private active = false;
	/** Incremented by deactivate(); steer messages queued under an older value are dropped on flush. */
	private generation = 0;
	private pending: PendingDelivery[] = [];
	/** True while the queue is being replayed; deliver* calls made during a replay do not enqueue. */
	private flushing = false;
	private readonly deps: DeliveryCoordinatorDeps;
	private ttlTimer: ReturnType<typeof setInterval> | undefined;

	constructor(deps: DeliveryCoordinatorDeps) {
		this.deps = deps;
		const sweeper = setInterval(() => this.evictExpired(), 60_000);
		// unref() so the sweep timer alone does not keep the process alive.
		sweeper.unref();
		this.ttlTimer = sweeper;
	}

	/** Mark the given session as the owner and replay anything queued. */
	activate(sessionId: string): void {
		this.ownerSessionId = sessionId;
		this.active = true;
		this.flushQueuedResults();
	}

	/** Stop live delivery and advance the generation counter. */
	deactivate(): void {
		this.active = false;
		this.ownerSessionId = undefined;
		this.generation += 1;
	}

	isActive(): boolean {
		return this.active;
	}

	getPendingCount(): number {
		return this.pending.length;
	}

	/** Emit a run result now when possible; otherwise queue it. */
	deliverResult(runId: string, result: unknown): void {
		if (this.active && this.deps.emit) {
			try {
				this.deps.emit("pi-crew:run-result", result);
				return;
			} catch (error) {
				logInternalError("delivery-coordinator.deliverResult", error, `runId=${runId}`);
			}
		}
		if (this.flushing) return;
		this.enqueue({ runId, payload: result, timestamp: Date.now(), type: "result" });
	}

	/**
	 * Send a notification as a follow-up now when possible; otherwise queue it.
	 * After a successful follow-up the notification is also emitted on the event
	 * bus, and failures of that secondary emit are ignored.
	 */
	deliverNotification(notification: NotificationDescriptor): void {
		if (this.active && this.deps.sendFollowUp) {
			let sent = true;
			try {
				this.deps.sendFollowUp(notification.title, notification.body ?? "");
			} catch (error) {
				sent = false;
				logInternalError("delivery-coordinator.deliverNotification", error, `id=${notification.id}`);
			}
			if (sent) {
				if (this.deps.emit) {
					try {
						this.deps.emit("pi-crew:notification", notification);
					} catch { /* secondary delivery, ignore errors */ }
				}
				return;
			}
		}
		if (this.flushing) return;
		this.enqueue({ runId: notification.runId ?? "", payload: notification, timestamp: Date.now(), type: "notification" });
	}

	/** Send a steer message as a wake-up now when possible; otherwise queue it. */
	deliverSteer(runId: string, message: string): void {
		if (this.active && this.deps.sendWakeUp) {
			try {
				this.deps.sendWakeUp(message);
				return;
			} catch (error) {
				logInternalError("delivery-coordinator.deliverSteer", error, `runId=${runId}`);
			}
		}
		if (this.flushing) return;
		this.enqueue({ runId, payload: message, timestamp: Date.now(), type: "steer" });
	}

	/**
	 * Replay the queue while active. Steer messages stamped with a different
	 * generation are dropped; deliveries that cannot be sent or that throw are
	 * put back at the front of the queue with the current generation.
	 */
	flushQueuedResults(): void {
		if (!this.active) return;
		if (this.pending.length === 0) return;
		const drained = this.pending.splice(0);
		this.flushing = true;
		try {
			const undelivered: PendingDelivery[] = [];
			for (const item of drained) {
				const staleSteer = item.type === "steer" && item.generation !== undefined && item.generation !== this.generation;
				if (staleSteer) {
					logInternalError("delivery-coordinator.flush.stale", undefined, `runId=${item.runId} type=${item.type}`);
					continue;
				}
				let handled = false;
				try {
					handled = this.deliverQueued(item);
				} catch (error) {
					logInternalError("delivery-coordinator.flush", error, `runId=${item.runId} type=${item.type}`);
				}
				if (!handled) undelivered.push({ ...item, generation: this.generation });
			}
			this.pending.unshift(...undelivered);
		} finally {
			this.flushing = false;
		}
	}

	/** Deactivate, discard the queue and stop the sweep timer. */
	dispose(): void {
		this.deactivate();
		this.pending.length = 0;
		if (this.ttlTimer) {
			clearInterval(this.ttlTimer);
			this.ttlTimer = undefined;
		}
	}

	/**
	 * Send one queued delivery through the matching callback.
	 *
	 * @returns `false` when the required callback is not configured, `true` after sending.
	 */
	private deliverQueued(delivery: PendingDelivery): boolean {
		switch (delivery.type) {
			case "result": {
				if (!this.deps.emit) return false;
				this.deps.emit("pi-crew:run-result", delivery.payload);
				return true;
			}
			case "notification": {
				const notification = delivery.payload as NotificationDescriptor;
				if (!this.deps.sendFollowUp) return false;
				this.deps.sendFollowUp(notification.title, notification.body ?? "");
				try {
					this.deps.emit?.("pi-crew:notification", notification);
				} catch {
					// Secondary event delivery must not consume the user-facing notification.
				}
				return true;
			}
			case "steer": {
				if (!this.deps.sendWakeUp) return false;
				const { payload } = delivery;
				const message = typeof payload === "string" ? payload : String(payload);
				this.deps.sendWakeUp(message);
				return true;
			}
		}
	}

	/** Queue a delivery, stamping it with the current generation. */
	private enqueue(delivery: PendingDelivery): void {
		this.pending.push({ ...delivery, generation: this.generation });
	}

	/** Drop queued deliveries older than the pending TTL and log how many were removed. */
	private evictExpired(): void {
		const cutoff = Date.now() - PENDING_TTL_MS;
		const sizeBefore = this.pending.length;
		this.pending = this.pending.filter((queued) => queued.timestamp > cutoff);
		const evicted = sizeBefore - this.pending.length;
		if (evicted > 0) {
			logInternalError("delivery-coordinator.evict", undefined, `evicted=${evicted} remaining=${this.pending.length}`);
		}
	}
}
|
||||
100
extensions/pi-crew/src/runtime/diagnostic-export.ts
Normal file
100
extensions/pi-crew/src/runtime/diagnostic-export.ts
Normal file
@@ -0,0 +1,100 @@
|
||||
import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
|
||||
import type { MetricRegistry } from "../observability/metric-registry.ts";
|
||||
import type { MetricSnapshot } from "../observability/metrics-primitives.ts";
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import { readCrewAgents } from "./crew-agent-records.ts";
|
||||
import { readEvents, type TeamEvent } from "../state/event-log.ts";
|
||||
import { loadRunManifestById } from "../state/state-store.ts";
|
||||
import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
||||
import { summarizeHeartbeats, type HeartbeatSummary } from "../ui/heartbeat-aggregator.ts";
|
||||
import type { RunUiSnapshot } from "../ui/snapshot-types.ts";
|
||||
import { redactSecrets } from "../utils/redaction.ts";
|
||||
export { redactSecrets } from "../utils/redaction.ts";
|
||||
|
||||
/** Shape of the JSON document written by `exportDiagnostic`. */
export interface DiagnosticReport {
	/** Set to 2 by `exportDiagnostic` when a metrics snapshot is included; otherwise absent. */
	schemaVersion?: number;
	runId: string;
	/** ISO timestamp taken when the export started. */
	exportedAt: string;
	manifest: TeamRunManifest;
	tasks: TeamTaskState[];
	/** The last 200 events read from the run's event log. */
	recentEvents: TeamEvent[];
	heartbeat: HeartbeatSummary;
	agents: unknown[];
	/** Process environment with values of secret-looking keys replaced by `***`. */
	envRedacted: Record<string, string>;
	metricsSnapshot?: MetricSnapshot[];
}
|
||||
|
||||
/** Case-insensitive match for environment variable names whose values are masked. */
const SECRET_KEY_PATTERN = /(token|key|password|secret|credential|auth)/i;

/**
 * Copy the process environment, replacing the value of every variable whose
 * name matches `SECRET_KEY_PATTERN` with `***`. Variables with non-string
 * values are omitted unless their name matches.
 */
function envRedacted(): Record<string, string> {
	const redacted: Record<string, string> = {};
	for (const name of Object.keys(process.env)) {
		if (SECRET_KEY_PATTERN.test(name)) {
			redacted[name] = "***";
			continue;
		}
		const value = process.env[name];
		if (typeof value === "string") redacted[name] = value;
	}
	return redacted;
}
|
||||
|
||||
/**
 * Assemble a minimal UI snapshot for a run from its manifest, its tasks and
 * the stored agent records. Usage, mailbox, recent events and recent output
 * are filled with zero/empty placeholders.
 */
function buildSnapshot(manifest: TeamRunManifest, tasks: TeamTaskState[]): RunUiSnapshot {
	const agents = readCrewAgents(manifest);

	// Tally the four reported statuses in one pass over the tasks.
	let completed = 0;
	let running = 0;
	let failed = 0;
	let queued = 0;
	for (const task of tasks) {
		switch (task.status) {
			case "completed":
				completed += 1;
				break;
			case "running":
				running += 1;
				break;
			case "failed":
				failed += 1;
				break;
			case "queued":
				queued += 1;
				break;
			default:
				break;
		}
	}

	return {
		runId: manifest.runId,
		cwd: manifest.cwd,
		fetchedAt: Date.now(),
		signature: `${manifest.runId}:${manifest.updatedAt}`,
		manifest,
		tasks,
		agents,
		progress: { total: tasks.length, completed, running, failed, queued },
		usage: { tokensIn: 0, tokensOut: 0, toolUses: 0 },
		mailbox: { inboxUnread: 0, outboxPending: 0, needsAttention: 0 },
		recentEvents: [],
		recentOutputLines: [],
	};
}
|
||||
|
||||
/**
 * Write a diagnostic report for a run to
 * `<artifactsRoot>/diagnostic/diagnostic-<timestamp>.json`.
 *
 * The report holds the redacted manifest, tasks, last 200 events and agent
 * records, a heartbeat summary, the redacted environment and, when a registry
 * is supplied, a redacted metrics snapshot together with `schemaVersion: 2`.
 *
 * @param ctx - Supplies the working directory used to locate the run.
 * @param runId - Id of the run to export.
 * @param options - Optional metric registry to snapshot.
 * @returns The written file path and the report object.
 * @throws Error when the run cannot be loaded.
 */
export async function exportDiagnostic(ctx: Pick<ExtensionContext, "cwd">, runId: string, options: { registry?: MetricRegistry } = {}): Promise<{ path: string; report: DiagnosticReport }> {
	const run = loadRunManifestById(ctx.cwd, runId);
	if (!run) throw new Error(`Run '${runId}' not found.`);
	const { manifest, tasks } = run;

	const exportedAt = new Date().toISOString();
	// Replace ':' and '.' so the timestamp can be used in a file name.
	const fileStamp = exportedAt.replace(/[:.]/g, "-");
	const recentEvents = readEvents(manifest.eventsPath).slice(-200);
	const metricsSnapshot = options.registry?.snapshot();

	const report: DiagnosticReport = {
		...(metricsSnapshot ? { schemaVersion: 2 } : {}),
		runId,
		exportedAt,
		manifest: redactSecrets(manifest) as TeamRunManifest,
		tasks: redactSecrets(tasks) as TeamTaskState[],
		recentEvents: redactSecrets(recentEvents) as TeamEvent[],
		heartbeat: summarizeHeartbeats(buildSnapshot(manifest, tasks)),
		agents: redactSecrets(readCrewAgents(manifest)) as unknown[],
		envRedacted: envRedacted(),
		...(metricsSnapshot ? { metricsSnapshot: redactSecrets(metricsSnapshot) as MetricSnapshot[] } : {}),
	};

	const outputDir = path.join(manifest.artifactsRoot, "diagnostic");
	fs.mkdirSync(outputDir, { recursive: true });
	const outputFile = path.join(outputDir, `diagnostic-${fileStamp}.json`);
	fs.writeFileSync(outputFile, `${JSON.stringify(report, null, 2)}\n`, "utf-8");
	return { path: outputFile, report };
}
|
||||
|
||||
/**
 * Find the most recently modified `diagnostic-*.json` file in `dir` whose age
 * is below `windowMs`.
 *
 * @param dir - Directory to scan.
 * @param windowMs - Maximum age in milliseconds (exclusive).
 * @param now - Reference time in epoch milliseconds; defaults to the current time.
 * @returns The file name (not the full path), or `undefined` when the directory
 * is missing, nothing qualifies, or any filesystem call throws.
 */
export function listRecentDiagnostic(dir: string, windowMs: number, now = Date.now()): string | undefined {
	try {
		if (!fs.existsSync(dir)) return undefined;
		let newest: { file: string; mtimeMs: number } | undefined;
		for (const file of fs.readdirSync(dir)) {
			if (!file.startsWith("diagnostic-") || !file.endsWith(".json")) continue;
			const { mtimeMs } = fs.statSync(path.join(dir, file));
			if (!(now - mtimeMs < windowMs)) continue;
			// Strictly newer only, so the first file seen wins a tie.
			if (!newest || mtimeMs > newest.mtimeMs) newest = { file, mtimeMs };
		}
		return newest?.file;
	} catch {
		return undefined;
	}
}
|
||||
35
extensions/pi-crew/src/runtime/direct-run.ts
Normal file
35
extensions/pi-crew/src/runtime/direct-run.ts
Normal file
@@ -0,0 +1,35 @@
|
||||
import type { AgentConfig } from "../agents/agent-config.ts";
|
||||
import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
||||
import type { TeamConfig } from "../teams/team-config.ts";
|
||||
import type { WorkflowConfig } from "../workflows/workflow-config.ts";
|
||||
|
||||
/** True when the run's workflow is the `direct-agent` workflow. */
export function isDirectRun(manifest: Pick<TeamRunManifest, "team" | "workflow">): boolean {
	const { workflow } = manifest;
	return workflow === "direct-agent";
}
|
||||
|
||||
/**
 * Rebuild the generated single-role team and single-step workflow for a
 * direct-agent run.
 *
 * Agent, role, step id and model come from the first task when present. The
 * agent name otherwise falls back to the team name with a leading `direct-`
 * removed, or `executor` if that leaves nothing.
 *
 * @returns The generated configs, or `undefined` when the run is not a direct run.
 */
export function directTeamAndWorkflowFromRun(manifest: TeamRunManifest, tasks: TeamTaskState[], agents: AgentConfig[]): { team: TeamConfig; workflow: WorkflowConfig } | undefined {
	if (!isDirectRun(manifest)) return undefined;

	const [firstTask] = tasks;
	const agentName = firstTask?.agent ?? (manifest.team.replace(/^direct-/, "") || "executor");
	const matchedAgent = agents.find((candidate) => candidate.name === agentName);
	const role = firstTask?.role ?? "agent";
	const stepId = firstTask?.stepId ?? "01_agent";

	const team: TeamConfig = {
		name: manifest.team,
		description: `Direct subagent run for ${agentName}`,
		source: "builtin",
		filePath: "<generated>",
		roles: [{ name: role, agent: agentName, description: matchedAgent?.description }],
		defaultWorkflow: "direct-agent",
		workspaceMode: manifest.workspaceMode,
	};
	const workflow: WorkflowConfig = {
		name: manifest.workflow ?? "direct-agent",
		description: `Direct task for ${agentName}`,
		source: "builtin",
		filePath: "<generated>",
		steps: [{ id: stepId, role, task: "{goal}", model: firstTask?.model }],
	};
	return { team, workflow };
}
|
||||
76
extensions/pi-crew/src/runtime/effectiveness.ts
Normal file
76
extensions/pi-crew/src/runtime/effectiveness.ts
Normal file
@@ -0,0 +1,76 @@
|
||||
import type { CrewRuntimeConfig } from "../config/config.ts";
|
||||
import type { PolicyDecision, TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
||||
|
||||
/** Configurable reaction when completed tasks show no observable worker activity. */
export type EffectivenessGuardMode = "off" | "warn" | "block" | "fail";

/** Whether worker execution was enabled for the run being evaluated. */
export type WorkerExecutionState = "enabled" | "disabled/scaffold";

/** Severity reported by a run effectiveness evaluation. */
export type RunEffectivenessSeverity = "ok" | "warning" | "blocked" | "failed";
|
||||
|
||||
/** Result of evaluating how much observable work a run's tasks performed. */
export interface RunEffectivenessSummary {
	/** Number of tasks with status `completed`. */
	completed: number;
	observable: number;
	/** Ids of completed tasks without observable worker activity. */
	noObservedWorkTaskIds: string[];
	needsAttentionTaskIds: string[];
	workerExecution: WorkerExecutionState;
	guardMode: EffectivenessGuardMode;
	severity: RunEffectivenessSeverity;
}
|
||||
|
||||
/**
 * Tells whether a task carries any evidence that a worker actually did something.
 *
 * Evidence is any of: a positive tool count, recorded usage, a transcript artifact,
 * at least one successful model attempt, or a truthy `jsonEvents` value.
 */
export function taskHasObservableWorkerActivity(task: TeamTaskState): boolean {
	const toolCalls = task.agentProgress?.toolCount ?? 0;
	if (toolCalls > 0) return true;
	if (task.usage) return true;
	if (task.transcriptArtifact) return true;
	if (task.modelAttempts?.some((attempt) => attempt.success)) return true;
	return Boolean(task.jsonEvents);
}
|
||||
|
||||
/**
 * Picks the effectiveness guard mode for a run.
 *
 * A recognised `effectivenessGuard` value in the runtime config always wins. Otherwise an
 * explicit dry run (per the manifest's runtime resolution) disables the guard, and every
 * other run defaults to "warn".
 */
export function resolveEffectivenessGuardMode(runtimeConfig: CrewRuntimeConfig | undefined, manifest?: TeamRunManifest): EffectivenessGuardMode {
	const configured = runtimeConfig?.effectivenessGuard;
	switch (configured) {
		case "off":
		case "warn":
		case "block":
		case "fail":
			return configured;
		default:
			break;
	}
	const isExplicitDryRun = manifest?.runtimeResolution?.safety === "explicit_dry_run";
	return isExplicitDryRun ? "off" : "warn";
}
|
||||
|
||||
/**
 * Summarises how effective a run was, based on the activity observed for its tasks.
 *
 * `observable` counts completed tasks that show worker activity and are not flagged as
 * needing attention. It is computed per task rather than by subtracting list lengths, so a
 * task that is both "no observed work" and "needs attention" is not subtracted twice, and
 * needs-attention tasks that are not completed do not lower the completed-task score.
 *
 * Severity is only raised when workers were executed, the guard is not "off", and at least
 * one completed task shows no observable work.
 *
 * @param input.manifest Optional manifest, forwarded to guard-mode resolution.
 * @param input.tasks All tasks of the run.
 * @param input.executeWorkers Whether workers were executed.
 * @param input.runtimeConfig Optional runtime config, forwarded to guard-mode resolution.
 */
export function evaluateRunEffectiveness(input: { manifest?: TeamRunManifest; tasks: TeamTaskState[]; executeWorkers: boolean; runtimeConfig?: CrewRuntimeConfig }): RunEffectivenessSummary {
	const needsAttention = (task: TeamTaskState): boolean => task.agentProgress?.activityState === "needs_attention";

	const completedTasks = input.tasks.filter((task) => task.status === "completed");
	const noObservedWorkTasks = completedTasks.filter((task) => !taskHasObservableWorkerActivity(task));
	const needsAttentionTasks = input.tasks.filter(needsAttention);
	const workerExecution: WorkerExecutionState = input.executeWorkers ? "enabled" : "disabled/scaffold";
	const guardMode = resolveEffectivenessGuardMode(input.runtimeConfig, input.manifest);
	const observable = completedTasks.filter((task) => taskHasObservableWorkerActivity(task) && !needsAttention(task)).length;

	let severity: RunEffectivenessSeverity = "ok";
	if (input.executeWorkers && guardMode !== "off" && noObservedWorkTasks.length > 0) {
		severity = guardMode === "fail" ? "failed" : guardMode === "block" ? "blocked" : "warning";
	}

	return {
		completed: completedTasks.length,
		observable,
		noObservedWorkTaskIds: noObservedWorkTasks.map((task) => task.id),
		needsAttentionTaskIds: needsAttentionTasks.map((task) => task.id),
		workerExecution,
		guardMode,
		severity,
	};
}
|
||||
|
||||
/**
 * Renders an effectiveness summary as human-readable report lines.
 *
 * The score denominator is clamped to at least 1, so a run without completed tasks is
 * shown as "0/1" rather than "0/0".
 */
export function formatRunEffectivenessLines(summary: RunEffectivenessSummary): string[] {
	const idsOrNone = (ids: string[]): string => (ids.length ? ids.join(", ") : "none");
	const denominator = Math.max(1, summary.completed);

	const lines: string[] = [];
	lines.push(`Score: ${summary.observable}/${denominator} completed task(s) with observable worker activity`);
	lines.push(`Worker execution: ${summary.workerExecution}`);
	lines.push(`Guard: ${summary.guardMode} severity=${summary.severity}`);
	lines.push(`No observable worker activity: ${idsOrNone(summary.noObservedWorkTaskIds)}`);
	lines.push(`Needs attention: ${idsOrNone(summary.needsAttentionTaskIds)}`);
	return lines;
}
|
||||
|
||||
/**
 * Converts an effectiveness summary into a policy decision.
 *
 * @returns `undefined` unless the severity is "warning", "blocked" or "failed"; otherwise a
 * decision whose action is "notify", "block" or "fail" respectively, stamped with the
 * current time.
 */
export function effectivenessPolicyDecision(summary: RunEffectivenessSummary): PolicyDecision | undefined {
	let action: "fail" | "block" | "notify";
	switch (summary.severity) {
		case "failed":
			action = "fail";
			break;
		case "blocked":
			action = "block";
			break;
		case "warning":
			action = "notify";
			break;
		default:
			return undefined;
	}

	const affectedTasks = summary.noObservedWorkTaskIds.join(", ");
	return {
		action,
		reason: "ineffective_worker",
		message: `Run effectiveness guard ${summary.guardMode}: no observable worker activity for ${affectedTasks}.`,
		createdAt: new Date().toISOString(),
	};
}
|
||||
82
extensions/pi-crew/src/runtime/foreground-control.ts
Normal file
82
extensions/pi-crew/src/runtime/foreground-control.ts
Normal file
@@ -0,0 +1,82 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import { appendEvent } from "../state/event-log.ts";
|
||||
import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
||||
import { checkProcessLiveness, isActiveRunStatus } from "./process-status.ts";
|
||||
import { readCrewAgents } from "./crew-agent-records.ts";
|
||||
|
||||
/** Kinds of foreground control requests. */
export type ForegroundControlRequestType =
	| "interrupt"
	| "status";

/** Snapshot of a run's foreground control state. */
export interface ForegroundControlStatus {
	/** Id of the run. */
	runId: string;
	/** Status recorded in the run manifest. */
	status: TeamRunManifest["status"];
	/** Whether the manifest status counts as an active run status. */
	active: boolean;
	/** Pid of the async runner recorded in the manifest, if any. */
	asyncPid?: number;
	/** Liveness of `asyncPid`; `undefined` when no pid is recorded. */
	asyncAlive?: boolean;
	/** Ids of tasks whose status is "running". */
	runningTasks: string[];
	/** Ids of crew agents whose status is "running". */
	runningAgents: string[];
	/** Location of the foreground control file. */
	controlPath: string;
	/** Most recent request stored in the control file, if any. */
	lastRequest?: ForegroundControlRequest;
}

/** A single request stored in the foreground control file. */
export interface ForegroundControlRequest {
	/** Unique request id. */
	id: string;
	/** Request kind. */
	type: ForegroundControlRequestType;
	/** ISO-8601 creation timestamp. */
	createdAt: string;
	/** Human-readable reason for the request. */
	reason: string;
	/** Acknowledgement flag; new requests are written with `false`. */
	acknowledged: boolean;
}
|
||||
|
||||
/** Returns the path of the foreground control file inside the run's state root. */
export function foregroundControlPath(manifest: TeamRunManifest): string {
	const { stateRoot } = manifest;
	return path.join(stateRoot, "foreground-control.json");
}
|
||||
|
||||
/**
 * Reads the most recent request from a foreground control file.
 *
 * The file content is validated before use: `requests` must be an array and its last
 * element an object. Without that check, a malformed file such as `{"requests": "abc"}`
 * would hand back a stray character typed as a request.
 *
 * @param controlPath Path of the control file.
 * @returns The last stored request, or `undefined` when the file is missing, unreadable,
 * malformed, or holds no requests.
 */
function readLastRequest(controlPath: string): ForegroundControlRequest | undefined {
	if (!fs.existsSync(controlPath)) return undefined;
	try {
		const parsed: unknown = JSON.parse(fs.readFileSync(controlPath, "utf-8"));
		if (!parsed || typeof parsed !== "object") return undefined;
		const requests = (parsed as { requests?: unknown }).requests;
		if (!Array.isArray(requests) || requests.length === 0) return undefined;
		const last: unknown = requests[requests.length - 1];
		return last && typeof last === "object" ? (last as ForegroundControlRequest) : undefined;
	} catch {
		// Best effort: an unreadable or corrupt control file is treated as having no requests.
		return undefined;
	}
}
|
||||
|
||||
/**
 * Collects the foreground control status of a run.
 *
 * Combines manifest fields, liveness of the async runner pid (when one is recorded),
 * the ids of running tasks and crew agents, and the last request in the control file.
 */
export function readForegroundControlStatus(manifest: TeamRunManifest, tasks: TeamTaskState[]): ForegroundControlStatus {
	const controlPath = foregroundControlPath(manifest);
	// Liveness is only probed when the manifest records an async pid.
	const asyncAlive = manifest.async?.pid !== undefined ? checkProcessLiveness(manifest.async.pid).alive : undefined;
	const active = isActiveRunStatus(manifest.status);
	const runningTasks = tasks
		.filter((task) => task.status === "running")
		.map((task) => task.id);
	const runningAgents = readCrewAgents(manifest)
		.filter((agent) => agent.status === "running")
		.map((agent) => agent.id);
	const lastRequest = readLastRequest(controlPath);

	return {
		runId: manifest.runId,
		status: manifest.status,
		active,
		asyncPid: manifest.async?.pid,
		asyncAlive,
		runningTasks,
		runningAgents,
		controlPath,
		lastRequest,
	};
}
|
||||
|
||||
/**
 * Appends an interrupt request to the run's foreground control file and logs an event.
 *
 * Requests already stored in the file are kept. If the file is unreadable or does not hold
 * a `requests` array, the history restarts from empty.
 *
 * @param manifest Run whose control file is updated.
 * @param reason Reason recorded on the request and used as the event message.
 * @returns The newly written, unacknowledged request.
 */
export function writeForegroundInterruptRequest(manifest: TeamRunManifest, reason = "User requested foreground interrupt."): ForegroundControlRequest {
	const controlPath = foregroundControlPath(manifest);

	const loadHistory = (): ForegroundControlRequest[] => {
		if (!fs.existsSync(controlPath)) return [];
		try {
			const stored = JSON.parse(fs.readFileSync(controlPath, "utf-8")) as { requests?: ForegroundControlRequest[] };
			return Array.isArray(stored.requests) ? stored.requests : [];
		} catch {
			return [];
		}
	};
	const history = loadHistory();

	const request: ForegroundControlRequest = {
		id: `fg_${Date.now().toString(36)}_${Math.random().toString(16).slice(2, 10)}`,
		type: "interrupt",
		createdAt: new Date().toISOString(),
		reason,
		acknowledged: false,
	};

	fs.mkdirSync(path.dirname(controlPath), { recursive: true });
	const payload = { requests: [...history, request] };
	fs.writeFileSync(controlPath, `${JSON.stringify(payload, null, 2)}\n`, "utf-8");
	appendEvent(manifest.eventsPath, {
		type: "foreground.interrupt_requested",
		runId: manifest.runId,
		message: reason,
		data: { requestId: request.id, controlPath },
	});
	return request;
}
|
||||
46
extensions/pi-crew/src/runtime/green-contract.ts
Normal file
46
extensions/pi-crew/src/runtime/green-contract.ts
Normal file
@@ -0,0 +1,46 @@
|
||||
import type { GreenLevel, VerificationContract, VerificationEvidence } from "../state/types.ts";
|
||||
|
||||
/** Rank of each green level; a higher rank is a stronger level. */
const GREEN_ORDER: Record<GreenLevel, number> = {
	none: 0,
	targeted: 1,
	package: 2,
	workspace: 3,
	merge_ready: 4,
};

/** Result of checking observed evidence against a verification contract. */
export interface GreenContractOutcome {
	/** Level demanded by the contract. */
	requiredGreenLevel: GreenLevel;
	/** Level found in the evidence ("none" when no evidence was supplied). */
	observedGreenLevel: GreenLevel;
	/** Whether the observed level ranks at least as high as the required one. */
	satisfied: boolean;
}
|
||||
|
||||
/** Returns true when `observed` ranks at least as high as `required` in `GREEN_ORDER`. */
export function greenLevelSatisfies(observed: GreenLevel, required: GreenLevel): boolean {
	const observedRank = GREEN_ORDER[observed];
	const requiredRank = GREEN_ORDER[required];
	return observedRank >= requiredRank;
}
|
||||
|
||||
/**
 * Checks verification evidence against a contract.
 *
 * @param contract Contract naming the required green level.
 * @param evidence Optional evidence; missing evidence counts as level "none".
 */
export function evaluateGreenContract(contract: VerificationContract, evidence?: VerificationEvidence): GreenContractOutcome {
	const { requiredGreenLevel } = contract;
	const observedGreenLevel = evidence?.observedGreenLevel ?? "none";
	const satisfied = greenLevelSatisfies(observedGreenLevel, requiredGreenLevel);
	return { requiredGreenLevel, observedGreenLevel, satisfied };
}
|
||||
|
||||
/**
 * Infers the green level to credit a task with, based only on its success flag.
 *
 * A failed task, or a contract requiring "none", yields "none". A successful task is
 * credited with the contract's required level when manual evidence is allowed, and with
 * "targeted" otherwise.
 */
export function inferGreenLevelFromTask(success: boolean, contract: VerificationContract): GreenLevel {
	if (!success || contract.requiredGreenLevel === "none") {
		return "none";
	}
	if (contract.allowManualEvidence) {
		return contract.requiredGreenLevel;
	}
	return "targeted";
}
|
||||
|
||||
/**
 * Builds a verification evidence record for a task outcome.
 *
 * The observed level is inferred from the success flag and `satisfied` is computed against
 * the contract. Every contract command is listed with status "not_run".
 *
 * @param contract Verification contract of the task.
 * @param success Whether the task succeeded.
 * @param notes Free-form notes stored on the evidence.
 */
export function createVerificationEvidence(contract: VerificationContract, success: boolean, notes: string): VerificationEvidence {
	const observedGreenLevel = inferGreenLevelFromTask(success, contract);
	// Provisional record, used only to evaluate the contract.
	const draft: VerificationEvidence = {
		requiredGreenLevel: contract.requiredGreenLevel,
		observedGreenLevel,
		satisfied: false,
		commands: [],
		notes,
	};
	const { satisfied } = evaluateGreenContract(contract, draft);
	return {
		...draft,
		satisfied,
		commands: contract.commands.map((cmd) => ({ cmd, status: "not_run" as const })),
	};
}
|
||||
106
extensions/pi-crew/src/runtime/group-join.ts
Normal file
106
extensions/pi-crew/src/runtime/group-join.ts
Normal file
@@ -0,0 +1,106 @@
|
||||
import type { CrewRuntimeConfig } from "../config/config.ts";
|
||||
import { writeArtifact } from "../state/artifact-store.ts";
|
||||
import { appendEvent } from "../state/event-log.ts";
|
||||
import { appendMailboxMessage, findMailboxMessageByRequestId, readDeliveryState } from "../state/mailbox.ts";
|
||||
import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
||||
import { aggregateTaskOutputs } from "./task-output-context.ts";
|
||||
|
||||
/** Group-join behaviour: never, for any non-empty batch ("group"), or only for batches of two or more ("smart"). */
export type CrewGroupJoinMode =
	| "off"
	| "group"
	| "smart";

/** Description of one group-join delivery for a batch of tasks. */
export interface CrewGroupJoinDelivery {
	/** Identifier derived from the run id and the batch's task ids. */
	batchId: string;
	/** Mode under which the delivery was produced. */
	mode: CrewGroupJoinMode;
	/** True when the delivery was made while tasks were still outstanding (or when forced by the caller). */
	partial: boolean;
	/** Ids of all tasks in the batch. */
	taskIds: string[];
	/** Ids of batch tasks with status "completed". */
	completed: string[];
	/** Ids of batch tasks with status "failed". */
	failed: string[];
	/** Ids of batch tasks with status "skipped". */
	skipped: string[];
	/** Ids of batch tasks that are still "queued" or "running". */
	remaining: string[];
	/** Metadata artifact written for the delivery. */
	artifact?: ArtifactDescriptor;
	/** Id of the mailbox message carrying the delivery. */
	messageId?: string;
	/** Request id used to de-duplicate mailbox deliveries. */
	requestId?: string;
	/** Whether an acknowledgement is required. */
	ackRequired?: boolean;
	/** Acknowledgement state of the mailbox message. */
	ackStatus?: "pending" | "acknowledged";
}
|
||||
|
||||
/** Returns the configured group-join mode, defaulting to "smart" when none is set. */
export function resolveGroupJoinMode(runtime?: CrewRuntimeConfig): CrewGroupJoinMode {
	const configured = runtime?.groupJoin;
	if (configured === undefined || configured === null) {
		return "smart";
	}
	return configured;
}
|
||||
|
||||
/**
 * Decides whether a batch should be delivered as a group join.
 *
 * "off" never joins, "group" joins any non-empty batch, and every other mode
 * (i.e. "smart") joins only batches with more than one task.
 */
export function shouldGroupJoin(mode: CrewGroupJoinMode, batch: TeamTaskState[]): boolean {
	const size = batch.length;
	switch (mode) {
		case "off":
			return false;
		case "group":
			return size > 0;
		default:
			return size > 1;
	}
}
|
||||
|
||||
/**
 * Builds a batch id from the run id and the batch's task ids.
 *
 * Task ids are joined with "+", and every character outside `[a-zA-Z0-9_+-]` in that joined
 * part is replaced by "_". The run id is used verbatim.
 */
function batchIdFor(runId: string, taskIds: string[]): string {
	const joinedIds = taskIds.join("+");
	const safeIds = joinedIds.replace(/[^a-zA-Z0-9_+-]/g, "_");
	return `${runId}_${safeIds}`;
}
|
||||
|
||||
/** Builds the request id of a group-join delivery; partial and completed deliveries get distinct ids. */
function requestIdFor(runId: string, batchId: string, partial: boolean): string {
	const phase = partial ? "partial" : "completed";
	return `${runId}:group-join:${phase}:${batchId}`;
}
|
||||
|
||||
/** Returns the ids of the tasks whose status equals `status`, in input order. */
function statusList(tasks: TeamTaskState[], status: TeamTaskState["status"]): string[] {
	return tasks.flatMap((task) => (task.status === status ? [task.id] : []));
}
|
||||
|
||||
/**
 * Delivers a group-join summary for a batch of tasks to the leader.
 *
 * Steps, in order:
 *  1. Bail out when the mode does not call for a group join of this batch.
 *  2. Resolve the latest state of each batch task from `allTasks` and bucket the ids by status.
 *  3. Look up an existing mailbox message for the same request id so a repeated call reuses it.
 *  4. Write a metadata artifact describing the delivery.
 *  5. Append the mailbox message (unless reused) and log the corresponding events.
 *
 * @param input.manifest Run the batch belongs to.
 * @param input.mode Group-join mode.
 * @param input.batch Tasks forming the batch (only their ids are used).
 * @param input.allTasks Current task states, used to resolve each batch id.
 * @param input.partial Overrides partial detection; by default the delivery is partial
 * while any batch task is still queued or running.
 * @returns The delivery including artifact and message id, or `undefined` when no group
 * join applies.
 */
export function deliverGroupJoin(input: {
	manifest: TeamRunManifest;
	mode: CrewGroupJoinMode;
	batch: TeamTaskState[];
	allTasks: TeamTaskState[];
	partial?: boolean;
}): CrewGroupJoinDelivery | undefined {
	const { manifest, mode, batch, allTasks } = input;
	if (!shouldGroupJoin(mode, batch)) return undefined;

	const taskIds = batch.map((task) => task.id);
	// Batch ids that are no longer present in allTasks are dropped.
	const latest: TeamTaskState[] = [];
	for (const id of taskIds) {
		const current = allTasks.find((task) => task.id === id);
		if (current) latest.push(current);
	}

	const completed = statusList(latest, "completed");
	const failed = statusList(latest, "failed");
	const skipped = statusList(latest, "skipped");
	const remaining = latest
		.filter((task) => task.status === "queued" || task.status === "running")
		.map((task) => task.id);
	const partial = input.partial ?? remaining.length > 0;
	const phase = partial ? "partial" : "completed";

	const batchId = batchIdFor(manifest.runId, taskIds);
	const summary = aggregateTaskOutputs(latest, manifest);
	const requestId = requestIdFor(manifest.runId, batchId, partial);

	// The delivery-state file takes precedence over the status stored on the message itself.
	const existingMailbox = findMailboxMessageByRequestId(manifest, requestId);
	const existingStatus = existingMailbox
		? readDeliveryState(manifest).messages[existingMailbox.id] ?? existingMailbox.status
		: undefined;

	const delivery: CrewGroupJoinDelivery = {
		batchId,
		mode,
		partial,
		taskIds,
		completed,
		failed,
		skipped,
		remaining,
		requestId,
		ackRequired: true,
		ackStatus: existingStatus === "acknowledged" ? "acknowledged" : "pending",
	};

	const content = `${JSON.stringify({ ...delivery, createdAt: new Date().toISOString() }, null, 2)}\n`;
	const artifact = writeArtifact(manifest.artifactsRoot, {
		kind: "metadata",
		relativePath: `metadata/group-joins/${batchId}.json`,
		producer: "group-join",
		content,
	});

	const mailbox = existingMailbox ?? appendMailboxMessage(manifest, {
		direction: "outbox",
		from: "group-join",
		to: "leader",
		body: [
			`Group join ${phase}: ${taskIds.join(", ")}`,
			`Request: ${requestId}`,
			`Completed: ${completed.join(", ") || "none"}`,
			`Failed: ${failed.join(", ") || "none"}`,
			`Skipped: ${skipped.join(", ") || "none"}`,
			`Remaining: ${remaining.join(", ") || "none"}`,
			"",
			summary,
		].join("\n"),
		status: "delivered",
		data: { kind: "group_join", requestId, batchId, partial, ackRequired: true, taskIds, completed, failed, skipped, remaining },
	});

	appendEvent(manifest.eventsPath, {
		type: partial ? "agent.group_join.partial" : "agent.group_join.completed",
		runId: manifest.runId,
		message: `Group join ${phase} for ${taskIds.length} task(s).`,
		data: { ...delivery, artifactPath: artifact.path, messageId: mailbox.id, fallback: "mailbox-delivered", reused: Boolean(existingMailbox) },
	});
	if (existingMailbox) {
		appendEvent(manifest.eventsPath, {
			type: "agent.group_join.delivery_reused",
			runId: manifest.runId,
			message: `Reused group join mailbox delivery for ${taskIds.length} task(s).`,
			data: { requestId, messageId: mailbox.id, batchId, partial },
		});
	}

	return { ...delivery, artifact, messageId: mailbox.id };
}
|
||||
28
extensions/pi-crew/src/runtime/heartbeat-gradient.ts
Normal file
28
extensions/pi-crew/src/runtime/heartbeat-gradient.ts
Normal file
@@ -0,0 +1,28 @@
|
||||
import type { WorkerHeartbeatState } from "./worker-heartbeat.ts";
|
||||
|
||||
/** Heartbeat health classification, from best to worst. */
export type HeartbeatLevel =
	| "healthy"
	| "warn"
	| "stale"
	| "dead";

/** Heartbeat ages (milliseconds) above which a heartbeat is classified as the given level. */
export interface GradientThresholds {
	/** Age above which a heartbeat is "warn". */
	warnMs: number;
	/** Age above which a heartbeat is "stale". */
	staleMs: number;
	/** Age above which a heartbeat is "dead". */
	deadMs: number;
}

/** Default thresholds: warn after 30 s, stale after 60 s, dead after 5 min. */
export const DEFAULT_GRADIENT_THRESHOLDS: GradientThresholds = {
	warnMs: 30_000,
	staleMs: 60_000,
	deadMs: 300_000,
};
|
||||
|
||||
/**
 * Computes how long ago a heartbeat was last seen.
 *
 * @param heartbeat Heartbeat state; a missing heartbeat counts as infinitely old.
 * @param now Reference time in epoch milliseconds (defaults to the current time).
 * @returns Age in milliseconds, clamped to at least 0, or `Infinity` when the heartbeat is
 * missing or its `lastSeenAt` cannot be parsed.
 */
export function heartbeatAgeMs(heartbeat: WorkerHeartbeatState | undefined, now = Date.now()): number {
	if (!heartbeat) {
		return Number.POSITIVE_INFINITY;
	}
	const lastSeenMs = Date.parse(heartbeat.lastSeenAt);
	if (!Number.isFinite(lastSeenMs)) {
		return Number.POSITIVE_INFINITY;
	}
	// A lastSeenAt in the future (clock skew) is reported as age 0.
	return Math.max(0, now - lastSeenMs);
}
|
||||
|
||||
/**
 * Classifies a heartbeat by its age.
 *
 * A missing heartbeat, one explicitly marked `alive === false`, or one whose age cannot be
 * determined is "dead". Otherwise the age is compared against the thresholds from the most
 * severe to the least severe, using strict "greater than".
 *
 * @param heartbeat Heartbeat state to classify.
 * @param thresholds Age thresholds (defaults to `DEFAULT_GRADIENT_THRESHOLDS`).
 * @param now Reference time in epoch milliseconds (defaults to the current time).
 */
export function classifyHeartbeat(heartbeat: WorkerHeartbeatState | undefined, thresholds: GradientThresholds = DEFAULT_GRADIENT_THRESHOLDS, now = Date.now()): HeartbeatLevel {
	if (!heartbeat || heartbeat.alive === false) return "dead";

	const elapsed = heartbeatAgeMs(heartbeat, now);
	if (!Number.isFinite(elapsed)) return "dead";

	const ladder: Array<[number, HeartbeatLevel]> = [
		[thresholds.deadMs, "dead"],
		[thresholds.staleMs, "stale"],
		[thresholds.warnMs, "warn"],
	];
	for (const [limit, level] of ladder) {
		if (elapsed > limit) return level;
	}
	return "healthy";
}
|
||||
124
extensions/pi-crew/src/runtime/heartbeat-watcher.ts
Normal file
124
extensions/pi-crew/src/runtime/heartbeat-watcher.ts
Normal file
@@ -0,0 +1,124 @@
|
||||
import type { NotificationDescriptor } from "../extension/notification-router.ts";
|
||||
import type { MetricRegistry } from "../observability/metric-registry.ts";
|
||||
import { appendEvent } from "../state/event-log.ts";
|
||||
import { loadRunManifestById } from "../state/state-store.ts";
|
||||
import type { TeamRunManifest } from "../state/types.ts";
|
||||
import { logInternalError } from "../utils/internal-error.ts";
|
||||
import type { ManifestCache } from "./manifest-cache.ts";
|
||||
import { classifyHeartbeat, DEFAULT_GRADIENT_THRESHOLDS, heartbeatAgeMs, type GradientThresholds, type HeartbeatLevel } from "./heartbeat-gradient.ts";
|
||||
|
||||
/** Minimal notification sink used by the heartbeat watcher. */
export interface HeartbeatWatcherRouter {
	/** Queues a notification. */
	enqueue(notification: NotificationDescriptor): boolean;
}

/** Construction options of `HeartbeatWatcher`. */
export interface HeartbeatWatcherOptions {
	/** Working directory used to load run manifests by id. */
	cwd: string;
	/** Delay between polls in milliseconds (the watcher falls back to 5000). */
	pollIntervalMs?: number;
	/** Classification thresholds (the watcher falls back to the default gradient thresholds). */
	thresholds?: GradientThresholds;
	/** Cache listing the runs to inspect. */
	manifestCache: ManifestCache;
	/** Registry receiving heartbeat gauges and counters. */
	registry: MetricRegistry;
	/** Sink for dead-heartbeat notifications. */
	router: HeartbeatWatcherRouter;
	/** Number of consecutive dead ticks that fires `onDeadletterTrigger` (the watcher falls back to 3). */
	deadletterTickThreshold?: number;
	/** Called when a task's heartbeat turns dead. */
	onDead?: (runId: string, taskId: string, elapsed: number) => void;
	/** Called when a task reaches the consecutive-dead tick threshold. */
	onDeadletterTrigger?: (manifest: TeamRunManifest, taskId: string) => void;
}
|
||||
|
||||
/**
 * Polls running runs for heartbeat staleness.
 *
 * Uses recursive setTimeout to avoid timer storms: at most one poll timer is pending at any
 * time, because scheduling a tick first cancels the previously scheduled one.
 * Cleanup is done in the same pass — no second scan over manifests.
 * Keys for runs that disappear from the cache are cleaned via staleness-age policy
 * rather than being leaked forever.
 *
 * `dispose()` is honoured even when it is called from inside a tick (for example from an
 * `onDead` callback): the tick notices the cancellation and does not reschedule itself.
 */
export class HeartbeatWatcher {
	private timer?: ReturnType<typeof setTimeout>;
	private lastLevel = new Map<string, HeartbeatLevel>();
	private consecutiveDead = new Map<string, number>();
	private lastSeen = new Map<string, number>(); // key → last time it was active
	/** Max age (ms) to retain a stale key before garbage-collecting it. */
	private readonly maxKeyAgeMs = 600_000; // 10 minutes
	private readonly opts: HeartbeatWatcherOptions;
	/** Bumped by dispose() so an in-flight tick can tell that it was cancelled. */
	private generation = 0;

	constructor(opts: HeartbeatWatcherOptions) {
		this.opts = opts;
	}

	/** (Re)starts polling; any previous state is discarded first. */
	start(): void {
		this.dispose();
		this.scheduleTick();
	}

	/** Schedules the next tick, replacing any tick that is already pending. */
	private scheduleTick(): void {
		// Without this, a manual tick() on a started watcher would fork a second polling chain
		// whose timer handle is lost and can never be cleared by dispose().
		if (this.timer) clearTimeout(this.timer);
		this.timer = setTimeout(() => this.tick(), this.opts.pollIntervalMs ?? 5000);
		this.timer.unref();
	}

	/**
	 * Runs one poll and schedules the next one.
	 *
	 * Errors are logged, never thrown. The next tick is not scheduled when `dispose()` (or
	 * `start()`, which schedules its own tick) ran while this tick was executing.
	 *
	 * @param now Reference time in epoch milliseconds.
	 */
	tick(now = Date.now()): void {
		const generation = this.generation;
		try {
			this.tickUnsafe(now);
		} catch (error) {
			logInternalError("heartbeat-watcher.tick", error);
		} finally {
			if (generation === this.generation) this.scheduleTick();
		}
	}

	private tickUnsafe(now: number): void {
		const thresholds = this.opts.thresholds ?? DEFAULT_GRADIENT_THRESHOLDS;
		const tickThreshold = this.opts.deadletterTickThreshold ?? 3;
		const activeKeys = new Set<string>();

		for (const run of this.opts.manifestCache.list(50)) {
			if (run.status !== "running") continue;
			const loaded = loadRunManifestById(this.opts.cwd, run.runId);
			if (!loaded) continue;
			for (const task of loaded.tasks) {
				if (task.status !== "running") continue;
				const key = `${run.runId}:${task.id}`;
				activeKeys.add(key);
				this.lastSeen.set(key, now);

				const elapsed = heartbeatAgeMs(task.heartbeat, now);
				const level = classifyHeartbeat(task.heartbeat, thresholds, now);
				// An unknown age is reported as the dead threshold so the gauge stays finite.
				this.opts.registry.gauge("crew.heartbeat.staleness_ms", "Heartbeat elapsed since last seen, milliseconds").set({ runId: run.runId, taskId: task.id }, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
				this.opts.registry.counter("crew.heartbeat.level_total", "Heartbeat classifications by level").inc({ runId: run.runId, level });
				const previous = this.lastLevel.get(key);
				this.lastLevel.set(key, level);
				// Event, notification and callback fire only on the transition into "dead".
				if (level === "dead" && previous !== "dead") {
					this.opts.registry.counter("crew.heartbeat.dead_total", "Dead heartbeat detections").inc({ runId: run.runId });
					appendEvent(loaded.manifest.eventsPath, { type: "crew.task.heartbeat_dead", runId: run.runId, taskId: task.id, message: `Task ${task.id} heartbeat dead.`, data: { elapsedMs: Number.isFinite(elapsed) ? elapsed : undefined } });
					this.opts.router.enqueue({ id: `dead_${run.runId}_${task.id}`, severity: "warning", source: "heartbeat-watcher", runId: run.runId, title: `Task ${task.id} heartbeat dead`, body: "Background watcher detected a stuck worker." });
					this.opts.onDead?.(run.runId, task.id, Number.isFinite(elapsed) ? elapsed : thresholds.deadMs);
				}
				if (level === "dead") {
					const count = (this.consecutiveDead.get(key) ?? 0) + 1;
					this.consecutiveDead.set(key, count);
					// Strict equality: the dead-letter trigger fires once per dead streak.
					if (count === tickThreshold) this.opts.onDeadletterTrigger?.(loaded.manifest, task.id);
				} else {
					this.consecutiveDead.delete(key);
				}
			}
		}

		// Cleanup: drop keys that were NOT in this tick's active set AND
		// haven't been seen for > maxKeyAgeMs. This covers runs that
		// completed or fell out of the manifest cache's top-50 window.
		const cutoff = now - this.maxKeyAgeMs;
		for (const [key, ts] of this.lastSeen) {
			if (!activeKeys.has(key) && ts < cutoff) {
				this.lastLevel.delete(key);
				this.consecutiveDead.delete(key);
				this.lastSeen.delete(key);
			}
		}
	}

	/** Stops polling and forgets all per-task state. Safe to call from inside a tick. */
	dispose(): void {
		this.generation += 1;
		if (this.timer) clearTimeout(this.timer);
		this.timer = undefined;
		this.lastLevel.clear();
		this.consecutiveDead.clear();
		this.lastSeen.clear();
	}
}
|
||||
88
extensions/pi-crew/src/runtime/live-agent-control.ts
Normal file
88
extensions/pi-crew/src/runtime/live-agent-control.ts
Normal file
@@ -0,0 +1,88 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import type { TeamRunManifest } from "../state/types.ts";
|
||||
import { agentStateFile, ensureAgentStateDir } from "./crew-agent-records.ts";
|
||||
|
||||
/** Operations that can be requested of a live agent. */
export type LiveAgentControlOperation =
	| "steer"
	| "follow-up"
	| "stop"
	| "resume";

/** One control request, stored as a single JSON line in a task's live-control file. */
export interface LiveAgentControlRequest {
	/** Unique request id. */
	id: string;
	/** Run the request belongs to. */
	runId: string;
	/** Task the request targets. */
	taskId: string;
	/** Optional agent the request targets; when set it must match the agent id or the task id to be applied. */
	agentId?: string;
	/** Requested operation. */
	operation: LiveAgentControlOperation;
	/** Optional message; each operation has a default text when omitted. */
	message?: string;
	/** ISO-8601 creation timestamp. */
	createdAt: string;
	/** NOTE(review): presumably set by whoever processes the request — confirm against the consumer. */
	processedAt?: string;
	/** NOTE(review): presumably a processing error message — confirm against the consumer. */
	error?: string;
}

/** Read position in a live-control file, counted in non-empty lines. */
export interface LiveAgentControlCursor {
	/** Number of non-empty lines already consumed. */
	offset: number;
}
|
||||
|
||||
/** Returns the live-control file path for a task, resolving the directory through `ensureAgentStateDir`. */
export function liveAgentControlPath(manifest: TeamRunManifest, taskId: string): string {
	const agentDir = ensureAgentStateDir(manifest, taskId);
	return path.join(agentDir, "live-control.jsonl");
}
|
||||
|
||||
/** Resolves the live-control file of a task through `agentStateFile`. */
function liveAgentControlFile(manifest: TeamRunManifest, taskId: string): string {
	const fileName = "live-control.jsonl";
	return agentStateFile(manifest, taskId, fileName);
}
|
||||
|
||||
/** Generates a control request id of the form `ctrl_<base-36 timestamp>_<up to 8 random hex digits>`. */
function requestId(): string {
	const stamp = Date.now().toString(36);
	const nonce = Math.random().toString(16).slice(2, 10);
	return `ctrl_${stamp}_${nonce}`;
}
|
||||
|
||||
/**
 * Appends a control request to a task's live-control file as one JSON line.
 *
 * @param manifest Run the task belongs to.
 * @param input Target task, optional target agent, operation and optional message.
 * @returns The stored request, including its generated id and creation timestamp.
 */
export function appendLiveAgentControlRequest(manifest: TeamRunManifest, input: { taskId: string; agentId?: string; operation: LiveAgentControlOperation; message?: string }): LiveAgentControlRequest {
	const { taskId, agentId, operation, message } = input;
	const request: LiveAgentControlRequest = {
		id: requestId(),
		runId: manifest.runId,
		taskId,
		agentId,
		operation,
		message,
		createdAt: new Date().toISOString(),
	};
	const target = liveAgentControlFile(manifest, taskId);
	// Each record ends with "\n" so the file stays line-delimited.
	const record = `${JSON.stringify(request)}\n`;
	fs.appendFileSync(target, record, "utf-8");
	return request;
}
|
||||
|
||||
/**
 * Reads the control requests appended to a task's live-control file since `cursor`.
 *
 * Lines that are not valid JSON, or that belong to another run or task, are skipped.
 * One exception protects against torn reads: if the final line is not newline-terminated
 * and fails to parse, it is assumed to be a record that is still being written. The
 * returned cursor then stops in front of it so the next read retries it instead of
 * skipping it permanently.
 *
 * @param manifest Run the task belongs to.
 * @param taskId Task whose control file is read.
 * @param cursor Position returned by the previous read (counted in non-empty lines).
 * @returns The newly read requests and the cursor for the next read. When the control
 * file cannot be resolved or does not exist, no requests and the unchanged cursor.
 */
export function readLiveAgentControlRequests(manifest: TeamRunManifest, taskId: string, cursor: LiveAgentControlCursor = { offset: 0 }): { requests: LiveAgentControlRequest[]; cursor: LiveAgentControlCursor } {
	let filePath: string;
	try {
		filePath = liveAgentControlFile(manifest, taskId);
	} catch {
		return { requests: [], cursor };
	}
	if (!fs.existsSync(filePath)) return { requests: [], cursor };

	const text = fs.readFileSync(filePath, "utf-8");
	const lines = text.split(/\r?\n/).filter(Boolean);
	// Writers terminate every record with "\n"; a missing terminator hints at a partial write.
	const lastLineTerminated = text.endsWith("\n");
	const pending = lines.slice(cursor.offset);

	const requests: LiveAgentControlRequest[] = [];
	let nextOffset = lines.length;
	for (const [position, line] of pending.entries()) {
		let parsed: LiveAgentControlRequest;
		try {
			parsed = JSON.parse(line) as LiveAgentControlRequest;
		} catch {
			const isTrailingLine = position === pending.length - 1;
			if (isTrailingLine && !lastLineTerminated) nextOffset = lines.length - 1;
			continue;
		}
		if (parsed && parsed.runId === manifest.runId && parsed.taskId === taskId) requests.push(parsed);
	}
	return { requests, cursor: { offset: nextOffset } };
}
|
||||
|
||||
/**
 * Applies one control request to a live agent session.
 *
 * The request is ignored (returns `false`) when its id was already seen, or when it names
 * an agent id that matches neither `agentId` nor `taskId`. Otherwise its id is recorded in
 * `seenRequestIds` before the operation runs, and the matching session method is invoked
 * if the session provides it.
 *
 * @returns `true` when the request was accepted, `false` when it was ignored.
 */
export async function applyLiveAgentControlRequest(input: { request: LiveAgentControlRequest; taskId: string; agentId: string; session: { steer?: (text: string) => Promise<void>; prompt?: (text: string, options?: Record<string, unknown>) => Promise<void>; abort?: () => Promise<void> | void }; seenRequestIds?: Set<string> }): Promise<boolean> {
	const { request, taskId, agentId, session, seenRequestIds } = input;

	const alreadySeen = seenRequestIds?.has(request.id);
	if (alreadySeen) return false;
	const targetsOtherAgent = Boolean(request.agentId) && request.agentId !== agentId && request.agentId !== taskId;
	if (targetsOtherAgent) return false;

	seenRequestIds?.add(request.id);
	switch (request.operation) {
		case "steer":
			await session.steer?.(request.message ?? "Please report current status and wrap up if possible.");
			break;
		case "follow-up":
			await session.prompt?.(request.message ?? "Please continue with the follow-up request.", { source: "api", expandPromptTemplates: false });
			break;
		case "resume":
			await session.prompt?.(request.message ?? "Please resume and report final status.", { source: "api", expandPromptTemplates: false });
			break;
		case "stop":
			await session.abort?.();
			break;
		default:
			break;
	}
	return true;
}
|
||||
|
||||
/**
 * Reads all control requests past `cursor` for a task and applies them to the session one
 * after another, in file order.
 *
 * @returns The cursor to pass to the next call.
 */
export async function applyLiveAgentControlRequests(input: { manifest: TeamRunManifest; taskId: string; agentId: string; session: { steer?: (text: string) => Promise<void>; prompt?: (text: string, options?: Record<string, unknown>) => Promise<void>; abort?: () => Promise<void> | void }; cursor: LiveAgentControlCursor; seenRequestIds?: Set<string> }): Promise<LiveAgentControlCursor> {
	const { manifest, taskId, agentId, session, cursor, seenRequestIds } = input;
	const { requests, cursor: nextCursor } = readLiveAgentControlRequests(manifest, taskId, cursor);
	for (const request of requests) {
		await applyLiveAgentControlRequest({ request, taskId, agentId, session, seenRequestIds });
	}
	return nextCursor;
}
|
||||
103
extensions/pi-crew/src/runtime/live-agent-manager.ts
Normal file
103
extensions/pi-crew/src/runtime/live-agent-manager.ts
Normal file
@@ -0,0 +1,103 @@
|
||||
import type { CrewAgentRecord } from "./crew-agent-runtime.ts";
|
||||
|
||||
/** The subset of a live session that the manager drives; every capability is optional. */
type LiveSessionHandle = {
	/** Sends a steering message to the session. */
	steer?: (text: string) => Promise<void>;
	/** Sends a prompt to the session. */
	prompt?: (text: string, options?: Record<string, unknown>) => Promise<void>;
	/** Aborts the session. */
	abort?: () => Promise<void> | void;
};

/** In-process registration of a live agent. */
export interface LiveAgentHandle {
	/** Agent id; the registry key. */
	agentId: string;
	/** Task the agent works on; accepted as an alternative lookup key. */
	taskId: string;
	/** Run the agent belongs to. */
	runId: string;
	/** Session used to control the agent. */
	session: LiveSessionHandle;
	/** ISO-8601 time of the first registration under this agent id. */
	createdAt: string;
	/** ISO-8601 time of the latest registration or update. */
	updatedAt: string;
	/** Current agent status. */
	status: CrewAgentRecord["status"];
	/** Steering messages queued while the session has no `steer` function. */
	pendingSteers: string[];
	/** Follow-up prompts queued while the session has no `prompt` function. */
	pendingFollowUps: string[];
}

/** Process-local registry of live agents, keyed by agent id. */
const liveAgents = new Map<string, LiveAgentHandle>();
|
||||
|
||||
/**
 * Registers (or re-registers) a live agent in the process-local registry.
 *
 * Re-registering an agent id keeps the original `createdAt` and the queued steering and
 * follow-up messages. Messages that were queued while the session could not take them are
 * flushed, fire-and-forget, as soon as the new session offers the matching function.
 * Failures of those flushed calls are ignored.
 */
export function registerLiveAgent(input: Omit<LiveAgentHandle, "createdAt" | "updatedAt" | "pendingSteers" | "pendingFollowUps">): LiveAgentHandle {
	const timestamp = new Date().toISOString();
	const previous = liveAgents.get(input.agentId);
	const handle: LiveAgentHandle = {
		...input,
		createdAt: previous?.createdAt ?? timestamp,
		updatedAt: timestamp,
		pendingSteers: previous?.pendingSteers ?? [],
		pendingFollowUps: previous?.pendingFollowUps ?? [],
	};
	liveAgents.set(input.agentId, handle);

	const { session, pendingSteers, pendingFollowUps } = handle;
	if (pendingSteers.length && typeof session.steer === "function") {
		// splice(0) empties the queue in place and yields the drained messages.
		for (const message of pendingSteers.splice(0)) {
			void session.steer(message).catch(() => {});
		}
	}
	if (pendingFollowUps.length && typeof session.prompt === "function") {
		for (const message of pendingFollowUps.splice(0)) {
			void session.prompt(message, { source: "api", expandPromptTemplates: false }).catch(() => {});
		}
	}
	return handle;
}
|
||||
|
||||
/** Sets the status of a registered agent and refreshes `updatedAt`; unknown agent ids are ignored. */
export function updateLiveAgentStatus(agentId: string, status: CrewAgentRecord["status"]): void {
	const handle = liveAgents.get(agentId);
	if (handle) {
		handle.status = status;
		handle.updatedAt = new Date().toISOString();
	}
}
|
||||
|
||||
/** Looks up a live agent by agent id first, then by task id (first match in registration order). */
export function getLiveAgent(agentIdOrTaskId: string): LiveAgentHandle | undefined {
	const byAgentId = liveAgents.get(agentIdOrTaskId);
	if (byAgentId !== undefined) return byAgentId;
	for (const entry of liveAgents.values()) {
		if (entry.taskId === agentIdOrTaskId) return entry;
	}
	return undefined;
}
|
||||
|
||||
/** Returns all registered live agents, most recently updated first. */
export function listLiveAgents(): LiveAgentHandle[] {
	const handles = Array.from(liveAgents.values());
	// updatedAt is an ISO-8601 string, so string comparison orders by time.
	handles.sort((left, right) => right.updatedAt.localeCompare(left.updatedAt));
	return handles;
}
|
||||
|
||||
/**
 * Sends a steering message to a live agent.
 *
 * If the session has no `steer` function the message is queued on the handle instead.
 *
 * @throws Error when no agent is registered under the given agent or task id.
 */
export async function steerLiveAgent(agentIdOrTaskId: string, message: string): Promise<LiveAgentHandle> {
	const handle = getLiveAgent(agentIdOrTaskId);
	if (!handle) throw new Error(`Live agent '${agentIdOrTaskId}' is not registered in this process.`);

	const { session } = handle;
	if (typeof session.steer === "function") {
		await session.steer(message);
		handle.updatedAt = new Date().toISOString();
	} else {
		handle.pendingSteers.push(message);
	}
	return handle;
}
|
||||
|
||||
/**
 * Sends a follow-up prompt to a live agent.
 *
 * If the session has no `prompt` function the prompt is queued on the handle instead.
 *
 * @throws Error when no agent is registered under the given agent or task id.
 */
export async function followUpLiveAgent(agentIdOrTaskId: string, prompt: string): Promise<LiveAgentHandle> {
	const handle = getLiveAgent(agentIdOrTaskId);
	if (!handle) throw new Error(`Live agent '${agentIdOrTaskId}' is not registered in this process.`);

	const { session } = handle;
	if (typeof session.prompt === "function") {
		await session.prompt(prompt, { source: "api", expandPromptTemplates: false });
		handle.updatedAt = new Date().toISOString();
	} else {
		handle.pendingFollowUps.push(prompt);
	}
	return handle;
}
|
||||
|
||||
/**
 * Aborts a live agent's session and marks the handle as "stopped".
 *
 * @throws Error when the agent is not registered or its session has no `abort` function.
 */
export async function stopLiveAgent(agentIdOrTaskId: string): Promise<LiveAgentHandle> {
	const handle = getLiveAgent(agentIdOrTaskId);
	if (!handle) {
		throw new Error(`Live agent '${agentIdOrTaskId}' is not registered in this process.`);
	}
	const { session } = handle;
	if (typeof session.abort !== "function") {
		throw new Error(`Live agent '${agentIdOrTaskId}' does not expose abort().`);
	}

	await session.abort();
	handle.status = "stopped";
	handle.updatedAt = new Date().toISOString();
	return handle;
}
|
||||
|
||||
/**
 * Resumes a live agent by prompting its session.
 *
 * The handle is marked "running" while the prompt is in flight and "completed" once it
 * resolves. If the prompt rejects, the status is put back to its previous value (unless
 * something else changed it in the meantime), so the handle does not stay "running"
 * forever, and the error is rethrown.
 *
 * @param agentIdOrTaskId Agent id or task id of the registered agent.
 * @param prompt Prompt sent to the session.
 * @throws Error when the agent is not registered or its session has no `prompt` function,
 * and any error raised by the session's `prompt`.
 */
export async function resumeLiveAgent(agentIdOrTaskId: string, prompt: string): Promise<LiveAgentHandle> {
	const handle = getLiveAgent(agentIdOrTaskId);
	if (!handle) throw new Error(`Live agent '${agentIdOrTaskId}' is not registered in this process.`);
	if (typeof handle.session.prompt !== "function") throw new Error(`Live agent '${agentIdOrTaskId}' does not expose prompt().`);

	const previousStatus = handle.status;
	handle.status = "running";
	handle.updatedAt = new Date().toISOString();
	try {
		await handle.session.prompt(prompt, { source: "api", expandPromptTemplates: false });
	} catch (error) {
		// Only roll back our own "running" marker; a status set by someone else while the
		// prompt was in flight (for example "stopped") must win.
		if (handle.status === "running") handle.status = previousStatus;
		handle.updatedAt = new Date().toISOString();
		throw error;
	}
	handle.status = "completed";
	handle.updatedAt = new Date().toISOString();
	return handle;
}
|
||||
|
||||
/** Test helper: removes every entry from the in-process live-agent registry. */
export function clearLiveAgentsForTest(): void {
	liveAgents.clear();
}
|
||||
36
extensions/pi-crew/src/runtime/live-control-realtime.ts
Normal file
36
extensions/pi-crew/src/runtime/live-control-realtime.ts
Normal file
@@ -0,0 +1,36 @@
|
||||
import type { LiveAgentControlRequest } from "./live-agent-control.ts";
|
||||
|
||||
/** Envelope wrapping a live-agent control request for realtime delivery. */
export interface LiveControlRealtimeMessage {
	/** Discriminator; always "live-control". */
	type: "live-control";
	/** Envelope version; only version 1 is accepted by the parser in this file. */
	version: 1;
	/** The control request being delivered. */
	request: LiveAgentControlRequest;
}
|
||||
|
||||
/** Callback invoked for each published control request; may be synchronous or async. */
type Listener = (request: LiveAgentControlRequest) => void | Promise<void>;

/** In-process subscribers to realtime control requests. */
const listeners = new Set<Listener>();

/**
 * Delivers a control request to every current subscriber.
 *
 * Each listener is isolated: a synchronous throw or an async rejection from
 * one listener neither stops delivery to the others nor surfaces as an
 * unhandled promise rejection. Delivery is fire-and-forget; listener results
 * are not awaited.
 */
export function publishLiveControlRealtime(request: LiveAgentControlRequest): void {
	// Iterate over a snapshot so listeners may subscribe/unsubscribe while being notified.
	for (const listener of [...listeners]) {
		try {
			// Promise.resolve also covers listeners that return a plain thenable.
			void Promise.resolve(listener(request)).catch(() => undefined);
		} catch {
			// A failing subscriber must not block delivery to the remaining ones.
		}
	}
}

/**
 * Registers a listener for realtime control requests.
 *
 * @returns A function that removes the listener again.
 */
export function subscribeLiveControlRealtime(listener: Listener): () => void {
	listeners.add(listener);
	return () => {
		listeners.delete(listener);
	};
}
|
||||
|
||||
/** Wraps a control request in a version-1 "live-control" envelope. */
export function liveControlRealtimeMessage(request: LiveAgentControlRequest): LiveControlRealtimeMessage {
	const envelope: LiveControlRealtimeMessage = {
		type: "live-control",
		version: 1,
		request,
	};
	return envelope;
}
|
||||
|
||||
/**
 * Validates an untrusted value as a version-1 "live-control" envelope.
 *
 * @returns The embedded request (same object reference) when the envelope and
 *   the request's `id`, `runId`, `taskId`, `operation` and `createdAt` fields
 *   pass the shape checks; otherwise `undefined`.
 */
export function parseLiveControlRealtimeMessage(raw: unknown): LiveAgentControlRequest | undefined {
	if (raw === null || typeof raw !== "object" || Array.isArray(raw)) return undefined;
	const { type, version, request: payload } = raw as { type?: unknown; version?: unknown; request?: unknown };
	if (type !== "live-control") return undefined;
	if (version !== 1) return undefined;
	if (payload === null || typeof payload !== "object" || Array.isArray(payload)) return undefined;

	const candidate = payload as Partial<LiveAgentControlRequest>;
	const hasIdentity = typeof candidate.id === "string" && typeof candidate.runId === "string" && typeof candidate.taskId === "string";
	const hasKnownOperation = candidate.operation === "steer" || candidate.operation === "follow-up" || candidate.operation === "stop" || candidate.operation === "resume";
	const hasTimestamp = typeof candidate.createdAt === "string";
	if (hasIdentity && hasKnownOperation && hasTimestamp) return candidate as LiveAgentControlRequest;
	return undefined;
}
|
||||
|
||||
/** Test helper: drops every registered realtime control listener. */
export function clearLiveControlRealtimeForTest(): void {
	listeners.clear();
}
|
||||
309
extensions/pi-crew/src/runtime/live-session-runtime.ts
Normal file
309
extensions/pi-crew/src/runtime/live-session-runtime.ts
Normal file
@@ -0,0 +1,309 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import type { AgentConfig } from "../agents/agent-config.ts";
|
||||
import type { CrewRuntimeConfig } from "../config/config.ts";
|
||||
import type { TeamRunManifest, TeamTaskState, UsageState } from "../state/types.ts";
|
||||
import { buildMemoryBlock } from "./agent-memory.ts";
|
||||
import { registerLiveAgent, updateLiveAgentStatus } from "./live-agent-manager.ts";
|
||||
import { applyLiveAgentControlRequest, applyLiveAgentControlRequests, type LiveAgentControlCursor } from "./live-agent-control.ts";
|
||||
import { subscribeLiveControlRealtime } from "./live-control-realtime.ts";
|
||||
import { eventToSidechainType, sidechainOutputPath, writeSidechainEntry } from "./sidechain-output.ts";
|
||||
import type { WorkflowStep } from "../workflows/workflow-config.ts";
|
||||
import { isLiveSessionRuntimeAvailable } from "./runtime-resolver.ts";
|
||||
import { redactSecrets } from "../utils/redaction.ts";
|
||||
import { buildConfiguredModelRouting } from "./model-fallback.ts";
|
||||
|
||||
/** Everything needed to run one task as an in-process live agent session. */
export interface LiveSessionSpawnInput {
	/** Run manifest; supplies the run id, state root and cwd used for routing. */
	manifest: TeamRunManifest;
	/** Task being executed; supplies the task id, role and working directory. */
	task: TeamTaskState;
	/** Workflow step; its `model` takes part in model routing. */
	step: WorkflowStep;
	/** Agent definition (name, system prompt, tools, memory, model settings). */
	agent: AgentConfig;
	/** Task prompt sent to the session. */
	prompt: string;
	/** Optional abort signal; aborting it aborts the session. */
	signal?: AbortSignal;
	/** Optional file that receives one redacted JSON line per session event. */
	transcriptPath?: string;
	/** Called with each raw session event while `isCurrent()` holds. */
	onEvent?: (event: unknown) => void;
	/** Called with text extracted from session events while `isCurrent()` holds. */
	onOutput?: (text: string) => void;
	/** Runtime options read here: `inheritContext`, `maxTurns`, `graceTurns`. */
	runtimeConfig?: CrewRuntimeConfig;
	/** Parent context, prepended to the prompt when `inheritContext` is true. */
	parentContext?: string;
	/** Parent model, used when no routed model resolves from the registry. */
	parentModel?: unknown;
	/** Model registry used to resolve "provider/id" model strings. */
	modelRegistry?: unknown;
	/** Explicit model override passed to model routing. */
	modelOverride?: string;
	/** Team-role model passed to model routing. */
	teamRoleModel?: string;
	/** Returns false once this invocation is stale; defaults to always true. */
	isCurrent?: () => boolean;
}
|
||||
|
||||
/** Outcome of runLiveSessionTask; `available` is true even for failed runs. */
export interface LiveSessionRunResult {
	available: true;
	/** 0 on success, 1 on failure in this file. */
	exitCode: number | null;
	/** Text collected from session events. */
	stdout: string;
	/** Failure message, or the model-fallback message on success. */
	stderr: string;
	/** Number of session events observed. */
	jsonEvents: number;
	/** Token/cost usage derived from session stats, when any is reported. */
	usage?: UsageState;
	/** Failure message, when the run failed. */
	error?: string;
}

/** Probe result: the live-session runtime cannot be used. */
export interface LiveSessionUnavailableResult {
	available: false;
	/** Why the runtime is unavailable. */
	reason: string;
}

/** Probe result: the live-session runtime can be used. */
export interface LiveSessionPlannedResult {
	available: true;
	/** Human-readable description of the availability. */
	reason: string;
}
|
||||
|
||||
/**
 * Structural view of the dynamically imported "@mariozechner/pi-coding-agent"
 * module. Every export is optional because each one is probed at runtime
 * before it is used.
 */
type LiveSessionModule = Record<string, unknown> & {
	createAgentSession?: (options?: Record<string, unknown>) => Promise<{ session: LiveSessionLike; modelFallbackMessage?: string }>;
	DefaultResourceLoader?: new (options: Record<string, unknown>) => { reload?: () => Promise<void> };
	SessionManager?: { inMemory?: (cwd?: string) => unknown; create?: (cwd?: string, sessionDir?: string) => unknown };
	SettingsManager?: { create?: (cwd?: string, agentDir?: string) => unknown };
	getAgentDir?: () => string;
};

/**
 * Structural view of an agent session. All members are optional; callers
 * check for a member (or use optional calls) before invoking it.
 */
type LiveSessionLike = {
	/** Registers an event listener and returns its unsubscribe function. */
	subscribe?: (listener: (event: unknown) => void) => (() => void);
	prompt?: (text: string, options?: Record<string, unknown>) => Promise<void>;
	steer?: (text: string) => Promise<void>;
	abort?: () => Promise<void> | void;
	/** Usage statistics; `getStats()` is preferred over `stats` when present. */
	getStats?: () => unknown;
	stats?: unknown;
	bindExtensions?: (bindings?: Record<string, unknown>) => Promise<void>;
	getActiveToolNames?: () => string[];
	setActiveToolsByName?: (names: string[]) => void;
};
|
||||
|
||||
/**
 * Appends one event to the transcript file as a single JSON line, after
 * passing it through `redactSecrets`. Does nothing when no path is given.
 * The parent directory is created on demand.
 */
function appendTranscript(filePath: string | undefined, event: unknown): void {
	if (filePath) {
		const directory = path.dirname(filePath);
		fs.mkdirSync(directory, { recursive: true });
		const line = `${JSON.stringify(redactSecrets(event))}\n`;
		fs.appendFileSync(filePath, line, "utf-8");
	}
}
|
||||
|
||||
/** Returns `value` as a string-keyed record when it is a non-null, non-array object; otherwise `undefined`. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
	if (!value || typeof value !== "object" || Array.isArray(value)) return undefined;
	return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Extracts text fragments from a message `content` value.
 *
 * A string yields itself. An array yields, per object element, its `text`
 * (when `type` is "text") or else its string `content`. Anything else yields
 * an empty list.
 */
function textFromContent(content: unknown): string[] {
	if (typeof content === "string") return [content];
	if (!Array.isArray(content)) return [];
	const fragments: string[] = [];
	for (const part of content) {
		const record = asRecord(part);
		if (!record) continue;
		if (record.type === "text" && typeof record.text === "string") {
			fragments.push(record.text);
		} else if (typeof record.content === "string") {
			fragments.push(record.content);
		}
	}
	return fragments;
}
|
||||
|
||||
/**
 * Collects all non-blank text carried by a session event: its own `text`,
 * its `content`, and the `content` of a nested `message`, in that order.
 */
function eventText(event: unknown): string[] {
	const record = asRecord(event);
	if (!record) return [];
	const nestedMessage = asRecord(record.message);
	const pieces = [
		...(typeof record.text === "string" ? [record.text] : []),
		...textFromContent(record.content),
		...(nestedMessage ? textFromContent(nestedMessage.content) : []),
	];
	// Drop fragments that are empty or whitespace-only.
	return pieces.filter((piece) => piece.trim());
}
|
||||
|
||||
/** Returns the text of an assistant message carried by a "message_end" event; empty for any other event. */
function finalAssistantText(event: unknown): string[] {
	const record = asRecord(event);
	const message = record?.type === "message_end" ? asRecord(record.message) : undefined;
	return message?.role === "assistant" ? textFromContent(message.content) : [];
}
|
||||
|
||||
/**
 * Returns the value of the first key in `keys` whose value in `obj` is a
 * finite number, or `undefined` when there is none (or `obj` is missing).
 */
function numberField(obj: Record<string, unknown> | undefined, keys: string[]): number | undefined {
	if (!obj) return undefined;
	const isFiniteNumber = (value: unknown): value is number => typeof value === "number" && Number.isFinite(value);
	const matchingKey = keys.find((key) => isFiniteNumber(obj[key]));
	return matchingKey === undefined ? undefined : (obj[matchingKey] as number);
}
|
||||
|
||||
/**
 * Resolves a "provider/id" model string through the registry's `find`
 * method. The text before the first "/" is the provider; everything after it
 * is the model id.
 *
 * @returns Whatever `find` returns, or `undefined` when the id has no "/",
 *   the registry has no `find` function, or `find` throws.
 */
function modelFromRegistry(modelRegistry: unknown, modelId: string | undefined): unknown {
	if (!modelId) return undefined;
	const slashAt = modelId.indexOf("/");
	if (slashAt < 0) return undefined;
	const lookup = asRecord(modelRegistry)?.find;
	if (typeof lookup !== "function") return undefined;
	const provider = modelId.slice(0, slashAt);
	const id = modelId.slice(slashAt + 1);
	try {
		return lookup.call(modelRegistry, provider, id);
	} catch {
		return undefined;
	}
}
|
||||
|
||||
/**
 * Builds the system prompt for a live subagent: an identification header,
 * the agent's own system prompt (or a default instruction), and the agent's
 * memory block when memory is configured.
 */
function liveSystemPrompt(input: LiveSessionSpawnInput): string {
	const { agent, task, manifest } = input;
	// The memory block is told whether the agent has a file-writing tool.
	const canWriteFiles = (): boolean => Boolean(agent.tools?.some((tool) => tool === "write" || tool === "edit"));
	const memory = agent.memory ? buildMemoryBlock(agent.name, agent.memory, task.cwd, canWriteFiles()) : "";
	const sections = [
		"# pi-crew Live Subagent",
		`Run ID: ${manifest.runId}`,
		`Task ID: ${task.id}`,
		`Role: ${task.role}`,
		`Agent: ${agent.name}`,
		`Working directory: ${task.cwd}`,
		"",
		agent.systemPrompt || "Follow the user task exactly and report verification evidence.",
		memory ? `\n${memory}` : "",
	];
	// Empty entries (including the "" spacer above) are removed before joining.
	return sections.filter(Boolean).join("\n");
}
|
||||
|
||||
/**
 * Restricts the session's active tools: tools that could recursively spawn
 * or control agents are always removed, and when the agent declares a
 * non-empty tool list only those tools are kept. Does nothing when the
 * session lacks the tool getter or setter.
 */
function filterActiveTools(session: LiveSessionLike, agent: AgentConfig): void {
	if (typeof session.getActiveToolNames !== "function" || typeof session.setActiveToolsByName !== "function") return;
	const blocked = new Set(["team", "Team", "Agent", "get_subagent_result", "steer_subagent"]);
	const allowList = agent.tools?.length ? new Set(agent.tools) : undefined;
	const kept: string[] = [];
	for (const name of session.getActiveToolNames()) {
		if (blocked.has(name)) continue;
		if (allowList && !allowList.has(name)) continue;
		kept.push(name);
	}
	session.setActiveToolsByName(kept);
}
|
||||
|
||||
/**
 * Maps a session stats object onto a UsageState, accepting several spellings
 * per field. Returns `undefined` when `stats` is not an object or none of
 * the fields is a finite number.
 */
function usageFromStats(stats: unknown): UsageState | undefined {
	const record = asRecord(stats);
	if (!record) return undefined;
	const usage: UsageState = {
		input: numberField(record, ["input", "inputTokens", "input_tokens"]),
		output: numberField(record, ["output", "outputTokens", "output_tokens"]),
		cacheRead: numberField(record, ["cacheRead", "cache_read"]),
		cacheWrite: numberField(record, ["cacheWrite", "cache_write"]),
		cost: numberField(record, ["cost"]),
		turns: numberField(record, ["turns", "turnCount", "turn_count"]),
	};
	const hasAnyValue = Object.values(usage).some((value) => value !== undefined);
	return hasAnyValue ? usage : undefined;
}
|
||||
|
||||
/** Reports whether the live-session runtime is usable, with a human-readable reason either way. */
export async function probeLiveSessionRuntime(): Promise<LiveSessionUnavailableResult | LiveSessionPlannedResult> {
	const probe = await isLiveSessionRuntimeAvailable();
	if (probe.available) {
		return { available: true, reason: "Live-session SDK exports are available and pi-crew can run experimental in-process live agents when runtime.mode=live-session." };
	}
	return { available: false, reason: probe.reason ?? "Live-session runtime is unavailable." };
}
|
||||
|
||||
/**
 * Runs a possibly-async action without awaiting it. Synchronous throws and
 * async rejections are absorbed so a best-effort side action can never turn
 * into an unhandled promise rejection.
 */
function detachLiveSessionCall(action: () => unknown): void {
	try {
		void Promise.resolve(action()).catch(() => undefined);
	} catch {
		// Best-effort call; a synchronous failure is intentionally ignored.
	}
}

/**
 * Runs one task as an in-process live agent session.
 *
 * When PI_CREW_MOCK_LIVE_SESSION is "success", a canned successful run is
 * emitted without touching the SDK. Otherwise the SDK is imported, a session
 * is created and registered as a live agent, control requests are applied
 * (realtime subscription plus a 500 ms poll), session events are written to
 * the transcript and sidechain log, and the prompt is sent.
 *
 * Turn limits: once `runtimeConfig.maxTurns` turns have ended the session is
 * steered to wrap up; after `graceTurns` further turns (default 5) it is
 * aborted.
 *
 * Cancellation: aborting `input.signal` aborts the session. If the signal is
 * already aborted when the session is ready, the prompt is not sent and the
 * run is reported as failed.
 *
 * @returns Always resolves. Failures are reported through `exitCode: 1`,
 *   `stderr` and `error` rather than thrown.
 */
export async function runLiveSessionTask(input: LiveSessionSpawnInput): Promise<LiveSessionRunResult> {
	const isCurrent = input.isCurrent ?? (() => true);
	if (process.env.PI_CREW_MOCK_LIVE_SESSION === "success") {
		const agentId = `${input.manifest.runId}:${input.task.id}`;
		const inherited = input.runtimeConfig?.inheritContext === true && input.parentContext ? ` with inherited context: ${input.parentContext}` : "";
		const event = { type: "message_end", message: { role: "assistant", content: [{ type: "text", text: `Mock live-session success for ${input.agent.name}${inherited}` }] } };
		const mockSession = { steer: async () => {}, prompt: async () => {}, abort: async () => {} };
		registerLiveAgent({ agentId, runId: input.manifest.runId, taskId: input.task.id, session: mockSession, status: "running" });
		appendTranscript(input.transcriptPath, event);
		const sidechainPath = sidechainOutputPath(input.manifest.stateRoot, input.task.id);
		writeSidechainEntry(sidechainPath, { agentId, type: "user", message: { role: "user", content: input.prompt }, cwd: input.task.cwd });
		writeSidechainEntry(sidechainPath, { agentId, type: "message", message: event, cwd: input.task.cwd });
		if (isCurrent()) input.onEvent?.(event);
		const stdout = `Mock live-session success for ${input.agent.name}${inherited}`;
		if (isCurrent()) input.onOutput?.(stdout);
		updateLiveAgentStatus(agentId, "completed");
		return { available: true, exitCode: 0, stdout, stderr: "", jsonEvents: 1 };
	}
	const availability = await isLiveSessionRuntimeAvailable();
	if (!availability.available) return { available: true, exitCode: 1, stdout: "", stderr: availability.reason ?? "Live-session runtime unavailable.", jsonEvents: 0, error: availability.reason };
	const mod = await import("@mariozechner/pi-coding-agent") as LiveSessionModule;
	if (typeof mod.createAgentSession !== "function") return { available: true, exitCode: 1, stdout: "", stderr: "createAgentSession export is unavailable.", jsonEvents: 0, error: "createAgentSession export is unavailable." };

	let unsubscribe: (() => void) | undefined;
	let unsubscribeControlRealtime: (() => void) | undefined;
	let removeAbortListener: (() => void) | undefined;
	let controlTimer: ReturnType<typeof setInterval> | undefined;
	let stdout = "";
	let jsonEvents = 0;
	try {
		const agentDir = typeof mod.getAgentDir === "function" ? mod.getAgentDir() : undefined;
		let resourceLoader: unknown;
		if (mod.DefaultResourceLoader && agentDir) {
			resourceLoader = new mod.DefaultResourceLoader({
				cwd: input.task.cwd,
				agentDir,
				noPromptTemplates: true,
				noThemes: true,
				noContextFiles: input.runtimeConfig?.inheritContext !== true,
				systemPromptOverride: () => liveSystemPrompt(input),
				appendSystemPromptOverride: () => [],
			});
			await (resourceLoader as { reload?: () => Promise<void> }).reload?.();
		}
		const modelRouting = buildConfiguredModelRouting({ overrideModel: input.modelOverride, stepModel: input.step.model, teamRoleModel: input.teamRoleModel, agentModel: input.agent.model, fallbackModels: input.agent.fallbackModels, parentModel: input.parentModel, modelRegistry: input.modelRegistry, cwd: input.manifest.cwd });
		// Prefer the first routed candidate; fall back to the parent's model when it does not resolve.
		const resolvedModel = modelFromRegistry(input.modelRegistry, modelRouting.candidates[0] ?? modelRouting.requested) ?? input.parentModel;
		const created = await mod.createAgentSession({
			cwd: input.task.cwd,
			...(agentDir ? { agentDir } : {}),
			...(resourceLoader ? { resourceLoader } : {}),
			...(mod.SessionManager?.inMemory ? { sessionManager: mod.SessionManager.inMemory(input.task.cwd) } : {}),
			...(mod.SettingsManager?.create && agentDir ? { settingsManager: mod.SettingsManager.create(input.task.cwd, agentDir) } : {}),
			...(input.modelRegistry ? { modelRegistry: input.modelRegistry } : {}),
			...(resolvedModel ? { model: resolvedModel } : {}),
			...(input.agent.thinking ? { thinkingLevel: input.agent.thinking } : {}),
		});
		const session = created.session;
		filterActiveTools(session, input.agent);
		await session.bindExtensions?.({});
		const agentId = `${input.manifest.runId}:${input.task.id}`;
		registerLiveAgent({ agentId, runId: input.manifest.runId, taskId: input.task.id, session, status: "running" });

		// Control requests arrive both via realtime publish and via polling; seen ids de-duplicate the two paths.
		let controlCursor: LiveAgentControlCursor = { offset: 0 };
		const seenControlRequestIds = new Set<string>();
		let controlBusy = false;
		const pollControl = async (): Promise<void> => {
			if (!isCurrent() || controlBusy) return;
			controlBusy = true;
			try {
				controlCursor = await applyLiveAgentControlRequests({ manifest: input.manifest, taskId: input.task.id, agentId, session, cursor: controlCursor, seenRequestIds: seenControlRequestIds });
			} finally {
				controlBusy = false;
			}
		};
		unsubscribeControlRealtime = subscribeLiveControlRealtime((request) => {
			if (!isCurrent() || request.runId !== input.manifest.runId || request.taskId !== input.task.id) return;
			detachLiveSessionCall(() => applyLiveAgentControlRequest({ request, taskId: input.task.id, agentId, session, seenRequestIds: seenControlRequestIds }));
		});
		// The first poll is awaited so a broken control channel fails the task up front.
		await pollControl();
		controlTimer = setInterval(() => {
			// Later polls are best-effort; a failed poll is simply retried on the next tick.
			if (isCurrent()) detachLiveSessionCall(pollControl);
		}, 500);

		let turnCount = 0;
		let softLimitReached = false;
		const maxTurns = input.runtimeConfig?.maxTurns;
		const graceTurns = input.runtimeConfig?.graceTurns ?? 5;
		const sidechainPath = sidechainOutputPath(input.manifest.stateRoot, input.task.id);
		writeSidechainEntry(sidechainPath, { agentId, type: "user", message: { role: "user", content: input.prompt }, cwd: input.task.cwd });
		if (typeof session.subscribe === "function") {
			unsubscribe = session.subscribe((event) => {
				if (!isCurrent()) return;
				jsonEvents += 1;
				appendTranscript(input.transcriptPath, event);
				const sidechainType = eventToSidechainType(event);
				if (sidechainType) writeSidechainEntry(sidechainPath, { agentId, type: sidechainType, message: event, cwd: input.task.cwd });
				const obj = asRecord(event);
				if (obj?.type === "turn_end") {
					turnCount += 1;
					if (maxTurns !== undefined && !softLimitReached && turnCount >= maxTurns) {
						softLimitReached = true;
						detachLiveSessionCall(() => session.steer?.("You have reached your turn limit. Wrap up immediately — provide your final answer now."));
					} else if (maxTurns !== undefined && softLimitReached && turnCount >= maxTurns + graceTurns) {
						detachLiveSessionCall(() => session.abort?.());
					}
				}
				input.onEvent?.(event);
				const text = [...eventText(event), ...finalAssistantText(event)].join("\n");
				if (text.trim()) {
					stdout += `${text}\n`;
					input.onOutput?.(text);
				}
			});
		}
		if (input.signal) {
			const signal = input.signal;
			if (signal.aborted) {
				await session.abort?.();
				// Cancelled before start: do not send the prompt; report through the failure path below.
				throw new Error("Live-session task was aborted before it started.");
			}
			const onAbort = (): void => detachLiveSessionCall(() => session.abort?.());
			signal.addEventListener("abort", onAbort, { once: true });
			removeAbortListener = () => signal.removeEventListener("abort", onAbort);
		}
		const effectivePrompt = input.runtimeConfig?.inheritContext === true && input.parentContext ? `${input.parentContext}\n\n---\n# Live Subagent Task\n${input.prompt}` : input.prompt;
		await session.prompt?.(effectivePrompt, { source: "api", expandPromptTemplates: false });
		const usage = usageFromStats(typeof session.getStats === "function" ? session.getStats() : session.stats);
		updateLiveAgentStatus(agentId, "completed");
		return { available: true, exitCode: 0, stdout: stdout.trim(), stderr: created.modelFallbackMessage ?? "", jsonEvents, usage };
	} catch (error) {
		const message = error instanceof Error ? error.message : String(error);
		updateLiveAgentStatus(`${input.manifest.runId}:${input.task.id}`, "failed");
		return { available: true, exitCode: 1, stdout: stdout.trim(), stderr: message, jsonEvents, error: message };
	} finally {
		if (controlTimer) clearInterval(controlTimer);
		unsubscribeControlRealtime?.();
		unsubscribe?.();
		// Detach from the caller's signal so a later abort cannot reach a finished session.
		removeAbortListener?.();
	}
}
|
||||
263
extensions/pi-crew/src/runtime/manifest-cache.ts
Normal file
263
extensions/pi-crew/src/runtime/manifest-cache.ts
Normal file
@@ -0,0 +1,263 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import { closeWatcher, watchWithErrorHandler } from "../utils/fs-watch.ts";
|
||||
import { findRepoRoot, projectCrewRoot, userCrewRoot } from "../utils/paths.ts";
|
||||
import { activeRunEntries } from "../state/active-run-registry.ts";
|
||||
import { isSafePathId, resolveContainedRelativePath, resolveRealContainedPath } from "../utils/safe-paths.ts";
|
||||
import type { TeamRunManifest } from "../state/types.ts";
|
||||
import { DEFAULT_CACHE, DEFAULT_PATHS } from "../config/defaults.ts";
|
||||
|
||||
/** Cached, validated view over team-run manifests on disk. */
export interface ManifestCache {
	/** Returns up to `limit` manifests, newest `createdAt` first. */
	list(limit?: number): TeamRunManifest[];
	/** Returns the manifest for `runId`, or `undefined` when it is missing or fails validation. */
	get(runId: string): TeamRunManifest | undefined;
	/** Drops the cached entry for `runId` (or all entries) and every cached list. */
	clear(runId?: string): void;
	/** Stops timers and watchers and empties the cache. */
	dispose(): void;
}
|
||||
|
||||
/** A parsed manifest together with the file stats used for change detection. */
interface CachedManifest {
	/** Path of the manifest file that was read. */
	path: string;
	manifest: TeamRunManifest;
	/** File mtime at load; together with `size` decides whether a re-parse is needed. */
	mtimeMs: number;
	/** File size at load. */
	size: number;
	/** When the entry was parsed; the oldest entries are evicted first. */
	loadedAtMs: number;
}

/** A cached `list()` result for one limit value. */
interface CachedList {
	runs: TeamRunManifest[];
	limit?: number;
	/** Timestamp (ms) after which the cached list is no longer served. */
	expireAtMs: number;
}
|
||||
|
||||
/** Options for createManifestCache. */
export interface ManifestCacheOptions {
	/** Used both as the list-cache lifetime and as the watcher debounce delay, in ms. Defaults to DEFAULT_TTL_MS. */
	debounceMs?: number;
	/** Whether to watch the run roots for changes. Defaults to true. */
	watch?: boolean;
	/** Maximum number of cached manifests. Defaults to DEFAULT_CACHE.manifestMaxEntries. */
	maxEntries?: number;
}

/** Default list-cache lifetime / watcher debounce in milliseconds. */
const DEFAULT_TTL_MS = 500;

/** A run discovered on disk or in the active-run registry, with the path of its manifest file. */
interface ParsedEntry {
	runId: string;
	path: string;
	manifest?: TeamRunManifest;
}
|
||||
|
||||
/**
 * Returns the manifest file path for `runId` under `root`, or `undefined`
 * when the id is not a safe path id or the run directory cannot be resolved
 * by `resolveRealContainedPath`.
 */
function manifestPathForRun(root: string, runId: string): string | undefined {
	if (isSafePathId(runId)) {
		try {
			const runDirectory = resolveRealContainedPath(root, runId);
			return path.join(runDirectory, DEFAULT_PATHS.state.manifestFile);
		} catch {
			// Resolution failed; fall through to the "no path" result.
		}
	}
	return undefined;
}
|
||||
|
||||
/** Reads and JSON-parses a manifest file; returns `undefined` when it cannot be read or parsed. */
function parseManifest(filePath: string): TeamRunManifest | undefined {
	let text: string;
	try {
		text = fs.readFileSync(filePath, "utf-8");
	} catch {
		return undefined;
	}
	try {
		return JSON.parse(text) as TeamRunManifest;
	} catch {
		return undefined;
	}
}
|
||||
|
||||
/**
 * Tells whether two paths denote the same location: either they resolve to
 * the same absolute path lexically, or their real paths are equal. Returns
 * false when a real path cannot be determined.
 */
function sameFilesystemPath(left: string, right: string): boolean {
	const lexicallyEqual = path.resolve(left) === path.resolve(right);
	if (lexicallyEqual) return true;
	let realLeft: string;
	let realRight: string;
	try {
		realLeft = fs.realpathSync.native(left);
		realRight = fs.realpathSync.native(right);
	} catch {
		return false;
	}
	return realLeft === realRight;
}
|
||||
|
||||
/**
 * Checks that a manifest really belongs to `runId` under `root`: its run id
 * matches and its state, tasks, events and artifacts paths are the ones
 * derived from the root. An existing artifacts directory must not be a
 * symlink and must resolve via `resolveRealContainedPath`.
 *
 * Any exception raised during the checks counts as a failed validation.
 */
function validateManifestForRoot(root: string, runId: string, manifest: TeamRunManifest): boolean {
	try {
		if (!isSafePathId(runId)) return false;
		const stateRoot = resolveContainedRelativePath(root, runId, "runId");
		// Two directory levels above the runs root is the crew root that holds the artifacts subdir.
		const crewRoot = path.dirname(path.dirname(root));
		const artifactsRoot = resolveContainedRelativePath(path.join(crewRoot, DEFAULT_PATHS.state.artifactsSubdir), runId, "runId");

		if (manifest.runId !== runId) return false;
		const expectedPaths: Array<[string, string]> = [
			[manifest.stateRoot, stateRoot],
			[manifest.tasksPath, path.join(stateRoot, DEFAULT_PATHS.state.tasksFile)],
			[manifest.eventsPath, path.join(stateRoot, DEFAULT_PATHS.state.eventsFile)],
			[manifest.artifactsRoot, artifactsRoot],
		];
		if (!expectedPaths.every(([actual, expected]) => sameFilesystemPath(actual, expected))) return false;

		if (!fs.existsSync(artifactsRoot)) return true;
		if (fs.lstatSync(artifactsRoot).isSymbolicLink()) return false;
		// Throws (and therefore fails validation) when the real path cannot be resolved.
		resolveRealContainedPath(path.dirname(artifactsRoot), path.basename(artifactsRoot));
		return true;
	} catch {
		return false;
	}
}
|
||||
|
||||
/**
 * Loads the manifest at `filePath`, reusing `previous` when the file's mtime
 * and size are unchanged. The manifest (reused or freshly parsed) is always
 * re-validated against `root` and `runId`.
 *
 * @returns The cache entry to use, or `undefined` when the file cannot be
 *   stat'ed or parsed, or validation fails.
 */
function parseManifestIfChanged(root: string, runId: string, filePath: string, previous?: CachedManifest): CachedManifest | undefined {
	let stat: fs.Stats;
	try {
		stat = fs.statSync(filePath);
	} catch {
		return undefined;
	}
	if (previous && previous.mtimeMs === stat.mtimeMs && previous.size === stat.size) {
		if (validateManifestForRoot(root, runId, previous.manifest)) return previous;
		return undefined;
	}
	const manifest = parseManifest(filePath);
	if (!manifest) return undefined;
	if (!validateManifestForRoot(root, runId, manifest)) return undefined;
	const { mtimeMs, size } = stat;
	return { path: filePath, manifest, mtimeMs, size, loadedAtMs: Date.now() };
}
|
||||
|
||||
/**
 * Returns the runs directory to scan: under the project crew root when `cwd`
 * is inside a repository, otherwise under the user crew root.
 */
function listRunRoots(cwd: string): string[] {
	const crewRoot = findRepoRoot(cwd) ? projectCrewRoot(cwd) : userCrewRoot();
	return [path.join(crewRoot, DEFAULT_PATHS.state.runsSubdir)];
}
|
||||
|
||||
/**
 * Lists the runs found directly under `root`: every directory entry with a
 * safe id for which a manifest path can be derived. Returns an empty list
 * when the root is missing or unreadable.
 */
function collectRoots(root: string): ParsedEntry[] {
	if (!fs.existsSync(root)) return [];
	let names: string[];
	try {
		names = fs.readdirSync(root);
	} catch {
		return [];
	}
	const found: ParsedEntry[] = [];
	for (const name of names) {
		if (name.length === 0 || !isSafePathId(name)) continue;
		const manifestPath = manifestPathForRun(root, name);
		if (manifestPath !== undefined) found.push({ runId: name, path: manifestPath });
	}
	return found;
}
|
||||
|
||||
/**
 * Creates a manifest cache for the run roots that apply to `cwd`.
 *
 * Individual manifests are cached by run id and re-parsed only when the
 * file's mtime or size changes. `list()` results are cached per limit for
 * `debounceMs` milliseconds. Unless `options.watch` is false, the run roots
 * are watched and a change clears the cached lists after the same delay.
 *
 * @param cwd Directory used to choose between the project and user crew root.
 * @param options Cache lifetime, watch and size settings.
 * @returns The cache; call `dispose()` to release watchers and timers.
 */
export function createManifestCache(cwd: string, options: ManifestCacheOptions = {}): ManifestCache {
	const ttlMs = options.debounceMs ?? DEFAULT_TTL_MS;
	const maxEntries = options.maxEntries ?? DEFAULT_CACHE.manifestMaxEntries;
	const roots = listRunRoots(cwd);
	const manifestIndex = new Map<string, CachedManifest>();
	const listCache = new Map<number, CachedList>();
	let listTimer: ReturnType<typeof setTimeout> | undefined;
	let watchers: fs.FSWatcher[] = [];

	/** Drops one cached manifest (or all of them) and every cached list. */
	function invalidate(runId?: string): void {
		if (runId) {
			manifestIndex.delete(runId);
		} else {
			manifestIndex.clear();
		}
		listCache.clear();
	}

	/** Debounced: clears the cached lists `ttlMs` after the most recent change notification. */
	function scheduleListRefresh(): void {
		if (listTimer) {
			clearTimeout(listTimer);
		}
		listTimer = setTimeout(() => {
			listTimer = undefined;
			listCache.clear();
		}, ttlMs);
		// Do not keep the process alive just for the refresh timer.
		listTimer.unref();
	}

	/** Stores an entry and trims the index to `maxEntries`, never evicting the entry just stored. */
	function remember(runId: string, entry: CachedManifest): void {
		manifestIndex.set(runId, entry);
		const overflow = manifestIndex.size - maxEntries;
		if (overflow <= 0) return;
		const evictable = [...manifestIndex.entries()]
			.filter(([key]) => key !== runId)
			.sort(([, a], [, b]) => a.loadedAtMs - b.loadedAtMs);
		for (const [key] of evictable.slice(0, overflow)) manifestIndex.delete(key);
	}

	/** Loads one manifest, preferring the active-run registry's path over the scanned roots. */
	function loadManifest(runId: string, rootsToCheck: string[]): CachedManifest | undefined {
		if (!isSafePathId(runId)) return undefined;
		const cached = manifestIndex.get(runId);
		const activeEntry = activeRunEntries().find((entry) => entry.runId === runId);
		if (activeEntry) {
			const activeRoot = path.dirname(activeEntry.stateRoot);
			const parsed = parseManifestIfChanged(activeRoot, runId, activeEntry.manifestPath, cached);
			if (parsed) {
				if (parsed !== cached) remember(runId, parsed);
				return parsed;
			}
		}
		for (const root of rootsToCheck) {
			const manifestPath = manifestPathForRun(root, runId);
			if (!manifestPath) continue;
			const parsed = parseManifestIfChanged(root, runId, manifestPath, cached);
			if (!parsed) continue;
			// parseManifestIfChanged returns the previous entry itself when the file is unchanged.
			if (parsed !== cached) remember(runId, parsed);
			// Return the parsed entry directly: re-reading the index could miss it after eviction.
			return parsed;
		}
		return undefined;
	}

	function list(limit = DEFAULT_CACHE.manifestMaxEntries): TeamRunManifest[] {
		const now = Date.now();
		const cachedList = listCache.get(limit);
		if (cachedList && cachedList.expireAtMs > now) {
			return cachedList.runs;
		}
		const parsedEntries = [
			...roots.flatMap((root) => collectRoots(root)),
			...activeRunEntries().map((entry) => ({ runId: entry.runId, path: entry.manifestPath })),
		];
		const unique = new Map<string, CachedManifest>();
		for (const entry of parsedEntries) {
			if (entry.runId.length === 0) continue;
			let cached = manifestIndex.get(entry.runId);
			// The manifest lives at <root>/<runId>/<manifest file>, so two levels up is the runs root.
			const root = path.dirname(path.dirname(entry.path));
			const parsed = parseManifestIfChanged(root, entry.runId, entry.path, cached);
			if (parsed) {
				cached = parsed;
				manifestIndex.set(entry.runId, cached);
			}
			// NOTE(review): when re-loading fails, the previously cached manifest is still listed — confirm this fallback is intended.
			if (cached) unique.set(entry.runId, cached);
		}

		const runs = [...unique.values()].map((value) => value.manifest);
		const sorted = runs.sort((a, b) => b.createdAt.localeCompare(a.createdAt));
		const limited = sorted.slice(0, Math.max(0, limit));
		if (manifestIndex.size > maxEntries) {
			const removeCount = manifestIndex.size - maxEntries;
			const oldest = [...manifestIndex.values()].sort((a, b) => a.loadedAtMs - b.loadedAtMs).slice(0, removeCount);
			for (const entry of oldest) manifestIndex.delete(entry.manifest.runId);
		}
		listCache.set(limit, { runs: limited, limit, expireAtMs: now + ttlMs });
		return limited;
	}

	function get(runId: string): TeamRunManifest | undefined {
		return loadManifest(runId, roots)?.manifest;
	}

	if (options.watch ?? true) {
		for (const root of roots) {
			// Both watcher callbacks (change and error) only schedule a list refresh.
			const watcher = watchWithErrorHandler(root, () => {
				scheduleListRefresh();
			}, () => {
				scheduleListRefresh();
			});
			if (watcher) {
				watcher.unref();
				watchers.push(watcher);
			}
		}
	}

	return {
		list,
		get,
		clear(runId) {
			invalidate(runId);
		},
		dispose() {
			if (listTimer) {
				clearTimeout(listTimer);
				listTimer = undefined;
			}
			for (const watcher of watchers) closeWatcher(watcher);
			watchers = [];
			manifestIndex.clear();
			listCache.clear();
		},
	};
}
|
||||
274
extensions/pi-crew/src/runtime/model-fallback.ts
Normal file
274
extensions/pi-crew/src/runtime/model-fallback.ts
Normal file
@@ -0,0 +1,274 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
|
||||
/** A model identified by provider and id. */
export interface AvailableModelInfo {
	provider: string;
	id: string;
	/** `${provider}/${id}`. */
	fullId: string;
}
|
||||
|
||||
/** Summary of one attempt with a given model. NOTE(review): producers/consumers are outside this view — confirm field semantics. */
export interface ModelAttemptSummary {
	model: string;
	success: boolean;
	exitCode?: number | null;
	error?: string;
}
|
||||
|
||||
/** Loosely typed model object; fields are validated at runtime before use. */
export interface ModelLike {
	provider?: unknown;
	id?: unknown;
}

/** Loosely typed model registry; `getAvailable` is preferred over `getAll` when both exist. */
export interface ModelRegistryLike {
	getAvailable?: () => unknown[];
	getAll?: () => unknown[];
}
|
||||
|
||||
/** Loosely typed settings object; both fields must be strings to yield a default model. */
interface PiSettingsLike {
	defaultProvider?: unknown;
	defaultModel?: unknown;
}

/** Loosely typed models JSON; `providers` is expected to be an object keyed by provider name. */
interface PiModelsJsonLike {
	providers?: unknown;
}

/** Loosely typed per-provider configuration; fields are validated at runtime before use. */
interface PiProviderConfigLike {
	models?: unknown;
	modelOverrides?: unknown;
}
|
||||
|
||||
/** Converts an untyped model object into AvailableModelInfo; returns `undefined` unless `provider` and `id` are both strings. */
function modelInfoFromUnknown(value: unknown): AvailableModelInfo | undefined {
	if (value === null || typeof value !== "object" || Array.isArray(value)) return undefined;
	const { provider, id } = value as ModelLike;
	if (typeof provider === "string" && typeof id === "string") {
		return { provider, id, fullId: `${provider}/${id}` };
	}
	return undefined;
}
|
||||
|
||||
/**
 * Lists the models offered by a registry, using `getAvailable()` when
 * present and `getAll()` otherwise. Entries that are not valid model objects
 * are skipped.
 *
 * @returns The model infos, or `undefined` when `registry` is not an object
 *   or yields no array.
 */
export function availableModelInfosFromRegistry(registry: unknown): AvailableModelInfo[] | undefined {
	if (!registry || typeof registry !== "object" || Array.isArray(registry)) return undefined;
	const source = registry as ModelRegistryLike;
	let listed: unknown;
	if (typeof source.getAvailable === "function") {
		listed = source.getAvailable();
	} else if (typeof source.getAll === "function") {
		listed = source.getAll();
	}
	if (!Array.isArray(listed)) return undefined;
	const infos: AvailableModelInfo[] = [];
	for (const item of listed) {
		const info = modelInfoFromUnknown(item);
		if (info !== undefined) infos.push(info);
	}
	return infos;
}
|
||||
|
||||
/** Returns the "provider/id" string for an untyped model object, or `undefined` when it is not a valid model. */
export function modelStringFromUnknown(model: unknown): string | undefined {
	const info = modelInfoFromUnknown(model);
	return info ? info.fullId : undefined;
}
|
||||
|
||||
/**
 * Removes duplicate models (same `fullId`), keeping the first occurrence and
 * preserving the input order. The input array is not modified.
 */
function uniqueModelInfos(models: AvailableModelInfo[]): AvailableModelInfo[] {
  const emitted = new Set<string>();
  const unique: AvailableModelInfo[] = [];
  for (const candidate of models) {
    if (emitted.has(candidate.fullId)) continue;
    emitted.add(candidate.fullId);
    unique.push(candidate);
  }
  return unique;
}
|
||||
|
||||
/**
 * Reads a JSON file and returns it only when its top-level value is an object.
 *
 * Best effort by design: a missing file, unreadable file, invalid JSON, or a
 * non-object top-level value (array, primitive, null) all yield `undefined`.
 */
function readJsonObject(filePath: string): Record<string, unknown> | undefined {
  try {
    if (!fs.existsSync(filePath)) return undefined;
    const text = fs.readFileSync(filePath, "utf-8");
    const data: unknown = JSON.parse(text);
    if (data === null || typeof data !== "object" || Array.isArray(data)) return undefined;
    return data as Record<string, unknown>;
  } catch {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Resolves the Pi agent directory.
 *
 * Uses the trimmed `PI_CODING_AGENT_DIR` environment variable when it is
 * non-empty, expanding a bare `~` or a leading `~/` to the home directory.
 * Otherwise returns `<home>/.pi/agent`.
 */
function piAgentDir(): string {
  const configured = process.env.PI_CODING_AGENT_DIR?.trim();
  if (!configured) return path.join(os.homedir(), ".pi", "agent");
  if (configured === "~") return os.homedir();
  return configured.startsWith("~/") ? path.join(os.homedir(), configured.slice(2)) : configured;
}
|
||||
|
||||
/**
 * Builds the default model described by a settings object.
 *
 * @returns The model when both `defaultProvider` and `defaultModel` are
 *   strings; `undefined` otherwise (including when `settings` is undefined).
 */
function settingsModelInfo(settings: PiSettingsLike | undefined): AvailableModelInfo | undefined {
  const provider = settings?.defaultProvider;
  const id = settings?.defaultModel;
  if (typeof provider !== "string" || typeof id !== "string") return undefined;
  return { provider, id, fullId: `${provider}/${id}` };
}
|
||||
|
||||
/**
 * Extracts every model declared in a `models.json`-shaped object.
 *
 * For each provider entry (in key order) this emits first the string `id` of
 * every object in its `models` array, then every key of its `modelOverrides`
 * object. Malformed providers, entries, or ids are skipped silently.
 * Duplicates are NOT removed here.
 */
function modelsJsonInfos(modelsJson: PiModelsJsonLike | undefined): AvailableModelInfo[] {
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    Boolean(value) && typeof value === "object" && !Array.isArray(value);

  const providers = modelsJson?.providers;
  if (!isPlainObject(providers)) return [];

  const collected: AvailableModelInfo[] = [];
  const record = (provider: string, id: string): void => {
    collected.push({ provider, id, fullId: `${provider}/${id}` });
  };

  for (const provider of Object.keys(providers)) {
    const entry = providers[provider];
    if (!isPlainObject(entry)) continue;
    const { models, modelOverrides } = entry as PiProviderConfigLike;

    if (Array.isArray(models)) {
      for (const candidate of models) {
        if (!isPlainObject(candidate)) continue;
        if (typeof candidate.id === "string") record(provider, candidate.id);
      }
    }
    if (isPlainObject(modelOverrides)) {
      Object.keys(modelOverrides).forEach((id) => record(provider, id));
    }
  }
  return collected;
}
|
||||
|
||||
/**
 * Collects the models configured on disk for Pi.
 *
 * Sources, in result order:
 *  1. the default model from `settings.json` — the agent-dir settings overlaid
 *     by `<cwd>/.pi/settings.json` when `cwd` is given (project keys win);
 *  2. every model listed in the agent-dir `models.json`.
 *
 * Duplicates (same `fullId`) are removed, first occurrence wins.
 */
export function configuredModelInfosFromPiConfig(cwd?: string): AvailableModelInfo[] {
  const agentDir = piAgentDir();

  const globalSettings = readJsonObject(path.join(agentDir, "settings.json")) as PiSettingsLike | undefined;
  let projectSettings: PiSettingsLike | undefined;
  if (cwd) {
    projectSettings = readJsonObject(path.join(cwd, ".pi", "settings.json")) as PiSettingsLike | undefined;
  }
  // Object.assign skips undefined sources, so missing files contribute nothing.
  const merged: PiSettingsLike = Object.assign({}, globalSettings, projectSettings);

  const fromSettings = settingsModelInfo(merged);
  const fromModelsJson = modelsJsonInfos(readJsonObject(path.join(agentDir, "models.json")) as PiModelsJsonLike | undefined);

  return uniqueModelInfos(fromSettings ? [fromSettings, ...fromModelsJson] : fromModelsJson);
}
|
||||
|
||||
/** Thinking levels that may follow the final ":" of a model string. */
const MODEL_THINKING_SUFFIX_LEVELS: readonly string[] = ["off", "minimal", "low", "medium", "high", "xhigh"];

/**
 * Splits a trailing `:<thinking-level>` suffix off a model string.
 *
 * Only a recognised thinking level is treated as a suffix. Model ids can
 * legitimately contain colons (for example tagged ids such as `llama3:8b`);
 * splitting those on the last colon would truncate the id and make lookups
 * against the available-model list fail.
 *
 * @param model Model string, optionally ending in `:<level>`.
 * @returns `baseModel` without the suffix and `thinkingSuffix` including its
 *   leading colon, or the whole string and `""` when there is no thinking suffix.
 */
export function splitThinkingSuffix(model: string): { baseModel: string; thinkingSuffix: string } {
  const colonIdx = model.lastIndexOf(":");
  if (colonIdx === -1 || !MODEL_THINKING_SUFFIX_LEVELS.includes(model.substring(colonIdx + 1))) {
    return { baseModel: model, thinkingSuffix: "" };
  }
  return {
    baseModel: model.substring(0, colonIdx),
    thinkingSuffix: model.substring(colonIdx),
  };
}
|
||||
|
||||
/**
 * Resolves a bare model id to a provider-qualified `provider/id` string.
 *
 * - Empty/undefined input yields `undefined`.
 * - Input already containing "/" is returned unchanged.
 * - With no available models to match against, the input is returned unchanged.
 * - Otherwise the id (without its thinking suffix) is matched against
 *   `availableModels`: a match from `preferredProvider` wins; failing that, a
 *   single unambiguous match is used. The thinking suffix is re-appended.
 * - When there is no match, or several providers share the id and none is the
 *   preferred one, the raw input is returned unchanged.
 */
export function resolveModelCandidate(
  model: string | undefined,
  availableModels: AvailableModelInfo[] | undefined,
  preferredProvider?: string,
): string | undefined {
  if (!model) return undefined;
  if (model.includes("/")) return model;
  if (!availableModels || availableModels.length === 0) return model;

  const { baseModel, thinkingSuffix } = splitThinkingSuffix(model);
  const sameId = availableModels.filter((entry) => entry.id === baseModel);

  const preferred = preferredProvider
    ? sameId.find((entry) => entry.provider === preferredProvider)
    : undefined;
  if (preferred) return `${preferred.fullId}${thinkingSuffix}`;

  // Ambiguous (or missing) ids are left for the caller to interpret.
  const [only, ...others] = sameId;
  if (only === undefined || others.length > 0) return model;
  return `${only.fullId}${thinkingSuffix}`;
}
|
||||
|
||||
/**
 * Error-text patterns for which trying another model is considered worthwhile.
 * Consulted by `isRetryableModelFailure` after the non-retryable patterns.
 */
const RETRYABLE_MODEL_FAILURE_PATTERNS: RegExp[] = [
  // Throttling and quota.
  /rate\s*limit/i,
  /too many requests/i,
  /\b429\b/,
  /quota/i,
  // Provider or model not usable.
  /provider.*unavailable/i,
  /model.*unavailable/i,
  /model.*disabled/i,
  /model.*not found/i,
  /unknown model/i,
  /overloaded/i,
  /service unavailable/i,
  /temporar(?:ily)? unavailable/i,
  // Transport problems.
  /connection refused/i,
  /fetch failed/i,
  /network error/i,
  /socket hang up/i,
  /upstream/i,
  /timed? out/i,
  /timeout/i,
  // Gateway status codes.
  /\b502\b/,
  /\b503\b/,
  /\b504\b/,
];
|
||||
|
||||
/**
 * Error-text patterns for auth, key, and billing problems. These will never
 * succeed on retry, so they take precedence over the retryable patterns.
 */
const NON_RETRYABLE_MODEL_FAILURE_PATTERNS: RegExp[] = [
  // Authentication and authorization.
  /auth(?:entication)?/i,
  /unauthori[sz]ed/i,
  /forbidden/i,
  // Credentials.
  /api key/i,
  /token expired/i,
  /invalid key/i,
  // Account standing.
  /billing/i,
  /credit/i,
];
|
||||
|
||||
/**
 * Decides whether a model failure is worth retrying with another model.
 *
 * @param error Error text of the failed attempt.
 * @returns `false` for empty input or when any non-retryable pattern matches
 *   (auth / billing / invalid-key failures never succeed on retry); otherwise
 *   `true` only when a retryable pattern matches.
 */
export function isRetryableModelFailure(error: string | undefined): boolean {
  if (!error) return false;
  const message: string = error;

  for (const pattern of NON_RETRYABLE_MODEL_FAILURE_PATTERNS) {
    if (pattern.test(message)) return false;
  }
  for (const pattern of RETRYABLE_MODEL_FAILURE_PATTERNS) {
    if (pattern.test(message)) return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Formats a one-line `[fallback]` note describing a failed model attempt.
 *
 * The failure text is the trimmed error when non-empty, otherwise
 * `exit <code>` (a missing exit code is reported as 1). When `nextModel` is
 * given, the note also states which model will be tried next.
 */
export function formatModelAttemptNote(attempt: ModelAttemptSummary, nextModel?: string): string {
  const trimmedError = attempt.error?.trim();
  const failure = trimmedError ? trimmedError : `exit ${attempt.exitCode ?? 1}`;
  const note = `[fallback] ${attempt.model} failed: ${failure}.`;
  return nextModel ? `${note} Retrying with ${nextModel}.` : note;
}
|
||||
|
||||
/**
 * Builds the ordered, de-duplicated list of model candidates to try.
 *
 * The primary model comes first, followed by the fallbacks in order. Each
 * entry is trimmed and resolved through `resolveModelCandidate`; empty
 * entries and repeated resolved values are dropped.
 */
export function buildModelCandidates(
  primaryModel: string | undefined,
  fallbackModels: string[] | undefined,
  availableModels: AvailableModelInfo[] | undefined,
  preferredProvider?: string,
): string[] {
  // A Set iterates in insertion order, which gives first-occurrence de-duplication.
  const ordered = new Set<string>();
  const requested = [primaryModel, ...(fallbackModels ?? [])];
  for (const entry of requested) {
    if (!entry) continue;
    const resolved = resolveModelCandidate(entry.trim(), availableModels, preferredProvider);
    if (resolved) ordered.add(resolved);
  }
  return [...ordered];
}
|
||||
|
||||
/**
 * Checks whether a model string refers to one of the available models.
 *
 * With no availability information (undefined or empty list) every model is
 * accepted. Otherwise the thinking suffix is ignored and the remainder is
 * compared against `fullId` when it contains "/", or against `id` when it
 * does not.
 */
function isAvailableModel(model: string, availableModels: AvailableModelInfo[] | undefined): boolean {
  if (!availableModels || availableModels.length === 0) return true;
  const { baseModel } = splitThinkingSuffix(model);
  const field: "fullId" | "id" = baseModel.includes("/") ? "fullId" : "id";
  return availableModels.some((entry) => entry[field] === baseModel);
}
|
||||
|
||||
/** Result of model routing as produced by `buildConfiguredModelRouting`. */
export interface ConfiguredModelRouting {
  /** First non-blank model named by the caller's inputs, if any. */
  requested?: string;
  /** Ordered, de-duplicated models to try. */
  candidates: string[];
  /** Human-readable explanation when routing deviated from, or extended, the request. */
  reason?: string;
}
|
||||
|
||||
/**
 * Computes which models a worker should try, in order.
 *
 * Availability comes from the model registry when it lists at least one
 * model, otherwise from the Pi config files on disk; the config files are
 * only read when the registry provided nothing usable.
 *
 * Precedence of the requested model: `overrideModel`, `stepModel`,
 * `teamRoleModel`, then `agentModel` (which inherits the parent session model
 * when blank). After those come `fallbackModels` and finally either every
 * available model (when availability is known) or the parent model.
 *
 * @returns `requested` (first non-blank requested model, returned as given),
 *   `candidates` (resolved, de-duplicated, restricted to available models) and
 *   an optional `reason` describing a fallback.
 */
export function buildConfiguredModelRouting(input: {
  overrideModel?: string;
  stepModel?: string;
  teamRoleModel?: string;
  agentModel?: string;
  fallbackModels?: string[];
  parentModel?: unknown;
  modelRegistry?: unknown;
  cwd?: string;
}): ConfiguredModelRouting {
  const registryModels = availableModelInfosFromRegistry(input.modelRegistry);
  let availableModels = registryModels;
  if (!registryModels || registryModels.length === 0) {
    // Only touch the disk when the registry gave us nothing usable.
    const configModels = configuredModelInfosFromPiConfig(input.cwd);
    if (configModels.length > 0) availableModels = configModels;
  }

  const parentModel = modelStringFromUnknown(input.parentModel);
  const preferredProvider = parentModel?.split("/")[0] ?? availableModels?.[0]?.provider;

  // B3: Parent model inheritance — when agent has no model specified,
  // inherit from parent session model before falling back to defaults.
  const effectiveAgentModel = input.agentModel?.trim() ? input.agentModel : parentModel;

  const isNonBlank = (model: string | undefined): model is string => Boolean(model?.trim());
  const requestedChain = [input.overrideModel, input.stepModel, input.teamRoleModel, effectiveAgentModel];
  const requested = requestedChain.find(isNonBlank);

  // Availability is known but empty: nothing can be routed.
  if (availableModels && availableModels.length === 0) {
    return { requested, candidates: [], reason: "no configured Pi models available" };
  }

  const lastResort = availableModels ? availableModels.map((model) => model.fullId) : [parentModel];
  const configuredModels = [...requestedChain, ...(input.fallbackModels ?? []), ...lastResort]
    .filter(isNonBlank)
    .filter((model) => isAvailableModel(model.trim(), availableModels));

  const candidates = buildModelCandidates(configuredModels[0], configuredModels.slice(1), availableModels, preferredProvider);

  // Candidates are resolved from trimmed strings, so resolve the request the
  // same way; otherwise stray whitespace is misreported as an unavailable model.
  const selected = candidates[0];
  let reason: string | undefined;
  if (requested && selected && resolveModelCandidate(requested.trim(), availableModels, preferredProvider) !== selected) {
    reason = "requested model unavailable; selected configured Pi fallback";
  } else if (candidates.length > 1) {
    reason = "configured Pi fallback chain";
  }
  return { requested, candidates, reason };
}
|
||||
|
||||
/** Convenience wrapper returning only the `candidates` of `buildConfiguredModelRouting`. */
export function buildConfiguredModelCandidates(input: Parameters<typeof buildConfiguredModelRouting>[0]): string[] {
  const { candidates } = buildConfiguredModelRouting(input);
  return candidates;
}
|
||||
176
extensions/pi-crew/src/runtime/overflow-recovery.ts
Normal file
176
extensions/pi-crew/src/runtime/overflow-recovery.ts
Normal file
@@ -0,0 +1,176 @@
|
||||
import { logInternalError } from "../utils/internal-error.ts";
|
||||
|
||||
/** Phase of overflow recovery for one task run, as tracked by OverflowRecoveryTracker. */
export type OverflowPhase = "none" | "compaction" | "retrying" | "recovered" | "failed";
|
||||
|
||||
/** Recovery bookkeeping for a single (run, task) pair. */
export interface OverflowRecoveryState {
  taskId: string;
  runId: string;
  /** Current recovery phase. */
  phase: OverflowPhase;
  /** `Date.now()` timestamp (ms) of the first event seen for this pair. */
  startedAt: number;
  /** `Date.now()` timestamp (ms) of the most recent event or timeout. */
  lastEventAt: number;
  /** Number of `compaction_start` events seen. */
  compactionCount: number;
  /** Number of `auto_retry_start` events seen. */
  retryCount: number;
}
|
||||
|
||||
/** Optional hooks invoked by the overflow recovery tracker. */
export interface OverflowRecoveryCallbacks {
  /** Called whenever a tracked state moves to a different phase (including on timeout). */
  onPhaseChange?: (state: OverflowRecoveryState, previousPhase: OverflowPhase) => void;
  /** Called when a non-terminal phase times out; the state is already marked "failed". */
  onTimeout?: (state: OverflowRecoveryState) => void;
}
|
||||
|
||||
// How long a state may sit in an in-progress phase without a new event before its timeout fires.
const PHASE_TIMEOUT_MS = 120_000; // 120 seconds per phase
|
||||
// How long a "recovered", "failed" or "none" state is retained before its timer evicts it (5 minutes).
const TERMINAL_STATE_TTL_MS = 5 * 60_000;
|
||||
|
||||
/**
 * Tracks context-overflow recovery per (runId, taskId) pair.
 *
 * Events fed through `feedEvent` move a state between phases. Every state has
 * one timer: in-progress phases ("compaction", "retrying") fail after
 * `PHASE_TIMEOUT_MS` without a new event, while idle/terminal states ("none",
 * "recovered", "failed") are evicted after `TERMINAL_STATE_TTL_MS`.
 * Callback exceptions are logged and never propagate to the caller.
 */
export class OverflowRecoveryTracker {
  private states = new Map<string, OverflowRecoveryState>();
  private timers = new Map<string, ReturnType<typeof setTimeout>>();
  private callbacks: OverflowRecoveryCallbacks;

  constructor(callbacks: OverflowRecoveryCallbacks = {}) {
    this.callbacks = callbacks;
  }

  /**
   * Applies one event to the state of a task run and returns the resulting phase.
   *
   * "recovered" and "failed" are sticky: further events only refresh
   * `lastEventAt`. Unknown event types leave the phase unchanged.
   */
  feedEvent(taskId: string, runId: string, eventType: string): OverflowPhase {
    const key = this.keyFor(taskId, runId);
    const existing = this.states.get(key);
    const now = Date.now();

    if (existing && (existing.phase === "recovered" || existing.phase === "failed")) {
      existing.lastEventAt = now;
      return existing.phase;
    }

    let phase: OverflowPhase = existing?.phase ?? "none";
    let compactionCount = existing?.compactionCount ?? 0;
    let retryCount = existing?.retryCount ?? 0;
    const previousPhase = phase;

    switch (eventType) {
      case "compaction_start":
        phase = "compaction";
        compactionCount++;
        break;
      case "compaction_end":
        // After compaction, we expect a retry; stay in compaction until retry starts.
        break;
      case "auto_retry_start":
        phase = "retrying";
        retryCount++;
        break;
      case "auto_retry_end":
        // The retry completed, so the run is treated as recovered from here on.
        phase = "recovered";
        break;
      case "agent_end":
        // The agent ended while recovery was still in progress: recovery failed.
        if (phase === "compaction" || phase === "retrying") {
          phase = "failed";
        }
        break;
      default:
        // Unknown event type — no phase change.
        break;
    }

    const state: OverflowRecoveryState = {
      taskId,
      runId,
      phase,
      startedAt: existing?.startedAt ?? now,
      lastEventAt: now,
      compactionCount,
      retryCount,
    };

    this.states.set(key, state);
    this.resetTimeout(key);

    if (previousPhase !== phase && this.callbacks.onPhaseChange) {
      try {
        this.callbacks.onPhaseChange(state, previousPhase);
      } catch (error) {
        logInternalError("overflow-recovery.onPhaseChange", error, `taskId=${taskId}`);
      }
    }

    return phase;
  }

  /**
   * Returns the state for a task. With `runId` the exact pair is looked up;
   * without it, the first tracked state with that task id is returned.
   */
  getState(taskId: string, runId?: string): OverflowRecoveryState | undefined {
    if (runId) return this.states.get(this.keyFor(taskId, runId));
    return [...this.states.values()].find((state) => state.taskId === taskId);
  }

  /** Returns the current phase for a task, or "none" when it is not tracked. */
  getPhase(taskId: string, runId?: string): OverflowPhase {
    return this.getState(taskId, runId)?.phase ?? "none";
  }

  /** Forgets a task: one specific run when `runId` is given, otherwise every run of it. */
  removeTask(taskId: string, runId?: string): void {
    const keys = runId
      ? [this.keyFor(taskId, runId)]
      : [...this.states.entries()].filter(([, state]) => state.taskId === taskId).map(([key]) => key);
    for (const key of keys) this.removeKey(key);
  }

  /** Cancels all timers and drops all state. */
  dispose(): void {
    for (const timer of this.timers.values()) clearTimeout(timer);
    this.timers.clear();
    this.states.clear();
  }

  /** Map key for a pair; the NUL separator keeps run and task ids from colliding. */
  private keyFor(taskId: string, runId: string): string {
    return `${runId}\u0000${taskId}`;
  }

  private removeKey(key: string): void {
    this.states.delete(key);
    const timer = this.timers.get(key);
    if (timer) clearTimeout(timer);
    this.timers.delete(key);
  }

  /** True for phases that are only retained for a TTL rather than actively timed out. */
  private static isIdleOrTerminal(phase: OverflowPhase | undefined): boolean {
    return phase === "recovered" || phase === "failed" || phase === "none";
  }

  /** (Re)arms the single timer for `key` according to the state's current phase. */
  private resetTimeout(key: string): void {
    const existing = this.timers.get(key);
    if (existing) clearTimeout(existing);

    const current = this.states.get(key);
    const timeoutMs = OverflowRecoveryTracker.isIdleOrTerminal(current?.phase) ? TERMINAL_STATE_TTL_MS : PHASE_TIMEOUT_MS;

    const timer = setTimeout(() => {
      this.timers.delete(key);
      const state = this.states.get(key);
      if (!state) return;
      if (OverflowRecoveryTracker.isIdleOrTerminal(state.phase)) {
        // TTL elapsed for an idle/terminal state: evict it.
        this.states.delete(key);
        return;
      }

      const previousPhase = state.phase;
      state.phase = "failed";
      state.lastEventAt = Date.now();
      // Arm the TTL timer for the now-terminal state before running callbacks.
      // Without it a timed-out state would stay in the map until removeTask()
      // or dispose(), because later events on a failed state return early.
      this.resetTimeout(key);

      if (this.callbacks.onTimeout) {
        try {
          this.callbacks.onTimeout(state);
        } catch (error) {
          logInternalError("overflow-recovery.onTimeout", error, `taskId=${state.taskId}`);
        }
      }
      if (this.callbacks.onPhaseChange) {
        try {
          this.callbacks.onPhaseChange(state, previousPhase);
        } catch (error) {
          logInternalError("overflow-recovery.onPhaseChange-timeout", error, `taskId=${state.taskId}`);
        }
      }
    }, timeoutMs);

    // Do not keep the process alive just for recovery bookkeeping.
    timer.unref();
    this.timers.set(key, timer);
  }
}
|
||||
44
extensions/pi-crew/src/runtime/parallel-research.ts
Normal file
44
extensions/pi-crew/src/runtime/parallel-research.ts
Normal file
@@ -0,0 +1,44 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import type { WorkflowConfig, WorkflowStep } from "../workflows/workflow-config.ts";
|
||||
|
||||
/**
 * Lists the `pi-*` project directories directly under `<cwd>/Source`.
 *
 * @returns Sorted relative paths of the form `Source/<name>`; an empty array
 *   when the directory cannot be read.
 */
export function sourcePiProjects(cwd: string): string[] {
  try {
    const entries = fs.readdirSync(path.join(cwd, "Source"), { withFileTypes: true });
    const projects: string[] = [];
    for (const entry of entries) {
      if (!entry.isDirectory()) continue;
      if (!entry.name.startsWith("pi-")) continue;
      projects.push(`Source/${entry.name}`);
    }
    projects.sort();
    return projects;
  } catch {
    return [];
  }
}
|
||||
|
||||
/** Number of chunks used when the caller gives no usable target. */
const DEFAULT_CHUNK_TARGET = 6;

/**
 * Distributes projects round-robin over at most `target` chunks.
 *
 * The chunk count is clamped to at least 1 and at most `projects.length`;
 * fractional targets are rounded down. A NaN target falls back to the default
 * instead of producing zero buckets and throwing on the first project.
 *
 * @returns The non-empty chunks; an empty array when `projects` is empty.
 */
export function chunkProjects(projects: string[], target = DEFAULT_CHUNK_TARGET): string[][] {
  const requested = Number.isNaN(target) ? DEFAULT_CHUNK_TARGET : target;
  const count = Math.min(Math.max(1, Math.floor(requested)), Math.max(1, projects.length));
  const chunks: string[][] = Array.from({ length: count }, () => []);
  projects.forEach((project, index) => {
    chunks[index % count]?.push(project);
  });
  return chunks.filter((chunk) => chunk.length > 0);
}
|
||||
|
||||
/**
 * Expands the "parallel-research" workflow into concrete steps based on the
 * `pi-*` projects found under `<cwd>/Source`.
 *
 * Any other workflow, or a run with no projects found, is returned unchanged.
 * Otherwise the steps are replaced by: one `discover` inventory step, one
 * `explore-shard-N` step per project chunk (4 to 8 chunks requested, about
 * three projects each), a `synthesize` step depending on all shards, and a
 * final `write` step.
 */
export function expandParallelResearchWorkflow(workflow: WorkflowConfig, cwd: string): WorkflowConfig {
  if (workflow.name !== "parallel-research") return workflow;
  const projects = sourcePiProjects(cwd);
  if (projects.length === 0) return workflow;

  const asBullets = (items: string[]): string[] => items.map((item) => `- ${item}`);
  const shardTarget = Math.min(8, Math.max(4, Math.ceil(projects.length / 3)));

  const exploreSteps: WorkflowStep[] = [];
  chunkProjects(projects, shardTarget).forEach((paths, index) => {
    const taskLines = [
      `Explore this dynamic shard for: {goal}`,
      "",
      "Paths:",
      ...asBullets(paths),
      "",
      "Focus on purpose, architecture, runtime/UI patterns, package config, docs, and lessons for pi-crew.",
    ];
    exploreSteps.push({
      id: `explore-shard-${index + 1}`,
      role: "explorer",
      parallelGroup: "explore",
      reads: paths,
      task: taskLines.join("\n"),
    });
  });

  const discoverStep: WorkflowStep = {
    id: "discover",
    role: "explorer",
    parallelGroup: "inventory",
    task: `Quickly inventory and validate ${projects.length} pi-* projects for: {goal}\n\nProjects:\n${asBullets(projects).join("\n")}\n\nDo not block shard work; summarize routing notes only.`,
  };
  const synthesizeStep: WorkflowStep = {
    id: "synthesize",
    role: "analyst",
    dependsOn: exploreSteps.map((step) => step.id),
    task: "Synthesize all dynamic shard findings. Identify common patterns, gaps, and concrete recommendations. Use discover output if available, but prioritize completed shard outputs.",
  };
  const writeStep: WorkflowStep = {
    id: "write",
    role: "writer",
    dependsOn: ["synthesize"],
    output: "research-summary.md",
    task: "Write a concise final summary with evidence, risks, and actionable next steps.",
  };

  return { ...workflow, steps: [discoverStep, ...exploreSteps, synthesizeStep, writeStep] };
}
|
||||
99
extensions/pi-crew/src/runtime/parallel-utils.ts
Normal file
99
extensions/pi-crew/src/runtime/parallel-utils.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
/**
 * One subagent invocation handed to the runner.
 *
 * NOTE(review): field semantics are defined by the runner that consumes these
 * steps, which is not part of this module — confirm there before relying on them.
 */
export interface RunnerSubagentStep {
  agent: string;
  task: string;
  cwd?: string;
  model?: string;
  modelCandidates?: string[];
  tools?: string[];
  extensions?: string[];
  mcpDirectTools?: string[];
  systemPrompt?: string | null;
  systemPromptMode?: "append" | "replace";
  inheritProjectContext: boolean;
  inheritSkills: boolean;
  skills?: string[];
  outputPath?: string;
  sessionFile?: string;
  maxSubagentDepth?: number;
}
|
||||
|
||||
/**
 * A group of subagent steps to run together. The `parallel` array is what
 * distinguishes a group from a single step (see `isParallelGroup`).
 *
 * NOTE(review): `concurrency`, `failFast` and `worktree` are interpreted by
 * the runner, which is not part of this module.
 */
export interface ParallelStepGroup {
  parallel: RunnerSubagentStep[];
  concurrency?: number;
  failFast?: boolean;
  worktree?: boolean;
}
|
||||
|
||||
/** A runner step: either a single subagent step or a group of parallel subagent steps. */
export type RunnerStep = RunnerSubagentStep | ParallelStepGroup;
|
||||
|
||||
/** Type guard: true when the step carries a `parallel` array, i.e. is a parallel group. */
export function isParallelGroup(step: RunnerStep): step is ParallelStepGroup {
  if (!("parallel" in step)) return false;
  return Array.isArray(step.parallel);
}
|
||||
|
||||
/**
 * Flattens a list of runner steps into individual subagent steps, expanding
 * every parallel group in place and preserving the overall order.
 */
export function flattenSteps(steps: RunnerStep[]): RunnerSubagentStep[] {
  return steps.flatMap((step) => (isParallelGroup(step) ? [...step.parallel] : [step]));
}
|
||||
|
||||
/**
 * Maps `items` through an async function with bounded concurrency.
 *
 * Up to `limit` workers (at least one; non-finite or non-positive limits
 * count as 1) pull the next unprocessed index until the list is exhausted.
 * Results are stored at the index of their input, so output order matches
 * input order regardless of completion order. Rejects as soon as any call of
 * `fn` rejects.
 */
export async function mapConcurrent<T, R>(items: T[], limit: number, fn: (item: T, i: number) => Promise<R>): Promise<R[]> {
  const workerCount = Math.min(Math.max(1, Math.floor(limit) || 1), items.length);
  const results: R[] = new Array(items.length);
  let cursor = 0;

  async function drain(): Promise<void> {
    for (;;) {
      const index = cursor;
      if (index >= items.length) return;
      cursor = index + 1;
      results[index] = await fn(items[index], index);
    }
  }

  const workers: Promise<void>[] = [];
  for (let started = 0; started < workerCount; started++) workers.push(drain());
  await Promise.all(workers);
  return results;
}
|
||||
|
||||
/** Result of one task of a parallel group, as consumed by `aggregateParallelOutputs`. */
export interface ParallelTaskResult {
  agent: string;
  /** Zero-based index used for the section header; defaults to the position in the results array. */
  taskIndex?: number;
  /** Textual output of the task. */
  output: string;
  /** 0 is reported as success, -1 as skipped, anything else (including null) as failed. */
  exitCode: number | null;
  error?: string;
  model?: string;
  attemptedModels?: string[];
  /** Output file the task was expected to write, if any. */
  outputTargetPath?: string;
  /** Whether `outputTargetPath` existed after the task ran. */
  outputTargetExists?: boolean;
}
|
||||
|
||||
/**
 * Renders the results of a parallel group as one text block.
 *
 * Each result becomes a section: a header line followed by a status line
 * (when there is something to report) and the task output (when non-blank).
 * Sections are separated by a blank line.
 *
 * Status precedence: SKIPPED (exit code -1), FAILED (any other non-zero or
 * missing exit code), WARNING (success with an error message), then the two
 * EMPTY OUTPUT variants for blank output.
 *
 * @param headerFormat Builds the header from the zero-based task index
 *   (`taskIndex` when set, else the array position) and the agent name.
 */
export function aggregateParallelOutputs(
  results: ParallelTaskResult[],
  headerFormat: (index: number, agent: string) => string = (i, agent) => `=== Parallel Task ${i + 1} (${agent}) ===`,
): string {
  const describeStatus = (result: ParallelTaskResult, hasOutput: boolean): string => {
    if (result.exitCode === -1) return "SKIPPED";
    if (result.exitCode == null || result.exitCode !== 0) {
      const detail = result.error ? `: ${result.error}` : "";
      return `FAILED (exit code ${result.exitCode})${detail}`;
    }
    if (result.error) return `WARNING: ${result.error}`;
    if (hasOutput) return "";
    if (!result.outputTargetPath) return "EMPTY OUTPUT (no textual response returned)";
    if (result.outputTargetExists === false) {
      return `EMPTY OUTPUT (expected output file missing: ${result.outputTargetPath})`;
    }
    return "";
  };

  const sections: string[] = [];
  results.forEach((result, position) => {
    const header = headerFormat(result.taskIndex ?? position, result.agent);
    const hasOutput = Boolean(result.output?.trim());
    const status = describeStatus(result, hasOutput);
    let body: string;
    if (!status) body = result.output;
    else if (hasOutput) body = `${status}\n${result.output}`;
    else body = status;
    sections.push(`${header}\n${body}`);
  });
  return sections.join("\n\n");
}
|
||||
|
||||
// NOTE(review): presumably the upper bound on concurrently running parallel tasks; it is not used in this module — confirm at the call sites.
export const MAX_PARALLEL_CONCURRENCY = 4;
|
||||
129
extensions/pi-crew/src/runtime/pi-args.ts
Normal file
129
extensions/pi-crew/src/runtime/pi-args.ts
Normal file
@@ -0,0 +1,129 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as os from "node:os";
|
||||
import * as path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import type { AgentConfig } from "../agents/agent-config.ts";
|
||||
|
||||
// Thinking levels accepted as valid, both in agent config and as a ":<level>" model suffix.
const THINKING_LEVELS = ["off", "minimal", "low", "medium", "high", "xhigh"];
|
||||
// Absolute path of ../prompt/prompt-runtime.ts relative to this module; always passed to workers via --extension.
const PROMPT_RUNTIME_EXTENSION_PATH = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..", "prompt", "prompt-runtime.ts");
|
||||
// Tasks longer than this many characters are written to a temp file and passed as "@<path>" instead of inline.
const TASK_ARG_LIMIT = 8000;
|
||||
// Max crew nesting depth used when neither the environment nor the caller supplies a valid non-negative integer.
const DEFAULT_MAX_CREW_DEPTH = 2;
|
||||
|
||||
/** Input of `buildPiWorkerArgs`. */
export interface BuildPiWorkerArgsInput {
  /** Task text for the worker; long tasks are passed through a temp file. */
  task: string;
  /** Agent configuration the worker runs with. */
  agent: AgentConfig;
  /** Model to use; takes precedence over `agent.model`. */
  model?: string;
  /** Pass `false` to add `--no-session`. */
  sessionEnabled?: boolean;
  /** Caller-supplied max crew depth; a valid env value takes precedence. */
  maxDepth?: number;
  /** Each entry is passed as a `--skill` argument. */
  skillPaths?: string[];
  /** Environment used to read depth variables; defaults to `process.env`. */
  env?: NodeJS.ProcessEnv;
}
|
||||
|
||||
/** Output of `buildPiWorkerArgs`. */
export interface BuildPiWorkerArgsResult {
  /** Command-line arguments for the worker process. */
  args: string[];
  /** Environment variables to set for the worker process. */
  env: Record<string, string | undefined>;
  /** Temp directory holding prompt/task files, when one was created; remove it with `cleanupTempDir`. */
  tempDir?: string;
}
|
||||
|
||||
/** Type guard: true when `value` is one of the known thinking levels. */
function isValidThinkingLevel(value: string | undefined): value is string {
  if (value === undefined) return false;
  return THINKING_LEVELS.includes(value);
}
|
||||
|
||||
/**
 * Appends `:<thinking>` to a model string when appropriate.
 *
 * The model is returned unchanged when it is empty, when `thinking` is empty
 * or "off", when the model already ends in a valid thinking level, or when
 * `thinking` is not a valid level (invalid config falls back to Pi's default
 * thinking behavior).
 */
export function applyThinkingSuffix(model: string | undefined, thinking: string | undefined): string | undefined {
  if (!model) return model;
  if (!thinking || thinking === "off") return model;

  const colonIdx = model.lastIndexOf(":");
  const alreadySuffixed = colonIdx !== -1 && isValidThinkingLevel(model.substring(colonIdx + 1));
  if (alreadySuffixed || !isValidThinkingLevel(thinking)) return model;

  return `${model}:${thinking}`;
}
|
||||
|
||||
/**
 * Reads the current crew nesting depth from the environment.
 *
 * `PI_CREW_DEPTH` takes precedence over `PI_TEAMS_DEPTH`. Missing or invalid
 * values (anything other than a non-negative integer) yield 0.
 */
export function currentCrewDepth(env: NodeJS.ProcessEnv = process.env): number {
  const raw = env.PI_CREW_DEPTH ?? env.PI_TEAMS_DEPTH ?? "0";
  const depth = Number(raw);
  if (!Number.isInteger(depth) || depth < 0) return 0;
  return depth;
}
|
||||
|
||||
/**
 * Resolves the maximum crew nesting depth.
 *
 * Precedence: the first non-blank of `PI_CREW_MAX_DEPTH` / `PI_TEAMS_MAX_DEPTH`
 * when it is a non-negative integer, then `inputMaxDepth` when it is a
 * non-negative integer, then the module default.
 *
 * Blank environment values are treated as unset. `Number("")` is 0, so
 * without this an empty variable would silently set the max depth to 0.
 */
export function resolveCrewMaxDepth(inputMaxDepth?: number, env: NodeJS.ProcessEnv = process.env): number {
  const raw = [env.PI_CREW_MAX_DEPTH, env.PI_TEAMS_MAX_DEPTH].find((value) => value !== undefined && value.trim() !== "");
  if (raw !== undefined) {
    const envDepth = Number(raw);
    if (Number.isInteger(envDepth) && envDepth >= 0) return envDepth;
  }
  if (inputMaxDepth !== undefined && Number.isInteger(inputMaxDepth) && inputMaxDepth >= 0) return inputMaxDepth;
  return DEFAULT_MAX_CREW_DEPTH;
}
|
||||
|
||||
/**
 * Reports the current crew depth, the effective max depth, and whether
 * spawning is blocked (current depth has reached the max depth).
 */
export function checkCrewDepth(inputMaxDepth?: number, env: NodeJS.ProcessEnv = process.env): { blocked: boolean; depth: number; maxDepth: number } {
  const depth = currentCrewDepth(env);
  const maxDepth = resolveCrewMaxDepth(inputMaxDepth, env);
  const blocked = depth >= maxDepth;
  return { depth, maxDepth, blocked };
}
|
||||
|
||||
/**
 * Builds the command-line arguments and environment for a Pi worker process.
 *
 * Side effects: when the agent has a system prompt, or the task exceeds
 * `TASK_ARG_LIMIT`, the text is written (mode 0600) into a freshly created
 * temp directory which is returned as `tempDir`; the caller is responsible
 * for removing it (see `cleanupTempDir`). If writing fails part-way, the temp
 * directory is removed before the error is rethrown, because the caller never
 * receives its path in that case.
 *
 * @throws Whatever the underlying `fs` calls throw while creating temp files.
 */
export function buildPiWorkerArgs(input: BuildPiWorkerArgsInput): BuildPiWorkerArgsResult {
  const { agent } = input;
  const args = ["--mode", "json", "-p"];
  if (input.sessionEnabled === false) args.push("--no-session");

  const resolvedModel = input.model ?? agent.model;
  if (resolvedModel) {
    const modelWithThinking = applyThinkingSuffix(resolvedModel, agent.thinking);
    if (modelWithThinking) args.push("--model", modelWithThinking);
  } else if (agent.thinking && agent.thinking !== "off" && isValidThinkingLevel(agent.thinking)) {
    // When no model resolved, pass thinking separately so Pi can apply it to the inherited parent model.
    args.push("--thinking", agent.thinking);
  }

  if (agent.tools?.length) args.push("--tools", agent.tools.join(","));
  if (agent.extensions !== undefined) {
    // An explicit extension list replaces discovery; the prompt runtime is always loaded first.
    args.push("--no-extensions");
    for (const extension of [PROMPT_RUNTIME_EXTENSION_PATH, ...agent.extensions]) args.push("--extension", extension);
  } else {
    args.push("--extension", PROMPT_RUNTIME_EXTENSION_PATH);
  }
  if (!agent.inheritSkills) args.push("--no-skills");
  for (const skillPath of input.skillPaths ?? []) args.push("--skill", skillPath);

  let tempDir: string | undefined;
  try {
    if (agent.systemPrompt) {
      tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-crew-"));
      const promptPath = path.join(tempDir, `${agent.name.replace(/[^\w.-]/g, "_")}.md`);
      fs.writeFileSync(promptPath, agent.systemPrompt, { mode: 0o600 });
      args.push(agent.systemPromptMode === "append" ? "--append-system-prompt" : "--system-prompt", promptPath);
    }

    if (input.task.length > TASK_ARG_LIMIT) {
      // Too long for an inline argument: hand it over as a file reference.
      if (!tempDir) tempDir = fs.mkdtempSync(path.join(os.tmpdir(), "pi-crew-"));
      const taskPath = path.join(tempDir, "task.md");
      fs.writeFileSync(taskPath, input.task, { mode: 0o600 });
      args.push(`@${taskPath}`);
    } else {
      args.push(`Task: ${input.task}`);
    }
  } catch (error) {
    // The caller never sees tempDir when we throw, so clean up here.
    cleanupTempDir(tempDir);
    throw error;
  }

  const env = input.env ?? process.env;
  const childDepth = String(currentCrewDepth(env) + 1);
  const maxDepth = String(resolveCrewMaxDepth(input.maxDepth, env));
  const inheritProjectContext = agent.inheritProjectContext ? "1" : "0";
  const inheritSkills = agent.inheritSkills ? "1" : "0";

  return {
    args,
    env: {
      PI_CREW_INHERIT_PROJECT_CONTEXT: inheritProjectContext,
      PI_CREW_INHERIT_SKILLS: inheritSkills,
      PI_CREW_DEPTH: childDepth,
      PI_CREW_MAX_DEPTH: maxDepth,
      PI_CREW_ROLE: agent.name,
      PI_TEAMS_INHERIT_PROJECT_CONTEXT: inheritProjectContext,
      PI_TEAMS_INHERIT_SKILLS: inheritSkills,
      PI_TEAMS_DEPTH: childDepth,
      PI_TEAMS_MAX_DEPTH: maxDepth,
      PI_TEAMS_ROLE: agent.name,
    },
    tempDir,
  };
}
|
||||
|
||||
/**
 * Recursively removes a temp directory. Best effort: does nothing for an
 * empty/undefined path and swallows any removal error.
 */
export function cleanupTempDir(tempDir: string | undefined): void {
  if (tempDir) {
    try {
      fs.rmSync(tempDir, { force: true, recursive: true });
    } catch {
      // Best effort.
    }
  }
}
|
||||
111
extensions/pi-crew/src/runtime/pi-json-output.ts
Normal file
111
extensions/pi-crew/src/runtime/pi-json-output.ts
Normal file
@@ -0,0 +1,111 @@
|
||||
/** Usage figures extracted from Pi JSON events; every field is optional. */
export interface ParsedPiUsage {
  input?: number;
  output?: number;
  cacheRead?: number;
  cacheWrite?: number;
  cost?: number;
  turns?: number;
}
|
||||
|
||||
/** Result of parsing a worker's line-delimited JSON stdout. */
export interface ParsedPiJsonOutput {
  /** Number of lines that parsed as JSON. */
  jsonEvents: number;
  /** Every non-blank text fragment extracted, in event order. */
  textEvents: string[];
  /** Last entry of `textEvents`, when there is one. */
  finalText?: string;
  /** Usage merged across events, when any event carried usage. */
  usage?: ParsedPiUsage;
}
|
||||
|
||||
/** Returns `value` typed as a record when it is a non-null, non-array object; otherwise `undefined`. */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (typeof value !== "object" || value === null || Array.isArray(value)) return undefined;
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Returns the value of the first key (in the given order) that holds a finite
 * number, or `undefined` when none does.
 */
function numberField(obj: Record<string, unknown>, keys: string[]): number | undefined {
  const match = keys.find((name) => {
    const candidate = obj[name];
    return typeof candidate === "number" && Number.isFinite(candidate);
  });
  return match === undefined ? undefined : (obj[match] as number);
}
|
||||
|
||||
/**
 * Merges two usage records field by field: a field defined in `source` wins,
 * otherwise the value from `target` is kept. Neither input is modified.
 */
function mergeUsage(target: ParsedPiUsage, source: ParsedPiUsage): ParsedPiUsage {
  const fields = ["input", "output", "cacheRead", "cacheWrite", "cost", "turns"] as const;
  const merged: ParsedPiUsage = {};
  for (const field of fields) {
    merged[field] = source[field] ?? target[field];
  }
  return merged;
}
|
||||
|
||||
/**
 * Extracts usage figures from an event-like value.
 *
 * Looks for known field aliases directly on the object first. When none is
 * present, it recurses into the `usage`, `tokenUsage`, `tokens` and `stats`
 * properties (in that order) and returns the first usage found.
 *
 * @returns The usage record, or `undefined` when `value` is not an object or
 *   carries no usage figures.
 */
function extractUsage(value: unknown): ParsedPiUsage | undefined {
  const obj = asRecord(value);
  if (obj === undefined) return undefined;

  const aliases: Record<keyof ParsedPiUsage, string[]> = {
    input: ["input", "inputTokens", "input_tokens"],
    output: ["output", "outputTokens", "output_tokens"],
    cacheRead: ["cacheRead", "cache_read", "cacheReadTokens", "cache_read_tokens"],
    cacheWrite: ["cacheWrite", "cache_write", "cacheWriteTokens", "cache_write_tokens"],
    cost: ["cost", "costUsd", "cost_usd"],
    turns: ["turns", "turnCount", "turn_count"],
  };

  const direct: ParsedPiUsage = {};
  let foundDirect = false;
  for (const field of Object.keys(aliases) as (keyof ParsedPiUsage)[]) {
    const found = numberField(obj, aliases[field]);
    direct[field] = found;
    if (found !== undefined) foundDirect = true;
  }
  if (foundDirect) return direct;

  for (const container of ["usage", "tokenUsage", "tokens", "stats"]) {
    const nested = extractUsage(obj[container]);
    if (nested) return nested;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Collects text from a message `content` value.
 *
 * A string is returned as a single entry. For an array, each object part
 * contributes its `text` when it is a `type: "text"` part with string text,
 * otherwise its `content` when that is a string. Anything else yields nothing.
 */
function textFromContent(content: unknown): string[] {
  if (typeof content === "string") return [content];
  if (!Array.isArray(content)) return [];

  const collected: string[] = [];
  for (const part of content) {
    const record = asRecord(part);
    if (record === undefined) continue;
    if (record.type === "text" && typeof record.text === "string") {
      collected.push(record.text);
      continue;
    }
    if (typeof record.content === "string") collected.push(record.content);
  }
  return collected;
}
|
||||
|
||||
/**
 * Extracts the non-blank text fragments carried by one event.
 *
 * Events whose `message.role` is set to anything other than "assistant"
 * contribute nothing. Otherwise the string-valued `text`, `output`,
 * `finalOutput` and `final_output` fields are taken (in that order), followed
 * by the content of `message` when present, or of the event itself otherwise.
 */
function extractText(value: unknown): string[] {
  const obj = asRecord(value);
  if (obj === undefined) return [];

  const message = asRecord(obj.message);
  if (message?.role !== undefined && message.role !== "assistant") return [];

  const fragments: string[] = [];
  for (const field of ["text", "output", "finalOutput", "final_output"]) {
    const candidate = obj[field];
    if (typeof candidate === "string") fragments.push(candidate);
  }
  fragments.push(...textFromContent((message ?? obj).content));

  return fragments.filter((fragment) => fragment.trim().length > 0);
}
|
||||
|
||||
/**
 * Parses line-delimited JSON written to a child's stdout.
 *
 * Blank lines and lines that are not valid JSON are skipped. For every valid
 * line the event counter is incremented, its text fragments are appended, and
 * any usage found is merged into the running usage via `mergeUsage`.
 *
 * @param stdout Raw stdout text.
 * @returns Event count, all text fragments, the last fragment as `finalText`
 *          (or `undefined` when there is none) and the merged usage.
 */
export function parsePiJsonOutput(stdout: string): ParsedPiJsonOutput {
  const textEvents: string[] = [];
  let jsonEvents = 0;
  let usage: ParsedPiUsage | undefined;

  const candidates = stdout
    .split("\n")
    .map((rawLine) => rawLine.trim())
    .filter((rawLine) => rawLine.length > 0);

  for (const candidate of candidates) {
    let parsed: unknown;
    try {
      parsed = JSON.parse(candidate);
    } catch {
      // Not JSON (e.g. plain log output): ignore the line.
      continue;
    }
    jsonEvents += 1;
    for (const fragment of extractText(parsed)) textEvents.push(fragment);
    const found = extractUsage(parsed);
    if (found) usage = mergeUsage(usage ?? {}, found);
  }

  const finalText = textEvents.length > 0 ? textEvents[textEvents.length - 1] : undefined;
  return { jsonEvents, textEvents, finalText, usage };
}
|
||||
99
extensions/pi-crew/src/runtime/pi-spawn.ts
Normal file
99
extensions/pi-crew/src/runtime/pi-spawn.ts
Normal file
@@ -0,0 +1,99 @@
|
||||
import * as fs from "node:fs";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import * as path from "node:path";
|
||||
|
||||
/** Program and argument vector used to launch a Pi process. */
export interface PiSpawnCommand {
  /** Executable to run. */
  command: string;
  /** Arguments handed to `command`. */
  args: string[];
}
|
||||
|
||||
/**
 * Tells whether `filePath` can be handed to `node` as a script.
 *
 * The path must end in `.js`, `.mjs` or `.cjs` (case-insensitive) and must
 * refer to an existing regular file. A directory that merely carries such an
 * extension is rejected, as is any path that cannot be stat'ed.
 *
 * @param filePath Path to inspect (symlinks are followed by `statSync`).
 * @returns `true` only for an existing regular file with a script extension.
 */
function isRunnableNodeScript(filePath: string): boolean {
  // Cheap extension test first so unrelated paths never touch the filesystem.
  if (!/\.(?:mjs|cjs|js)$/i.test(filePath)) return false;
  try {
    return fs.statSync(filePath).isFile();
  } catch {
    // Missing path, permission problem, etc.: treat as not runnable.
    return false;
  }
}
|
||||
|
||||
/**
 * Locates the installation directory of `@mariozechner/pi-coding-agent` by
 * walking upward from the real path of the running entry script
 * (`process.argv[1]`).
 *
 * @returns The first ancestor directory whose `package.json` has that package
 *          name, or `undefined` when there is no entry script, it cannot be
 *          resolved, or no ancestor below the filesystem root matches.
 */
function resolvePiPackageRoot(): string | undefined {
  const entryScript = process.argv[1];
  if (!entryScript) return undefined;

  let startDir: string;
  try {
    startDir = path.dirname(fs.realpathSync(entryScript));
  } catch {
    return undefined;
  }

  // The loop stops before the filesystem root, where dirname() is a fixpoint.
  for (let current = startDir; current !== path.dirname(current); current = path.dirname(current)) {
    try {
      const manifest = JSON.parse(fs.readFileSync(path.join(current, "package.json"), "utf-8")) as { name?: string };
      if (manifest.name === "@mariozechner/pi-coding-agent") return current;
    } catch {
      // Missing or unreadable manifest here: keep walking upward.
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Resolves the CLI script declared in a package manifest's `bin` field.
 *
 * A string `bin` is used directly; for an object `bin` the `pi` entry is
 * preferred, falling back to the first declared entry. The path is resolved
 * relative to the manifest's directory.
 *
 * @param packageJsonPath Path to a `package.json` file.
 * @returns The absolute script path when `isRunnableNodeScript` accepts it,
 *          otherwise `undefined` (also on any read/parse error).
 */
function packageBinScript(packageJsonPath: string): string | undefined {
  try {
    const { bin } = JSON.parse(fs.readFileSync(packageJsonPath, "utf-8")) as { bin?: string | Record<string, string> };

    let relativeBin: string | undefined;
    if (typeof bin === "string") {
      relativeBin = bin;
    } else {
      relativeBin = bin?.pi ?? Object.values(bin ?? {})[0];
    }
    if (!relativeBin) return undefined;

    const resolved = path.resolve(path.dirname(packageJsonPath), relativeBin);
    return isRunnableNodeScript(resolved) ? resolved : undefined;
  } catch {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Searches `startDir` and its ancestors for the manifest of
 * `@mariozechner/pi-coding-agent`.
 *
 * At each level two places are checked, in order: the directory's own
 * `package.json` (accepted only if its `name` matches) and
 * `node_modules/@mariozechner/pi-coding-agent/package.json` (accepted if the
 * file exists). The filesystem root itself is not inspected.
 *
 * @param startDir Directory to start from.
 * @returns Path of the manifest found, or `undefined`.
 */
function findPiPackageJsonFrom(startDir: string): string | undefined {
  for (let current = startDir; current !== path.dirname(current); current = path.dirname(current)) {
    const ownManifest = path.join(current, "package.json");
    try {
      const parsed = JSON.parse(fs.readFileSync(ownManifest, "utf-8")) as { name?: string };
      if (parsed.name === "@mariozechner/pi-coding-agent") return ownManifest;
    } catch {
      // No usable manifest at this level; fall through to node_modules.
    }

    const installedManifest = path.join(current, "node_modules", "@mariozechner", "pi-coding-agent", "package.json");
    if (fs.existsSync(installedManifest)) return installedManifest;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Finds a Pi CLI script that can be executed with `node`.
 *
 * Candidates are tried in this order:
 * 1. the `PI_TEAMS_PI_BIN` environment variable, if it names a runnable script;
 * 2. the current entry script (`process.argv[1]`), made absolute;
 * 3. the `bin` script of the Pi package discovered from, in turn: the package
 *    root of the running process, the `%APPDATA%/npm` global install
 *    location, this module's directory, and the current working directory.
 *
 * @returns Path of the first runnable script found, or `undefined`.
 */
function resolvePiCliScript(): string | undefined {
  const override = process.env.PI_TEAMS_PI_BIN?.trim();
  if (override && isRunnableNodeScript(override)) return override;

  const entryScript = process.argv[1];
  if (entryScript) {
    const absoluteEntry = path.isAbsolute(entryScript) ? entryScript : path.resolve(entryScript);
    if (isRunnableNodeScript(absoluteEntry)) return absoluteEntry;
  }

  const searchRoots: string[] = [];
  const packageRoot = resolvePiPackageRoot();
  if (packageRoot) searchRoots.push(packageRoot);
  const appData = process.env.APPDATA;
  if (appData) searchRoots.push(path.join(appData, "npm", "node_modules", "@mariozechner", "pi-coding-agent"));
  const moduleDir = path.dirname(fileURLToPath(import.meta.url));
  if (moduleDir) searchRoots.push(moduleDir);
  const workingDir = process.cwd();
  if (workingDir) searchRoots.push(workingDir);

  for (const root of searchRoots) {
    let manifestPath: string;
    if (root.endsWith("package.json")) {
      manifestPath = root;
    } else {
      manifestPath = findPiPackageJsonFrom(root) ?? path.join(root, "package.json");
    }
    const cliScript = packageBinScript(manifestPath);
    if (cliScript) return cliScript;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Builds the command line used to spawn a Pi child process.
 *
 * - If `PI_TEAMS_PI_BIN` names an existing path, it is used: runnable Node
 *   scripts are launched through the current Node executable, anything else
 *   is executed directly.
 * - On Windows, a resolved Pi CLI script is launched through the current Node
 *   executable.
 * - Otherwise the bare `pi` command is used.
 *
 * @param args Arguments to forward to Pi.
 */
export function getPiSpawnCommand(args: string[]): PiSpawnCommand {
  const viaNode = (script: string): PiSpawnCommand => ({ command: process.execPath, args: [script, ...args] });

  const override = process.env.PI_TEAMS_PI_BIN?.trim();
  if (override && fs.existsSync(override)) {
    return isRunnableNodeScript(override) ? viaNode(override) : { command: override, args };
  }

  const windowsScript = process.platform === "win32" ? resolvePiCliScript() : undefined;
  return windowsScript ? viaNode(windowsScript) : { command: "pi", args };
}
|
||||
79
extensions/pi-crew/src/runtime/policy-engine.ts
Normal file
79
extensions/pi-crew/src/runtime/policy-engine.ts
Normal file
@@ -0,0 +1,79 @@
|
||||
import type { CrewLimitsConfig } from "../config/config.ts";
|
||||
import type { PolicyDecision, PolicyDecisionAction, PolicyDecisionReason, TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
||||
import { evaluateGreenContract } from "./green-contract.ts";
|
||||
import { isWorkerHeartbeatStale } from "./worker-heartbeat.ts";
|
||||
|
||||
/** Everything the policy engine needs to evaluate one run. */
export interface PolicyEngineInput {
  /** Manifest of the run being evaluated. */
  manifest: TeamRunManifest;
  /** Current state of every task in the run. */
  tasks: TeamTaskState[];
  /** Optional limits; a limit that is not set is not enforced. */
  limits?: CrewLimitsConfig;
  /** Optional reference time forwarded to the heartbeat staleness check. */
  now?: Date;
}
|
||||
|
||||
/**
 * Creates a policy decision stamped with the current wall-clock time.
 *
 * @param action  What the engine recommends.
 * @param reason  Why the decision was produced.
 * @param message Human-readable explanation.
 * @param taskId  Task the decision refers to; omitted for run-level decisions.
 */
function decision(action: PolicyDecisionAction, reason: PolicyDecisionReason, message: string, taskId?: string): PolicyDecision {
  const createdAt = new Date().toISOString();
  const result: PolicyDecision = { action, reason, message, taskId, createdAt };
  return result;
}
|
||||
|
||||
/**
 * Counts how many ancestors a task has by following `graph.parentId` links.
 *
 * A parent id counts even when the parent task itself is not in `tasksById`
 * (the walk simply ends there). A visited set stops the walk on cycles, so
 * each distinct ancestor id is counted once.
 *
 * @param task      Task whose depth is wanted.
 * @param tasksById Lookup of all known tasks by id.
 * @returns Number of distinct ancestor ids reached.
 */
function taskDepth(task: TeamTaskState, tasksById: Map<string, TeamTaskState>): number {
  const visited = new Set<string>();
  for (
    let parentId = task.graph?.parentId;
    parentId && !visited.has(parentId);
    parentId = tasksById.get(parentId)?.graph?.parentId
  ) {
    visited.add(parentId);
  }
  // Every iteration adds exactly one new id, so the set size is the depth.
  return visited.size;
}
|
||||
|
||||
/**
 * Evaluates a run against the configured limits and task health rules.
 *
 * Run-level checks (emitted first):
 * - total task count vs `maxTasksPerRun`;
 * - number of running tasks vs `maxConcurrentWorkers`.
 *
 * Per-task checks, in task order:
 * - child count vs `maxChildrenPerTask`, graph depth vs `maxTaskDepth`;
 * - failed tasks produce `retry` while `retryCount < maxRetriesPerTask`
 *   (default 0), otherwise `escalate`;
 * - running/queued tasks whose heartbeat is not marked dead and is stale
 *   (threshold `heartbeatStaleMs`, default 60s) produce `escalate`;
 * - completed tasks whose verification contract is unsatisfied produce `block`.
 *
 * When nothing was flagged and every task (at least one) is completed, a
 * single `closeout` decision is returned.
 *
 * @param input Run manifest, task states, optional limits and reference time.
 * @returns Decisions in the order described above; may be empty.
 */
export function evaluateCrewPolicy(input: PolicyEngineInput): PolicyDecision[] {
  const { tasks, limits } = input;
  const found: PolicyDecision[] = [];
  const limitExceeded = (message: string, taskId?: string): void => {
    found.push(decision("block", "limit_exceeded", message, taskId));
  };

  // --- Run-level limits -----------------------------------------------------
  const maxTasksPerRun = Number.isFinite(limits?.maxTasksPerRun) ? limits!.maxTasksPerRun : undefined;
  if (maxTasksPerRun !== undefined && tasks.length > maxTasksPerRun) {
    limitExceeded(`Run has ${tasks.length} tasks, exceeding maxTasksPerRun=${maxTasksPerRun}.`);
  }

  const runningCount = tasks.filter((task) => task.status === "running").length;
  const maxConcurrentWorkers = Number.isFinite(limits?.maxConcurrentWorkers) ? limits!.maxConcurrentWorkers : undefined;
  if (maxConcurrentWorkers !== undefined && runningCount > maxConcurrentWorkers) {
    limitExceeded(`Run has ${runningCount} running workers, exceeding maxConcurrentWorkers=${maxConcurrentWorkers}.`);
  }

  // --- Per-task checks ------------------------------------------------------
  const tasksById = new Map(tasks.map((task) => [task.id, task]));
  for (const task of tasks) {
    if (limits?.maxChildrenPerTask !== undefined) {
      const childCount = task.graph?.children.length ?? 0;
      if (childCount > limits.maxChildrenPerTask) {
        limitExceeded(`Task has ${childCount} children, exceeding maxChildrenPerTask=${limits.maxChildrenPerTask}.`, task.id);
      }
    }

    if (limits?.maxTaskDepth !== undefined && taskDepth(task, tasksById) > limits.maxTaskDepth) {
      limitExceeded(`Task graph depth exceeds maxTaskDepth=${limits.maxTaskDepth}.`, task.id);
    }

    if (task.status === "failed") {
      const retriesUsed = task.policy?.retryCount ?? 0;
      const retriesAllowed = limits?.maxRetriesPerTask ?? 0;
      const action: PolicyDecisionAction = retriesUsed < retriesAllowed ? "retry" : "escalate";
      const message = task.error ? `Task failed: ${task.error}` : "Task failed.";
      found.push(decision(action, "task_failed", message, task.id));
    }

    const isActive = task.status === "running" || task.status === "queued";
    if (isActive && task.heartbeat && task.heartbeat.alive !== false) {
      if (isWorkerHeartbeatStale(task.heartbeat, limits?.heartbeatStaleMs ?? 60_000, input.now)) {
        found.push(decision("escalate", "worker_stale", "Worker heartbeat is stale.", task.id));
      }
    }

    const requiredVerification = task.taskPacket?.verification;
    if (requiredVerification) {
      const outcome = evaluateGreenContract(requiredVerification, task.verification);
      if (!outcome.satisfied && task.status === "completed") {
        found.push(decision("block", "green_unsatisfied", `Green contract unsatisfied: required=${outcome.requiredGreenLevel}, observed=${outcome.observedGreenLevel}.`, task.id));
      }
    }
  }

  // --- Clean finish ---------------------------------------------------------
  const nothingFlagged = found.length === 0;
  if (nothingFlagged && tasks.length > 0 && tasks.every((task) => task.status === "completed")) {
    found.push(decision("closeout", "run_complete", "All tasks completed and no policy blockers were found."));
  }
  return found;
}
|
||||
|
||||
/**
 * Renders decisions as bullet lines of the form
 * `- <action> (<reason>)[ <taskId>]: <message>`.
 *
 * @param decisions Decisions to render, in order.
 * @returns One line per decision.
 */
export function summarizePolicyDecisions(decisions: PolicyDecision[]): string[] {
  const lines: string[] = [];
  for (const { action, reason, taskId, message } of decisions) {
    const taskSuffix = taskId ? ` ${taskId}` : "";
    lines.push(`- ${action} (${reason})${taskSuffix}: ${message}`);
  }
  return lines;
}
|
||||
86
extensions/pi-crew/src/runtime/post-exit-stdio-guard.ts
Normal file
86
extensions/pi-crew/src/runtime/post-exit-stdio-guard.ts
Normal file
@@ -0,0 +1,86 @@
|
||||
import type { ChildProcess } from "node:child_process";
|
||||
|
||||
/** Timeouts applied once the child process has exited. */
interface PostExitStdioGuardOptions {
  /** Milliseconds without stdio data, after exit, before open streams are destroyed. */
  idleMs: number;
  /** Milliseconds after exit at which still-open streams are destroyed regardless of activity. */
  hardMs: number;
}

/** The subset of `ChildProcess` the stdio guard relies on. */
export interface ChildWithPipedStdio {
  /** Child stdout stream (may be absent). */
  stdout: ChildProcess["stdout"];
  /** Child stderr stream (may be absent). */
  stderr: ChildProcess["stderr"];
  /** Event subscription, with the same signature as `ChildProcess.on`. */
  on: ChildProcess["on"];
}

/** Anything that can be sent a signal. */
export interface ChildWithKill {
  /** Sends `signal`; returns whether it was delivered. */
  kill(signal?: NodeJS.Signals | number): boolean;
}
|
||||
|
||||
/**
 * Sends `signal` to `child` without ever throwing.
 *
 * @param child  Target exposing `kill`.
 * @param signal Signal name to send.
 * @returns The result of `child.kill(signal)`, or `false` if it threw.
 */
export function trySignalChild(child: ChildWithKill, signal: NodeJS.Signals): boolean {
  let delivered = false;
  try {
    delivered = child.kill(signal);
  } catch {
    delivered = false;
  }
  return delivered;
}
|
||||
|
||||
/**
 * Guards against stdio pipes that stay open after the child has exited.
 *
 * After the `exit` event two unref'ed timers run:
 * - an idle timer, restarted by every stdout/stderr `data` event, which fires
 *   after `idleMs` of silence;
 * - a hard timer, armed once, which fires `hardMs` after exit.
 *
 * When either fires, every stream that has not emitted `end` is destroyed
 * (errors from `destroy()` are ignored). Both timers are cancelled once both
 * streams have ended, and on the child's `close` or `error` events.
 *
 * @param child   Child process (or compatible object) to watch.
 * @param options Idle and hard timeouts in milliseconds.
 * @returns A function that cancels any pending timers.
 */
export function attachPostExitStdioGuard(child: ChildWithPipedStdio, options: PostExitStdioGuardOptions): () => void {
  const { idleMs, hardMs } = options;
  const ended = { stdout: false, stderr: false };
  let hasExited = false;
  let idleTimer: ReturnType<typeof setTimeout> | undefined;
  let hardTimer: ReturnType<typeof setTimeout> | undefined;

  const destroyOpenStreams = (): void => {
    for (const name of ["stdout", "stderr"] as const) {
      if (ended[name]) continue;
      try {
        child[name]?.destroy();
      } catch {
        // Best effort: a failing destroy() must not surface to the caller.
      }
    }
  };

  const cancelTimers = (): void => {
    if (idleTimer) {
      clearTimeout(idleTimer);
      idleTimer = undefined;
    }
    if (hardTimer) {
      clearTimeout(hardTimer);
      hardTimer = undefined;
    }
  };

  // Output before exit is normal operation; only post-exit silence is timed.
  const restartIdleTimer = (): void => {
    if (!hasExited) return;
    if (idleTimer) clearTimeout(idleTimer);
    idleTimer = setTimeout(destroyOpenStreams, idleMs);
    idleTimer.unref();
  };

  const markEnded = (name: "stdout" | "stderr") => (): void => {
    ended[name] = true;
    if (ended.stdout && ended.stderr) cancelTimers();
  };

  child.stdout?.on("data", restartIdleTimer);
  child.stderr?.on("data", restartIdleTimer);
  child.stdout?.on("end", markEnded("stdout"));
  child.stderr?.on("end", markEnded("stderr"));
  child.on("exit", () => {
    hasExited = true;
    restartIdleTimer();
    if (!hardTimer) {
      hardTimer = setTimeout(destroyOpenStreams, hardMs);
      hardTimer.unref();
    }
  });
  child.on("close", cancelTimers);
  child.on("error", cancelTimers);

  return cancelTimers;
}
|
||||
60
extensions/pi-crew/src/runtime/process-status.ts
Normal file
60
extensions/pi-crew/src/runtime/process-status.ts
Normal file
@@ -0,0 +1,60 @@
|
||||
import type { CrewAgentRecord } from "./crew-agent-runtime.ts";
|
||||
import type { TeamRunManifest } from "../state/types.ts";
|
||||
export { hasAsyncStartMarker } from "./async-marker.ts";
|
||||
|
||||
/** Result of probing whether a process id refers to a live process. */
export interface ProcessLiveness {
  /** The pid that was probed, echoed back as given. */
  pid?: number;
  /** Whether the process is considered alive. */
  alive: boolean;
  /** Short human-readable explanation of the verdict. */
  detail: string;
}

/** Default age (10 minutes) after which an untouched active run may be treated as orphaned. */
const ORPHANED_ACTIVE_RUN_MS = 10 * 60 * 1_000;
|
||||
|
||||
/**
 * Probes whether `pid` refers to a live process by sending signal `0`.
 *
 * - A missing, non-integer or non-positive pid is reported as not alive.
 * - `EPERM` means the process exists but may not be signalled: alive.
 * - `ESRCH` means there is no such process: not alive.
 * - Any other failure is reported as not alive with the error text as detail.
 *
 * @param pid Process id to probe.
 */
export function checkProcessLiveness(pid: number | undefined): ProcessLiveness {
  if (pid === undefined || !(Number.isInteger(pid) && pid > 0)) {
    return { pid, alive: false, detail: "no pid recorded" };
  }

  try {
    // Signal 0 performs the existence/permission check without delivering a signal.
    process.kill(pid, 0);
    return { pid, alive: true, detail: "process is alive" };
  } catch (error) {
    switch ((error as NodeJS.ErrnoException).code) {
      case "EPERM":
        return { pid, alive: true, detail: "process exists but permission is denied" };
      case "ESRCH":
        return { pid, alive: false, detail: "process does not exist" };
      default:
        return { pid, alive: false, detail: error instanceof Error ? error.message : String(error) };
    }
  }
}
|
||||
|
||||
/**
 * Tells whether a run status denotes a run that is still in progress:
 * `queued`, `planning`, `running` or `waiting`.
 */
export function isActiveRunStatus(status: string): boolean {
  const activeStatuses = ["queued", "planning", "running", "waiting"];
  return activeStatuses.includes(status);
}
|
||||
|
||||
/**
 * Heuristic for active runs that nobody is driving any more.
 *
 * A run qualifies only if its status is active, it has no recorded async pid,
 * and its `updatedAt` is a valid timestamp at least `staleMs` old. Then:
 * - with no agents, it qualifies only when its summary is still the initial
 *   scaffold text;
 * - with agents, it qualifies only when every agent is still `queued` with no
 *   completion time and no progress.
 *
 * @param run     Run manifest to inspect.
 * @param agents  Agent records belonging to the run.
 * @param now     Reference time in epoch milliseconds.
 * @param staleMs Minimum age before a run can be considered orphaned.
 */
export function isLikelyOrphanedActiveRun(run: TeamRunManifest, agents: CrewAgentRecord[] = [], now = Date.now(), staleMs = ORPHANED_ACTIVE_RUN_MS): boolean {
  if (!isActiveRunStatus(run.status)) return false;
  // A recorded pid is handled by the process-liveness check instead.
  if (run.async?.pid !== undefined) return false;

  const lastUpdate = new Date(run.updatedAt).getTime();
  const updatedRecently = now - lastUpdate < staleMs;
  if (!Number.isFinite(lastUpdate) || updatedRecently) return false;

  if (agents.length === 0) {
    return run.summary === "Creating workflow prompts and placeholder results.";
  }
  const anyAgentTouched = agents.some((agent) => agent.status !== "queued" || agent.completedAt || agent.progress);
  return !anyAgentTouched;
}
|
||||
|
||||
/**
 * Tells whether a running/queued agent has left any trace of real activity:
 * a status, events or output path, progress, tool uses or JSON events.
 */
function hasDurableActiveAgentEvidence(agent: CrewAgentRecord): boolean {
  const isLive = agent.status === "running" || agent.status === "queued";
  if (!isLive) return false;

  const evidence: unknown[] = [
    agent.statusPath,
    agent.eventsPath,
    agent.outputPath,
    agent.progress,
    agent.toolUses,
    agent.jsonEvents,
  ];
  return evidence.some(Boolean);
}
|
||||
|
||||
/**
 * Tells whether an active run carries async process info whose pid no longer
 * refers to a live process. Inactive runs and runs without async info are
 * never stale.
 */
export function hasStaleAsyncProcess(run: TeamRunManifest): boolean {
  if (!isActiveRunStatus(run.status)) return false;
  if (!run.async) return false;
  const liveness = checkProcessLiveness(run.async.pid);
  return !liveness.alive;
}
|
||||
|
||||
/**
 * Decides whether a run should appear as active in the always-visible widget.
 *
 * The run must have an active status, must not have a stale async process,
 * must not look orphaned, and at least one of its agents must show durable
 * evidence of activity.
 *
 * @param run    Run manifest.
 * @param agents Agent records of the run.
 * @param now    Reference time in epoch milliseconds.
 */
export function isDisplayActiveRun(run: TeamRunManifest, agents: CrewAgentRecord[] = [], now = Date.now()): boolean {
  const hidden = !isActiveRunStatus(run.status) || hasStaleAsyncProcess(run) || isLikelyOrphanedActiveRun(run, agents, now);
  if (hidden) return false;

  // Stay quiet until a worker actually exists. Empty active manifests appear
  // briefly at startup, from old fixture/scaffold runs, or from cross-cwd
  // registry history; showing them produces noisy 0/0 rows and needless
  // spinner redraws. The full dashboard can still list historical runs.
  return agents.length > 0 && agents.some(hasDurableActiveAgentEvidence);
}
|
||||
43
extensions/pi-crew/src/runtime/progress-event-coalescer.ts
Normal file
43
extensions/pi-crew/src/runtime/progress-event-coalescer.ts
Normal file
@@ -0,0 +1,43 @@
|
||||
/** Condensed view of a progress event, used to decide whether it is worth recording. */
export interface ProgressEventSummary {
  /** Kind of event. */
  eventType: string;
  /** Tool in use at the time of the event, if any. */
  currentTool?: string;
  /** Tool-use count so far. */
  toolCount?: number;
  /** Token count so far. */
  tokens?: number;
  /** Turn count so far. */
  turns?: number;
  /** Activity state label. */
  activityState?: string;
  /** Timestamp of the last activity. */
  lastActivityAt?: string;
}

/** Verdict of the coalescer for one candidate update. */
export interface ProgressEventCoalesceDecision {
  /** Whether the update should be appended. */
  shouldAppend: boolean;
  /** Short machine-readable reason for the verdict. */
  reason: string;
}

/** Inputs to the coalescing decision. */
export interface ProgressEventCoalesceInput {
  /** Last summary that was appended, if any. */
  previous?: ProgressEventSummary;
  /** Candidate summary. */
  next: ProgressEventSummary;
  /** Current time in milliseconds. */
  nowMs: number;
  /** Time of the last append in milliseconds, if any. */
  lastAppendMs?: number;
  /** Interval after which an update is appended even without notable change. */
  minIntervalMs: number;
  /** Append unconditionally. */
  force?: boolean;
  /** Token increase that triggers an append; a default applies when omitted. */
  tokenThreshold?: number;
}

/** Token increase that triggers an append when no explicit threshold is given. */
const DEFAULT_TOKEN_THRESHOLD = 256;
|
||||
|
||||
/**
 * Difference between two optional counters.
 *
 * @returns `next - previous` when both are defined, `next` when only `next`
 *          is defined, and `0` when `next` is undefined.
 */
function numericIncrease(previous: number | undefined, next: number | undefined): number {
  if (next === undefined) return 0;
  if (previous === undefined) return next;
  return next - previous;
}
|
||||
|
||||
/**
 * Decides whether a progress update is worth appending or can be coalesced.
 *
 * Rules are checked in order and the first match wins:
 * `force`, `first` (no previous summary), `activity_changed`, `tool_changed`,
 * `tool_count_increased`, `turns_increased`, `tokens_increased` (increase of
 * at least the token threshold), `interval` (no previous append, or at least
 * `minIntervalMs` since it). Otherwise the update is `coalesced`.
 *
 * @param input Previous/next summaries, timing information and thresholds.
 */
export function shouldAppendProgressEventUpdate(input: ProgressEventCoalesceInput): ProgressEventCoalesceDecision {
  const append = (reason: string): ProgressEventCoalesceDecision => ({ shouldAppend: true, reason });

  if (input.force) return append("force");
  const { previous, next } = input;
  if (!previous) return append("first");

  if (previous.activityState !== next.activityState) return append("activity_changed");
  if (previous.currentTool !== next.currentTool) return append("tool_changed");
  if (numericIncrease(previous.toolCount, next.toolCount) > 0) return append("tool_count_increased");
  if (numericIncrease(previous.turns, next.turns) > 0) return append("turns_increased");

  const threshold = input.tokenThreshold ?? DEFAULT_TOKEN_THRESHOLD;
  if (numericIncrease(previous.tokens, next.tokens) >= threshold) return append("tokens_increased");

  const neverAppended = input.lastAppendMs === undefined;
  if (neverAppended || input.nowMs - input.lastAppendMs! >= input.minIntervalMs) return append("interval");

  return { shouldAppend: false, reason: "coalesced" };
}
|
||||
74
extensions/pi-crew/src/runtime/recovery-recipes.ts
Normal file
74
extensions/pi-crew/src/runtime/recovery-recipes.ts
Normal file
@@ -0,0 +1,74 @@
|
||||
import type { PolicyDecision, PolicyDecisionReason } from "../state/types.ts";
|
||||
|
||||
/** Failure situations for which a recovery recipe exists. */
export type FailureScenario =
  | "trust_prompt_unresolved"
  | "prompt_misdelivery"
  | "stale_branch"
  | "compile_red_cross_crate"
  | "mcp_handshake_failure"
  | "partial_plugin_startup"
  | "provider_failure"
  | "task_failed"
  | "worker_stale"
  | "green_unsatisfied";

/** Individual actions a recovery recipe can prescribe. */
export type RecoveryStep =
  | "accept_trust_prompt"
  | "redirect_prompt_to_agent"
  | "rebase_branch"
  | "clean_build"
  | "retry_mcp_handshake"
  | "restart_plugin"
  | "restart_worker"
  | "rerun_task"
  | "collect_verification_evidence"
  | "escalate_to_human";

/** State recorded for a ledger entry. */
export type RecoveryResultState = "planned" | "skipped" | "escalation_required";

/** How to recover from one failure scenario. */
export interface RecoveryRecipe {
  /** Scenario the recipe addresses. */
  scenario: FailureScenario;
  /** Steps to perform, in order. */
  steps: RecoveryStep[];
  /** Number of attempts allowed before escalation. */
  maxAttempts: number;
  /** What to do once attempts are exhausted. */
  escalationPolicy: "alert_human" | "log_and_continue" | "abort";
}

/** One recorded recovery attempt. */
export interface RecoveryLedgerEntry {
  /** Scenario that was matched. */
  scenario: FailureScenario;
  /** Task concerned; absent for run-level entries. */
  taskId?: string;
  /** Reason of the policy decision that produced this entry. */
  decisionReason: PolicyDecisionReason;
  /** 1-based attempt number for this scenario/task pair. */
  attempt: number;
  /** Outcome state of the entry. */
  state: RecoveryResultState;
  /** Steps planned for this attempt. */
  steps: RecoveryStep[];
  /** Message carried over from the policy decision. */
  message: string;
  /** ISO timestamp of when the entry was created. */
  createdAt: string;
}

/** Append-only history of recovery attempts. */
export interface RecoveryLedger {
  /** Entries in creation order. */
  entries: RecoveryLedgerEntry[];
}
|
||||
|
||||
/**
 * Maps a policy decision reason to the failure scenario used for recovery.
 *
 * `branch_stale` becomes `stale_branch`; `worker_stale`, `green_unsatisfied`
 * and `task_failed` map to the scenario of the same name; every other reason
 * is treated as `provider_failure`.
 */
export function scenarioForPolicyReason(reason: PolicyDecisionReason): FailureScenario {
  if (reason === "branch_stale") return "stale_branch";
  if (reason === "worker_stale" || reason === "green_unsatisfied" || reason === "task_failed") {
    return reason;
  }
  return "provider_failure";
}
|
||||
|
||||
/**
 * Returns the recovery recipe for a failure scenario.
 *
 * Every recipe allows a single attempt. All scenarios escalate by alerting a
 * human, except `mcp_handshake_failure` (abort) and `partial_plugin_startup`
 * (log and continue). A fresh object is built on every call.
 */
export function recipeFor(scenario: FailureScenario): RecoveryRecipe {
  const build = (steps: RecoveryStep[], escalationPolicy: RecoveryRecipe["escalationPolicy"] = "alert_human"): RecoveryRecipe => ({
    scenario,
    steps,
    maxAttempts: 1,
    escalationPolicy,
  });

  switch (scenario) {
    case "trust_prompt_unresolved":
      return build(["accept_trust_prompt"]);
    case "prompt_misdelivery":
      return build(["redirect_prompt_to_agent"]);
    case "stale_branch":
      return build(["rebase_branch", "clean_build"]);
    case "compile_red_cross_crate":
      return build(["clean_build"]);
    case "mcp_handshake_failure":
      return build(["retry_mcp_handshake"], "abort");
    case "partial_plugin_startup":
      return build(["restart_plugin", "retry_mcp_handshake"], "log_and_continue");
    case "worker_stale":
    case "provider_failure":
      return build(["restart_worker"]);
    case "green_unsatisfied":
      return build(["collect_verification_evidence"]);
    case "task_failed":
      return build(["rerun_task"]);
  }
}
|
||||
|
||||
/**
 * Extends a recovery ledger with one entry per actionable policy decision.
 *
 * Only `retry`, `escalate` and `block` decisions are recorded. For each one
 * the attempt number is one more than the count of existing entries
 * (including ones added earlier in this call) with the same scenario and
 * task id. An entry is `planned` when the attempt is within the recipe's
 * budget and the decision is not a `block`; otherwise it is
 * `escalation_required`. Steps are the recipe's steps while within budget and
 * `escalate_to_human` afterwards.
 *
 * @param decisions Policy decisions to record.
 * @param previous  Existing ledger; it is copied, not mutated.
 * @returns A new ledger containing the previous and the new entries.
 */
export function buildRecoveryLedger(decisions: PolicyDecision[], previous: RecoveryLedger = { entries: [] }): RecoveryLedger {
  const ledgerEntries = previous.entries.slice();
  const actionable = decisions.filter((item) => item.action === "retry" || item.action === "escalate" || item.action === "block");

  for (const item of actionable) {
    const scenario = scenarioForPolicyReason(item.reason);
    const recipe = recipeFor(scenario);

    let attempt = 1;
    for (const earlier of ledgerEntries) {
      if (earlier.scenario === scenario && earlier.taskId === item.taskId) attempt += 1;
    }
    const withinBudget = attempt <= recipe.maxAttempts;

    ledgerEntries.push({
      scenario,
      taskId: item.taskId,
      decisionReason: item.reason,
      attempt,
      state: withinBudget && item.action !== "block" ? "planned" : "escalation_required",
      steps: withinBudget ? recipe.steps : ["escalate_to_human"],
      message: item.message,
      createdAt: new Date().toISOString(),
    });
  }
  return { entries: ledgerEntries };
}
|
||||
81
extensions/pi-crew/src/runtime/retry-executor.ts
Normal file
81
extensions/pi-crew/src/runtime/retry-executor.ts
Normal file
@@ -0,0 +1,81 @@
|
||||
import { sleep } from "../utils/sleep.ts";
|
||||
import { throwIfCancelled } from "./cancellation.ts";
|
||||
|
||||
/** Parameters controlling retry count and backoff. */
export interface RetryPolicy {
  /** Total number of attempts, including the first. */
  maxAttempts: number;
  /** Base delay in milliseconds before the first retry. */
  backoffMs: number;
  /** Fraction of the delay used as the +/- jitter range. */
  jitterRatio: number;
  /** Multiplier applied to the delay for each further attempt. */
  exponentialFactor: number;
  /** Message patterns (with `*` wildcards) that may be retried; when empty or absent, every error may be retried. */
  retryableErrors?: string[];
}

/** Identity of one attempt, handed to the operation and to hooks. */
export interface RetryAttemptInfo {
  /** 1-based attempt number. */
  attempt: number;
  /** Identifier for the attempt. */
  attemptId: string;
}

/** Optional observers and controls for a retry loop. */
export interface RetryHooks {
  /** Called after a failed attempt that will be retried, with the upcoming delay. */
  onAttemptFailed?: (attempt: number, error: Error, nextDelayMs: number, info: RetryAttemptInfo) => void;
  /** Called when the loop gives up, just before the error is rethrown. */
  onRetryGivenUp?: (attempts: number, error: Error, info: RetryAttemptInfo) => void;
  /** Custom attempt-id generator. */
  attemptId?: (attempt: number) => string;
  /** Signal used to cancel the loop. */
  signal?: AbortSignal;
}

/** Defaults: three attempts, 1s base delay doubled per attempt, +/-30% jitter. */
export const DEFAULT_RETRY_POLICY: RetryPolicy = {
  maxAttempts: 3,
  backoffMs: 1000,
  jitterRatio: 0.3,
  exponentialFactor: 2,
};
|
||||
|
||||
/**
 * Normalises a thrown value into an `Error`.
 *
 * @returns The value itself when it already is an `Error`, otherwise a new
 *          `Error` whose message is `String(error)`.
 */
function asError(error: unknown): Error {
  if (error instanceof Error) return error;
  return new Error(String(error));
}
|
||||
|
||||
/**
 * Converts a simple wildcard pattern into an anchored, case-insensitive
 * regular expression.
 *
 * `*` matches any run of characters, including line breaks, so patterns can
 * match multi-line error messages. Every other character is matched
 * literally; in particular `?` is escaped rather than being interpreted as a
 * regex quantifier, which previously made patterns such as `"what?"` match
 * `"wha"` and made patterns starting with `?` throw a `SyntaxError`.
 *
 * @param pattern Wildcard pattern; only `*` is special.
 * @returns A regex that must match the whole input.
 */
function globToRegex(pattern: string): RegExp {
  const source = pattern
    // Escape every regex metacharacter except `*`, which is expanded below.
    .replace(/[.+?^${}()|[\]\\]/g, "\\$&")
    .replace(/\*/g, ".*");
  // `s` (dotAll) lets `.*` span newlines; `i` keeps matching case-insensitive.
  return new RegExp(`^${source}$`, "is");
}
|
||||
|
||||
/**
 * Tells whether an error may be retried under `policy`.
 *
 * With no `retryableErrors` patterns every error is retryable; otherwise the
 * error message must match at least one pattern.
 */
function isRetryable(error: Error, policy: RetryPolicy): boolean {
  const patterns = policy.retryableErrors;
  if (!patterns?.length) return true;

  for (const pattern of patterns) {
    if (globToRegex(pattern).test(error.message)) return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Computes the delay before the retry that follows attempt number `attempt`.
 *
 * The base delay is `backoffMs * exponentialFactor^(attempt - 1)`, with the
 * exponent clamped at zero. A jitter in the range
 * `[-jitterRatio, +jitterRatio] * base` is added and the result is clamped
 * to be non-negative.
 *
 * @param attempt 1-based number of the attempt that just failed.
 * @param policy  Backoff parameters.
 * @param random  Source of numbers in `[0, 1)`; injectable for tests.
 * @returns Delay in milliseconds.
 */
export function calculateRetryDelay(attempt: number, policy: RetryPolicy = DEFAULT_RETRY_POLICY, random = Math.random): number {
  const exponent = Math.max(0, attempt - 1);
  const base = policy.backoffMs * policy.exponentialFactor ** exponent;
  // Map [0, 1) onto [-1, 1) so jitter is symmetric around the base delay.
  const spread = random() * 2 - 1;
  const jitter = spread * policy.jitterRatio * base;
  return Math.max(0, base + jitter);
}
|
||||
|
||||
/**
 * Builds the attempt descriptor handed to the operation and to hooks.
 *
 * The id comes from `hooks.attemptId` when provided (and non-nullish),
 * otherwise it defaults to `retry_attempt_<n>`.
 */
function retryAttemptInfo(attempt: number, hooks: RetryHooks): RetryAttemptInfo {
  const customId = hooks.attemptId == null ? undefined : hooks.attemptId(attempt);
  return { attempt, attemptId: customId ?? `retry_attempt_${attempt}` };
}
|
||||
|
||||
/**
 * Runs `fn` until it succeeds, retrying failures with exponential backoff.
 *
 * Before every attempt the cancellation signal is checked. After a failure
 * the loop gives up (calling `onRetryGivenUp` and rethrowing) when the signal
 * is aborted, when the attempt budget is used up, or when the error is not
 * retryable under the policy. Otherwise `onAttemptFailed` is called with the
 * upcoming delay and the loop sleeps before the next attempt.
 *
 * @param fn     Operation to run; receives the attempt number and attempt info.
 * @param policy Retry policy; missing fields fall back to the defaults and
 *               `maxAttempts` is raised to at least 1.
 * @param hooks  Optional observers, attempt-id generator and abort signal.
 * @returns The value produced by the first successful attempt.
 * @throws The last error (normalised to `Error`) once retries stop; whatever
 *         `throwIfCancelled` or `sleep` throw on cancellation.
 */
export async function executeWithRetry<T>(fn: (attempt: number, info: RetryAttemptInfo) => Promise<T>, policy: RetryPolicy = DEFAULT_RETRY_POLICY, hooks: RetryHooks = {}): Promise<T> {
  const attemptBudget = Math.max(1, policy.maxAttempts ?? DEFAULT_RETRY_POLICY.maxAttempts);
  const effective: RetryPolicy = { ...DEFAULT_RETRY_POLICY, ...policy, maxAttempts: attemptBudget };

  let failure: Error | undefined;
  let attempt = 1;
  while (attempt <= effective.maxAttempts) {
    throwIfCancelled(hooks.signal);
    const info = retryAttemptInfo(attempt, hooks);
    try {
      return await fn(attempt, info);
    } catch (caught) {
      failure = asError(caught);

      // An aborted signal always ends the loop: sleep() would reject immediately anyway.
      const mustStop = hooks.signal?.aborted || attempt >= effective.maxAttempts || !isRetryable(failure, effective);
      if (mustStop) {
        hooks.onRetryGivenUp?.(attempt, failure, info);
        throw failure;
      }

      const delayMs = calculateRetryDelay(attempt, effective);
      hooks.onAttemptFailed?.(attempt, failure, delayMs, info);
      try {
        await sleep(delayMs, hooks.signal);
      } catch (sleepFailure) {
        // Prefer the cancellation error when the sleep was interrupted by an abort.
        if (hooks.signal?.aborted) throwIfCancelled(hooks.signal);
        throw sleepFailure;
      }
    }
    attempt += 1;
  }
  throw failure ?? new Error("Retry failed without error.");
}
|
||||
39
extensions/pi-crew/src/runtime/role-permission.ts
Normal file
39
extensions/pi-crew/src/runtime/role-permission.ts
Normal file
@@ -0,0 +1,39 @@
|
||||
/** Permission levels a crew role can run under. */
export type RolePermissionMode =
  | "read_only"
  | "workspace_write"
  | "danger_full_access"
  | "explicit_confirm";

/** Roles that are restricted to read-only commands. */
const READ_ONLY_ROLES = new Set([
  "explorer",
  "reviewer",
  "security-reviewer",
  "verifier",
  "analyst",
  "critic",
  "planner",
  "writer",
]);

/** Roles that are explicitly allowed to write to the workspace. */
const WRITE_ROLES = new Set(["executor", "test-engineer"]);

/** Executables whose invocation is a candidate for being read-only. */
const READ_ONLY_COMMANDS = new Set([
  "cat", "head", "tail", "less", "more", "wc", "ls", "find", "grep", "rg",
  "awk", "sed", "echo", "printf", "which", "where", "whoami", "pwd", "env", "printenv",
  "date", "df", "du", "uname", "file", "stat", "diff", "sort", "uniq", "tr",
  "cut", "paste", "test", "true", "false", "type", "readlink", "realpath", "basename", "dirname",
  "sha256sum", "md5sum", "xxd", "hexdump", "od", "strings", "tree", "jq", "git", "gh",
]);

/** Outcome of a permission check. */
export interface PermissionCheckResult {
  /** Whether the action is permitted. */
  allowed: boolean;
  /** Permission mode that was applied. */
  mode: RolePermissionMode;
  /** Explanation, present when the action is denied. */
  reason?: string;
}
|
||||
|
||||
/**
 * Returns the permission mode for a role name.
 *
 * Roles in the read-only set get `read_only`. Every other role, whether or
 * not it is listed as a write role, gets `workspace_write`.
 */
export function permissionForRole(role: string): RolePermissionMode {
  if (READ_ONLY_ROLES.has(role)) return "read_only";
  // Unknown roles currently share the mode of the explicit write roles.
  return WRITE_ROLES.has(role) ? "workspace_write" : "workspace_write";
}
|
||||
|
||||
/**
 * Heuristically decides whether a shell command only reads state.
 *
 * The executable is the basename of the first whitespace-separated token and
 * must be in the read-only command set. The full command text must also not
 * contain any of the patterns treated as mutating: an in-place flag
 * (`-i` / `--in-place`), a whitespace-delimited `>` or `>>` redirect, the
 * words `rm`, `mv` or `cp`, a package-manager install/add/ci/remove, or a
 * mutating git subcommand.
 *
 * NOTE(review): this is a text heuristic, not a shell parser; it looks like
 * redirects without surrounding whitespace and chained commands are not
 * detected — confirm whether callers rely on it as a security boundary.
 */
export function isReadOnlyCommand(command: string): boolean {
  const [firstToken] = command.trim().split(/\s+/);
  const executable = firstToken?.split(/[\\/]/).pop() ?? "";
  if (!READ_ONLY_COMMANDS.has(executable)) return false;

  const mutatingPattern = /\s(-i|--in-place)\b|\s>{1,2}\s|\brm\b|\bmv\b|\bcp\b|\b(?:npm|pnpm|yarn|bun)\s+(install|add|ci|remove)\b|\bgit\s+(commit|push|merge|rebase|reset|checkout|clean)\b/;
  return !mutatingPattern.test(command);
}
|
||||
|
||||
/**
 * Checks whether `role` may run `command`.
 *
 * Only read-only roles are restricted: they are denied any command that is
 * not recognised as read-only. All other roles are allowed.
 */
export function checkRolePermission(role: string, command: string): PermissionCheckResult {
  const mode = permissionForRole(role);
  const denied = mode === "read_only" && !isReadOnlyCommand(command);
  return denied
    ? { allowed: false, mode, reason: `Role '${role}' is read-only and command may modify state.` }
    : { allowed: true, mode };
}
|
||||
|
||||
/**
 * Reads the current crew role from the environment.
 *
 * `PI_CREW_ROLE` takes precedence over `PI_TEAMS_ROLE`; values are trimmed
 * and blank values are ignored.
 *
 * @param env Environment to read from; defaults to `process.env`.
 * @returns The role name, or `undefined` when neither variable is set.
 */
export function currentCrewRole(env: NodeJS.ProcessEnv = process.env): string | undefined {
  for (const variable of ["PI_CREW_ROLE", "PI_TEAMS_ROLE"]) {
    const role = env[variable]?.trim();
    if (role) return role;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Checks whether a role may spawn further subagents.
 *
 * With no role the spawn is allowed under `workspace_write`. Read-only roles
 * are denied; all other roles are allowed.
 */
export function checkSubagentSpawnPermission(role: string | undefined): PermissionCheckResult {
  const mode: RolePermissionMode = role ? permissionForRole(role) : "workspace_write";
  if (mode !== "read_only") return { allowed: true, mode };
  return { allowed: false, mode, reason: `Role '${role}' is read-only and cannot spawn additional subagents.` };
}
|
||||
93
extensions/pi-crew/src/runtime/runtime-resolver.ts
Normal file
93
extensions/pi-crew/src/runtime/runtime-resolver.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
import type { PiTeamsConfig } from "../config/config.ts";
|
||||
import type { RuntimeResolutionState } from "../state/types.ts";
|
||||
import type { CrewRuntimeKind } from "./crew-agent-runtime.ts";
|
||||
|
||||
/** Runtime mode that can be requested through configuration. */
export type CrewRuntimeMode =
  | "auto"
  | "scaffold"
  | "child-process"
  | "live-session";

/** Safety classification attached to a resolved runtime. */
export type CrewRuntimeSafety =
  | "trusted"
  | "explicit_dry_run"
  | "blocked";

/** Description of the runtime that was resolved and what it supports. */
export interface CrewRuntimeCapabilities {
  /** Runtime kind that will be used. */
  kind: CrewRuntimeKind;
  /** Mode that was requested. */
  requestedMode: CrewRuntimeMode;
  /** Whether the runtime can be used. */
  available: boolean;
  /** Runtime kind that was fallen back to, if any. */
  fallback?: CrewRuntimeKind;
  /** Capability flag: steering. */
  steer: boolean;
  /** Capability flag: resume. */
  resume: boolean;
  /** Capability flag: live tool activity. */
  liveToolActivity: boolean;
  /** Capability flag: transcript. */
  transcript: boolean;
  /** Explanation for the resolution (e.g. why a fallback happened). */
  reason?: string;
  /** Safety classification. */
  safety: CrewRuntimeSafety;
}
|
||||
|
||||
/**
 * Projects resolved runtime capabilities onto the persisted resolution state.
 *
 * `fallback` and `reason` are included only when they are truthy; the
 * capability flags are not part of the state.
 *
 * @param runtime    Resolved runtime capabilities.
 * @param resolvedAt ISO timestamp to record; defaults to the current time.
 */
export function runtimeResolutionState(runtime: CrewRuntimeCapabilities, resolvedAt = new Date().toISOString()): RuntimeResolutionState {
  const { kind, requestedMode, safety, available, fallback, reason } = runtime;

  const optional: Pick<CrewRuntimeCapabilities, "fallback" | "reason"> = {};
  if (fallback) optional.fallback = fallback;
  if (reason) optional.reason = reason;

  return { kind, requestedMode, safety, available, ...optional, resolvedAt };
}
|
||||
|
||||
/**
 * Probes whether the experimental live-session runtime can be used.
 *
 * - Unless `PI_CREW_ENABLE_EXPERIMENTAL_LIVE_SESSION` is `"1"`, the runtime is
 *   reported unavailable without probing.
 * - `PI_CREW_MOCK_LIVE_SESSION=success` short-circuits to available.
 * - Otherwise the Pi SDK is imported dynamically and checked for the exports
 *   the live-session adapter needs; the probe is raced against a timeout.
 *
 * The function resolves rather than rejects: load failures, missing exports
 * and timeouts are all reported through `reason`.
 *
 * @param timeoutMs Maximum time to wait for the SDK probe.
 * @param env       Environment to read flags from; defaults to `process.env`.
 */
export async function isLiveSessionRuntimeAvailable(timeoutMs = 1500, env: NodeJS.ProcessEnv = process.env): Promise<{ available: boolean; reason?: string }> {
  if (env.PI_CREW_ENABLE_EXPERIMENTAL_LIVE_SESSION !== "1") {
    return { available: false, reason: "Live-session runtime adapter is experimental and disabled. Set PI_CREW_ENABLE_EXPERIMENTAL_LIVE_SESSION=1 to probe SDK support." };
  }
  if (env.PI_CREW_MOCK_LIVE_SESSION === "success") {
    return { available: true, reason: "Mock live-session runtime is enabled." };
  }

  const probeSdk = async (): Promise<{ available: boolean; reason?: string }> => {
    try {
      const sdk = (await import("@mariozechner/pi-coding-agent")) as Record<string, unknown>;
      const requiredExports = ["createAgentSession", "DefaultResourceLoader", "SessionManager", "SettingsManager"];
      const absent = requiredExports.filter((exportName) => typeof sdk[exportName] === "undefined");
      if (absent.length) {
        return { available: false, reason: `Pi SDK live-session exports missing: ${absent.join(", ")}.` };
      }
      return { available: true };
    } catch (error) {
      const detail = error instanceof Error ? error.message : String(error);
      return { available: false, reason: `Could not load optional Pi SDK live-session runtime: ${detail}` };
    }
  };

  let timer: NodeJS.Timeout | undefined;
  try {
    const pendingProbe = probeSdk();
    const timedOut = new Promise<{ available: boolean; reason: string }>((resolve) => {
      timer = setTimeout(() => {
        resolve({ available: false, reason: `Timed out probing optional Pi SDK live-session runtime after ${timeoutMs}ms.` });
      }, timeoutMs);
      // The timer must never keep the process alive on its own.
      timer.unref();
    });
    return await Promise.race([pendingProbe, timedOut]);
  } finally {
    if (timer) clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Resolves which runtime a crew run should use.
 *
 * Order of evaluation:
 * 1. `runtime.mode === "scaffold"` yields an explicit dry run.
 * 2. Workers disabled via config or env yields a blocked scaffold result.
 * 3. `child-process` mode, or any mode that does not ask for live sessions,
 *    yields child-process capabilities.
 * 4. Otherwise the live-session runtime is probed; on failure the result is
 *    either an unavailable scaffold (fallback disallowed) or child-process
 *    capabilities annotated with the fallback and reason.
 *
 * @param config Crew configuration.
 * @param env Environment used for the worker/live-session flags.
 */
export async function resolveCrewRuntime(config: PiTeamsConfig, env: NodeJS.ProcessEnv = process.env): Promise<CrewRuntimeCapabilities> {
  const runtimeConfig = config.runtime;
  const requestedMode = runtimeConfig?.mode ?? "auto";
  if (requestedMode === "scaffold") return scaffoldCaps(requestedMode, undefined, "explicit_dry_run");

  const disableSignals = [
    config.executeWorkers === false,
    env.PI_CREW_EXECUTE_WORKERS === "0",
    env.PI_TEAMS_EXECUTE_WORKERS === "0",
  ];
  if (disableSignals.some(Boolean)) {
    return scaffoldCaps(requestedMode, "Child worker execution disabled by config/env. Set runtime.mode=scaffold or executeWorkers=false only for dry runs.", "blocked");
  }

  const wantsLiveSession = requestedMode === "live-session" || (requestedMode === "auto" && runtimeConfig?.preferLiveSession === true);
  if (requestedMode === "child-process" || !wantsLiveSession) return childCaps(requestedMode);

  const live = await isLiveSessionRuntimeAvailable(1500, env);
  if (live.available) return liveCaps(requestedMode);

  const fallbackForbidden = requestedMode === "live-session" && runtimeConfig?.allowChildProcessFallback === false;
  if (fallbackForbidden) {
    // NOTE(review): this keeps the default "explicit_dry_run" safety while being
    // unavailable — confirm whether "blocked" was intended here.
    return { ...scaffoldCaps(requestedMode), available: false, reason: live.reason };
  }
  return { ...childCaps(requestedMode), fallback: "child-process", reason: live.reason };
}
|
||||
|
||||
/**
 * Builds capabilities for the scaffold (no worker execution) runtime.
 *
 * @param requestedMode Mode the caller asked for.
 * @param reason Optional explanation; omitted from the result when falsy.
 * @param safety Safety classification; "blocked" marks the runtime unavailable.
 */
function scaffoldCaps(requestedMode: CrewRuntimeMode, reason?: string, safety: CrewRuntimeSafety = "explicit_dry_run"): CrewRuntimeCapabilities {
  const caps: CrewRuntimeCapabilities = {
    kind: "scaffold",
    requestedMode,
    available: safety !== "blocked",
    steer: false,
    resume: false,
    liveToolActivity: false,
    transcript: false,
    safety,
  };
  if (reason) caps.reason = reason;
  return caps;
}
|
||||
|
||||
/**
 * Builds capabilities for the child-process runtime: always available and
 * trusted, with transcript support but no steering, resume or live tool activity.
 *
 * @param requestedMode Mode the caller asked for.
 * @param reason Optional explanation; omitted from the result when falsy.
 */
function childCaps(requestedMode: CrewRuntimeMode, reason?: string): CrewRuntimeCapabilities {
  const caps: CrewRuntimeCapabilities = {
    kind: "child-process",
    requestedMode,
    available: true,
    steer: false,
    resume: false,
    liveToolActivity: false,
    transcript: true,
    safety: "trusted",
  };
  if (reason) caps.reason = reason;
  return caps;
}
|
||||
|
||||
/**
 * Builds capabilities for the live-session runtime, which enables every
 * capability flag and is marked trusted.
 *
 * @param requestedMode Mode the caller asked for.
 */
function liveCaps(requestedMode: CrewRuntimeMode): CrewRuntimeCapabilities {
  const everythingOn = { steer: true, resume: true, liveToolActivity: true, transcript: true };
  return { kind: "live-session", requestedMode, available: true, ...everythingOn, safety: "trusted" };
}
|
||||
25
extensions/pi-crew/src/runtime/session-resources.ts
Normal file
25
extensions/pi-crew/src/runtime/session-resources.ts
Normal file
@@ -0,0 +1,25 @@
|
||||
import type { ExtensionAPI } from "@mariozechner/pi-coding-agent";
|
||||
import { logInternalError } from "../utils/internal-error.ts";
|
||||
|
||||
/**
 * Try to register a cleanup function with Pi's session resource cleanup API (v0.72+).
 *
 * The API is looked up dynamically so older hosts without
 * `registerSessionResourceCleanup` keep working. The registration function is
 * invoked as a method of `pi` so implementations that rely on `this` behave
 * correctly.
 *
 * @param pi Extension API object provided by the host.
 * @param cleanup Function to run when the session's resources are cleaned up.
 * @returns An unregister function when the host returns one; `undefined` when
 *   the API is missing, returns nothing, or throws (the error is logged).
 */
export function tryRegisterSessionCleanup(pi: ExtensionAPI, cleanup: () => void): (() => void) | undefined {
  const api = pi as unknown as Record<string, unknown>;
  const registerFn = api["registerSessionResourceCleanup"];
  if (typeof registerFn !== "function") return undefined;
  try {
    // Call with `pi` as the receiver; a detached call would lose `this`.
    const unregister: unknown = registerFn.call(pi, cleanup);
    // A void return means the cleanup is registered but cannot be unregistered.
    return typeof unregister === "function" ? (unregister as () => void) : undefined;
  } catch (error) {
    logInternalError("session-resources.register", error);
    return undefined;
  }
}
|
||||
59
extensions/pi-crew/src/runtime/session-snapshot.ts
Normal file
59
extensions/pi-crew/src/runtime/session-snapshot.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
||||
|
||||
/**
 * Produces a one-level-deep copy of a task state for event emission, so that
 * later mutation of the live task does not affect what listeners received.
 *
 * The nested objects and arrays listed below are copied; anything nested
 * deeper than that is still shared with the original.
 *
 * @param task Task state to copy.
 * @returns The copy, typed read-only.
 */
export function snapshotTaskState(task: TeamTaskState): Readonly<TeamTaskState> {
  // Shallow-clone an optional object, mapping any falsy value to undefined.
  const cloneObject = <T extends object>(value: T | undefined): T | undefined => (value ? { ...value } : undefined);
  const modelAttempts = task.modelAttempts?.map((attempt) => ({ ...attempt }));
  const attempts = task.attempts?.map((attempt) => ({ ...attempt }));
  return {
    ...task,
    dependsOn: Array.from(task.dependsOn),
    usage: cloneObject(task.usage),
    agentProgress: cloneObject(task.agentProgress),
    heartbeat: cloneObject(task.heartbeat),
    modelAttempts,
    modelRouting: cloneObject(task.modelRouting),
    claim: cloneObject(task.claim),
    checkpoint: cloneObject(task.checkpoint),
    attempts,
    worktree: cloneObject(task.worktree),
  };
}
|
||||
|
||||
/**
 * Session state snapshot for persistence before session switches.
 * Captures the minimal set of data needed to resume operations.
 */
export interface SessionStateSnapshot {
  /** ISO timestamp of the snapshot */
  capturedAt: string;
  /** Active run IDs at time of snapshot */
  activeRunIds: string[];
  /** Number of pending deliveries */
  pendingDeliveryCount: number;
  /** Session generation counter */
  sessionGeneration: number;
  /**
   * Count of active runs keyed by run status. As populated by
   * `createSessionSnapshot`, this tallies run manifests by their `status`;
   * it is not a per-task breakdown despite the field name.
   */
  taskSummary: Record<string, number>;
}
|
||||
|
||||
/**
 * Builds a session state snapshot for pre-switch persistence.
 *
 * @param activeRuns Manifests of the runs that are currently active.
 * @param pendingDeliveryCount Number of deliveries still pending.
 * @param sessionGeneration Current session generation counter.
 * @returns Snapshot stamped with the current time; `taskSummary` holds the
 *   number of runs per run status.
 */
export function createSessionSnapshot(
  activeRuns: TeamRunManifest[],
  pendingDeliveryCount: number,
  sessionGeneration: number,
): SessionStateSnapshot {
  const taskSummary = activeRuns.reduce<Record<string, number>>((counts, run) => {
    counts[run.status] = (counts[run.status] ?? 0) + 1;
    return counts;
  }, {});
  const capturedAt = new Date().toISOString();
  const activeRunIds = activeRuns.map((run) => run.runId);
  return { capturedAt, activeRunIds, pendingDeliveryCount, sessionGeneration, taskSummary };
}
|
||||
79
extensions/pi-crew/src/runtime/session-usage.ts
Normal file
79
extensions/pi-crew/src/runtime/session-usage.ts
Normal file
@@ -0,0 +1,79 @@
|
||||
import * as fs from "node:fs";
|
||||
import type { UsageState } from "../state/types.ts";
|
||||
|
||||
/**
 * Narrows an unknown value to a plain key/value record.
 *
 * @returns The same object when it is a non-null, non-array object; otherwise `undefined`.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (value === null || typeof value !== "object") return undefined;
  if (Array.isArray(value)) return undefined;
  return value as Record<string, unknown>;
}
|
||||
|
||||
/**
 * Returns the first finite number found under any of the given keys.
 *
 * @param obj Record to read from.
 * @param keys Candidate keys, checked in order.
 * @returns The first finite numeric value, or `undefined` when none of the keys holds one.
 */
function numberField(obj: Record<string, unknown>, keys: string[]): number | undefined {
  const matchingKey = keys.find((key) => {
    const candidate = obj[key];
    return typeof candidate === "number" && Number.isFinite(candidate);
  });
  return matchingKey === undefined ? undefined : (obj[matchingKey] as number);
}
|
||||
|
||||
/**
 * Extracts token/cost usage from a parsed session entry.
 *
 * The value itself is checked first under several key spellings. If it holds no
 * usage numbers, the `usage`, `tokenUsage`, `tokens` and `stats` properties are
 * searched recursively, and finally `message.usage`.
 *
 * @param value Any parsed JSON value.
 * @returns A usage object carrying all six fields (unset ones are `undefined`),
 *   or `undefined` when no usage numbers are found anywhere.
 */
function usageFromValue(value: unknown): UsageState | undefined {
  const obj = asRecord(value);
  if (!obj) return undefined;

  const input = numberField(obj, ["input", "inputTokens", "input_tokens"]);
  const output = numberField(obj, ["output", "outputTokens", "output_tokens"]);
  const cacheRead = numberField(obj, ["cacheRead", "cache_read", "cacheReadTokens", "cache_read_tokens"]);
  const cacheWrite = numberField(obj, ["cacheWrite", "cache_write", "cacheWriteTokens", "cache_write_tokens"]);
  const cost = numberField(obj, ["cost", "costUsd", "cost_usd"]);
  const turns = numberField(obj, ["turns", "turnCount", "turn_count"]);
  const direct: UsageState = { input, output, cacheRead, cacheWrite, cost, turns };
  const hasDirectValue = [input, output, cacheRead, cacheWrite, cost, turns].some((entry) => entry !== undefined);
  if (hasDirectValue) return direct;

  // Fall back to well-known container properties, first match wins.
  for (const container of [obj.usage, obj.tokenUsage, obj.tokens, obj.stats]) {
    const nested = usageFromValue(container);
    if (nested) return nested;
  }

  const message = asRecord(obj.message);
  if (!message) return undefined;
  return usageFromValue(message.usage);
}
|
||||
|
||||
/**
 * Adds two usage objects field by field, treating missing fields as zero.
 *
 * @returns A new usage object in which all six fields are numbers.
 */
function addUsage(total: UsageState, usage: UsageState): UsageState {
  const sum = (left: number | undefined, right: number | undefined): number => (left ?? 0) + (right ?? 0);
  return {
    input: sum(total.input, usage.input),
    output: sum(total.output, usage.output),
    cacheRead: sum(total.cacheRead, usage.cacheRead),
    cacheWrite: sum(total.cacheWrite, usage.cacheWrite),
    cost: sum(total.cost, usage.cost),
    turns: sum(total.turns, usage.turns),
  };
}
|
||||
|
||||
/**
 * Reduces a usage total to the fields listed in `foundKeys`.
 *
 * @param total Accumulated usage.
 * @param foundKeys Fields to keep.
 * @returns An object with only the listed fields, or `undefined` when the set is empty.
 */
function compactUsage(total: UsageState, foundKeys: Set<keyof UsageState>): UsageState | undefined {
  if (!foundKeys.size) return undefined;
  const picked: UsageState = {};
  foundKeys.forEach((key) => {
    picked[key] = total[key];
  });
  return picked;
}
|
||||
|
||||
/**
 * Sums usage across all lines of a session JSONL document.
 *
 * Blank and unparseable lines are skipped. Only fields that carried a value on
 * at least one line are included in the result, so a field that never appeared
 * stays absent instead of being reported as 0.
 *
 * @param text Raw JSONL text.
 * @returns Accumulated usage, or `undefined` when no line contained usage data.
 */
export function parseSessionUsageFromJsonlText(text: string): UsageState | undefined {
  let total: UsageState = {};
  const foundKeys = new Set<keyof UsageState>();
  for (const line of text.split(/\r?\n/)) {
    const trimmed = line.trim();
    if (!trimmed) continue;
    try {
      const usage = usageFromValue(JSON.parse(trimmed) as unknown);
      if (!usage) continue;
      // The extracted object may carry keys whose value is undefined; those
      // fields were not actually present on this line and must not count.
      for (const key of Object.keys(usage) as Array<keyof UsageState>) {
        if (usage[key] !== undefined) foundKeys.add(key);
      }
      total = addUsage(total, usage);
    } catch {
      // Session JSONL can contain partial/corrupt lines after interrupted workers.
    }
  }
  return compactUsage(total, foundKeys);
}
|
||||
|
||||
/**
 * Reads a session JSONL file and sums the usage it records.
 *
 * @param filePath Path of the session file.
 * @returns Accumulated usage, or `undefined` when the file is missing,
 *   unreadable, or contains no usage data.
 */
export function parseSessionUsage(filePath: string): UsageState | undefined {
  try {
    return fs.existsSync(filePath)
      ? parseSessionUsageFromJsonlText(fs.readFileSync(filePath, "utf-8"))
      : undefined;
  } catch {
    return undefined;
  }
}
|
||||
29
extensions/pi-crew/src/runtime/sidechain-output.ts
Normal file
29
extensions/pi-crew/src/runtime/sidechain-output.ts
Normal file
@@ -0,0 +1,29 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import { redactSecrets } from "../utils/redaction.ts";
|
||||
|
||||
/** One line of a sidechain output JSONL file. */
export interface SidechainEntry {
  /** Constant marker identifying the line as a sidechain entry. */
  isSidechain: true;
  /** Identifier of the agent the entry belongs to. */
  agentId: string;
  /** Entry type label. */
  type: string;
  /** Payload; its shape is not constrained by this module. */
  message: unknown;
  /** ISO timestamp; filled in by the writer at append time. */
  timestamp: string;
  /** Working directory associated with the entry. */
  cwd: string;
}
|
||||
|
||||
/**
 * Appends one entry to a sidechain JSONL file, creating parent directories
 * as needed. The entry is stamped with the marker flag and the current time
 * and passed through `redactSecrets` before serialization.
 *
 * @param filePath Destination file.
 * @param entry Entry fields other than the marker and timestamp.
 */
export function writeSidechainEntry(filePath: string, entry: Omit<SidechainEntry, "isSidechain" | "timestamp">): void {
  const parentDir = path.dirname(filePath);
  fs.mkdirSync(parentDir, { recursive: true });
  const stamped = { isSidechain: true, timestamp: new Date().toISOString(), ...entry };
  const serialized = JSON.stringify(redactSecrets(stamped));
  fs.appendFileSync(filePath, `${serialized}\n`, "utf-8");
}
|
||||
|
||||
/**
 * Computes where a task's sidechain output is stored.
 *
 * @param stateRoot Root directory of the run state.
 * @param taskId Task identifier, used as a directory name.
 * @returns `<stateRoot>/agents/<taskId>/sidechain.output.jsonl`.
 */
export function sidechainOutputPath(stateRoot: string, taskId: string): string {
  const segments = ["agents", taskId, "sidechain.output.jsonl"];
  return path.join(stateRoot, ...segments);
}
|
||||
|
||||
/**
 * Maps a runtime event to the type label used in sidechain output.
 *
 * Message lifecycle events collapse to "message" and tool execution events to
 * "tool"; any other string type passes through unchanged.
 *
 * @param event Arbitrary event value.
 * @returns The label, or `undefined` when the event is not an object or has no string `type`.
 */
export function eventToSidechainType(event: unknown): string | undefined {
  if (event === null || typeof event !== "object" || Array.isArray(event)) return undefined;
  const { type } = event as { type?: unknown };
  switch (type) {
    case "message_start":
    case "message_update":
    case "message_end":
      return "message";
    case "tool_execution_start":
    case "tool_execution_update":
    case "tool_execution_end":
      return "tool";
    default:
      return typeof type === "string" ? type : undefined;
  }
}
|
||||
222
extensions/pi-crew/src/runtime/skill-instructions.ts
Normal file
222
extensions/pi-crew/src/runtime/skill-instructions.ts
Normal file
@@ -0,0 +1,222 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import { fileURLToPath } from "node:url";
|
||||
import type { AgentConfig } from "../agents/agent-config.ts";
|
||||
import type { TeamRole } from "../teams/team-config.ts";
|
||||
import type { WorkflowStep } from "../workflows/workflow-config.ts";
|
||||
import { isSafePathId, resolveContainedPath, resolveRealContainedPath } from "../utils/safe-paths.ts";
|
||||
|
||||
const PACKAGE_SKILLS_DIR = path.resolve(path.dirname(fileURLToPath(import.meta.url)), "..", "..", "skills");
|
||||
const MAX_SKILL_CHARS = 1500;
|
||||
const MAX_TOTAL_CHARS = 6000;
|
||||
const MAX_SKILL_NAME_CHARS = 80;
|
||||
const MAX_SELECTED_SKILLS = 32;
|
||||
const SKILL_CACHE_MAX_ENTRIES = 128;
|
||||
|
||||
const DEFAULT_ROLE_SKILLS: Record<string, string[]> = {
|
||||
explorer: ["read-only-explorer", "context-artifact-hygiene"],
|
||||
analyst: ["read-only-explorer", "requirements-to-task-packet"],
|
||||
planner: ["delegation-patterns", "requirements-to-task-packet"],
|
||||
critic: ["read-only-explorer", "multi-perspective-review"],
|
||||
executor: ["state-mutation-locking", "safe-bash", "verification-before-done"],
|
||||
reviewer: ["read-only-explorer", "multi-perspective-review"],
|
||||
"security-reviewer": ["secure-agent-orchestration-review", "ownership-session-security"],
|
||||
"test-engineer": ["verification-before-done", "safe-bash"],
|
||||
verifier: ["verification-before-done", "runtime-state-reader"],
|
||||
writer: ["context-artifact-hygiene", "verify-evidence"],
|
||||
};
|
||||
|
||||
/** Inputs that determine which skills are selected for a task. */
export interface ResolveTaskSkillsInput {
  /** Role name used to look up the built-in default skills. */
  role: string;
  /** Agent-level skills, appended after the role defaults. */
  agent?: Pick<AgentConfig, "skills">;
  /** Team-role skills; `false` disables the role defaults. */
  teamRole?: Pick<TeamRole, "skills">;
  /** Workflow-step skills; `false` disables the role defaults. */
  step?: Pick<WorkflowStep, "skills">;
  /** Per-call override: extra skill names, or `false` to select no skills at all. */
  override?: string[] | false;
}
|
||||
|
||||
export interface RenderSkillInstructionsInput extends ResolveTaskSkillsInput {
|
||||
cwd: string;
|
||||
}
|
||||
|
||||
/**
 * Checks that a skill name is non-empty, within the length limit, and
 * accepted by `isSafePathId`.
 */
function isValidSkillName(name: string): boolean {
  if (name.length === 0 || name.length > MAX_SKILL_NAME_CHARS) return false;
  return isSafePathId(name);
}
|
||||
|
||||
/**
 * Makes a skill name safe to embed in rendered text: every character outside
 * `[A-Za-z0-9_-]` becomes `_` and the result is clipped to the name limit.
 *
 * @returns The cleaned name, or "invalid" when nothing is left.
 */
function sanitizeSkillName(name: string): string {
  const cleaned = name.replace(/[^A-Za-z0-9_-]/g, "_");
  const clipped = cleaned.slice(0, MAX_SKILL_NAME_CHARS);
  return clipped === "" ? "invalid" : clipped;
}
|
||||
|
||||
/**
 * Trims, validates and de-duplicates skill names while keeping first-seen order.
 * Empty and invalid names are dropped.
 */
function unique(items: string[]): string[] {
  const kept = new Set<string>();
  for (const name of items.map((entry) => entry.trim())) {
    if (name && isValidSkillName(name)) kept.add(name);
  }
  return Array.from(kept);
}
|
||||
|
||||
/**
 * Normalizes a user-supplied skill override.
 *
 * @param value `false` disables skills, a string is treated as a
 *   comma-separated list, an array is taken as a list of names, and `true` or
 *   `undefined` mean "no override".
 * @returns `false`, a list of trimmed non-empty names, or `undefined`.
 */
export function normalizeSkillOverride(value: string | string[] | boolean | undefined): string[] | false | undefined {
  const clean = (entries: string[]): string[] => entries.map((entry) => entry.trim()).filter(Boolean);
  if (value === false) return false;
  if (typeof value === "string") return clean(value.split(","));
  if (value === true) return undefined;
  return Array.isArray(value) ? clean(value) : undefined;
}
|
||||
|
||||
/**
 * Returns the built-in default skills for a role.
 *
 * The result is a fresh array on every call, so callers may mutate it without
 * affecting the module-level defaults. Only roles defined directly on the
 * defaults table are recognized; names such as "constructor" that resolve
 * through the object prototype yield an empty list.
 *
 * @param role Role name.
 * @returns A copy of the role's default skill names, or an empty array.
 */
export function defaultSkillsForRole(role: string): string[] {
  if (!Object.prototype.hasOwnProperty.call(DEFAULT_ROLE_SKILLS, role)) return [];
  return [...(DEFAULT_ROLE_SKILLS[role] ?? [])];
}
|
||||
|
||||
/**
 * Collects the skill names that apply to a task, before the selection cap.
 *
 * Sources are appended in this order: role defaults (unless the team role or
 * step sets `skills: false`), agent skills, team-role skills, step skills, and
 * the explicit override list. An override of `false` selects nothing.
 *
 * @param input Skill sources for the task.
 * @returns Valid, de-duplicated names in first-seen order.
 */
function collectTaskSkillNames(input: ResolveTaskSkillsInput): string[] {
  if (input.override === false) return [];
  const roleDefaultsDisabled = input.teamRole?.skills === false || input.step?.skills === false;
  // Copy the defaults: the list is appended to below and must not leak
  // task-specific names back into the shared per-role defaults.
  const names: string[] = roleDefaultsDisabled ? [] : [...defaultSkillsForRole(input.role)];
  if (input.agent?.skills?.length) names.push(...input.agent.skills);
  if (Array.isArray(input.teamRole?.skills)) names.push(...input.teamRole.skills);
  if (Array.isArray(input.step?.skills)) names.push(...input.step.skills);
  if (Array.isArray(input.override)) names.push(...input.override);
  return unique(names);
}
|
||||
|
||||
/**
 * Resolves the skill names selected for a task, capped at the maximum number
 * of skills that may be selected.
 */
export function resolveTaskSkillNames(input: ResolveTaskSkillsInput): string[] {
  const collected = collectTaskSkillNames(input);
  return collected.filter((_name, index) => index < MAX_SELECTED_SKILLS);
}
|
||||
|
||||
/**
 * Lists the directories searched for skills, in priority order: the project's
 * `skills` directory under `cwd` first, then the skills bundled with the package.
 */
function candidateSkillDirs(cwd: string): Array<{ root: string; source: "project" | "package" }> {
  const projectDir = { root: path.resolve(cwd, "skills"), source: "project" as const };
  const packageDir = { root: PACKAGE_SKILLS_DIR, source: "package" as const };
  return [projectDir, packageDir];
}
|
||||
|
||||
interface CachedSkillMarkdown {
|
||||
path: string;
|
||||
source: "project" | "package";
|
||||
content: string;
|
||||
mtimeMs: number;
|
||||
size: number;
|
||||
}
|
||||
|
||||
const skillReadCache = new Map<string, CachedSkillMarkdown>();
|
||||
|
||||
/**
 * Stores a skill in the read cache and evicts the oldest entries once the
 * cache exceeds its size limit. Re-inserting an existing key moves it to the
 * newest position.
 *
 * @returns The value that was stored, for call chaining.
 */
function rememberSkill(key: string, value: CachedSkillMarkdown): CachedSkillMarkdown {
  // Deleting first makes the subsequent set() place the key last in insertion order.
  skillReadCache.delete(key);
  skillReadCache.set(key, value);
  while (skillReadCache.size > SKILL_CACHE_MAX_ENTRIES) {
    const [oldestKey] = skillReadCache.keys();
    if (!oldestKey) break;
    skillReadCache.delete(oldestKey);
  }
  return value;
}
|
||||
|
||||
/** Empties the in-memory skill read cache so the next lookup re-reads from disk. */
export function clearSkillInstructionCache(): void {
  skillReadCache.clear();
}
|
||||
|
||||
/**
 * Reports whether a cached skill still matches the file on disk, judged by
 * modification time and size. A file that can no longer be stat'ed is stale.
 */
function cachedSkillFresh(value: CachedSkillMarkdown): boolean {
  let current: fs.Stats;
  try {
    current = fs.statSync(value.path);
  } catch {
    return false;
  }
  return current.mtimeMs === value.mtimeMs && current.size === value.size;
}
|
||||
|
||||
/**
 * Loads a skill's `SKILL.md`, preferring the project directory over the
 * packaged skills, with a per-(cwd, name) cache validated against the file's
 * mtime and size.
 *
 * Candidates that do not exist, are symbolic links, or fail path containment
 * or reading are skipped.
 *
 * @param cwd Project working directory.
 * @param name Skill name; invalid names return `undefined`.
 * @returns Path, source and content of the first usable file, or `undefined`.
 */
function readSkillMarkdown(cwd: string, name: string): { path: string; source: "project" | "package"; content: string } | undefined {
  if (!isValidSkillName(name)) return undefined;

  const cacheKey = `${path.resolve(cwd)}:${name}`;
  const hit = skillReadCache.get(cacheKey);
  if (hit) {
    if (cachedSkillFresh(hit)) return hit;
    skillReadCache.delete(cacheKey);
  }

  const relative = path.join(name, "SKILL.md");
  for (const { root, source } of candidateSkillDirs(cwd)) {
    try {
      const candidate = resolveContainedPath(root, relative);
      if (!fs.existsSync(candidate) || fs.lstatSync(candidate).isSymbolicLink()) continue;
      const filePath = resolveRealContainedPath(root, relative);
      const { mtimeMs, size } = fs.statSync(filePath);
      const content = fs.readFileSync(filePath, "utf-8");
      return rememberSkill(cacheKey, { path: filePath, source, content, mtimeMs, size });
    } catch {
      // Any failure for this candidate falls through to the next directory.
    }
  }
  return undefined;
}
|
||||
|
||||
/**
 * Extracts the `description:` value from a leading `---` frontmatter block.
 *
 * Only a single-line, top-level `description:` entry is recognized. Matching
 * surrounding single or double quotes are removed so a YAML-quoted value reads
 * the same as an unquoted one.
 *
 * @param content Full markdown file content.
 * @returns The description text, or `undefined` when there is no frontmatter
 *   or no description line.
 */
function frontmatterDescription(content: string): string | undefined {
  const match = /^---\r?\n([\s\S]*?)\r?\n---/.exec(content);
  if (!match) return undefined;
  const frontmatter = match[1] ?? "";
  const line = frontmatter.split(/\r?\n/).find((entry) => entry.startsWith("description:"));
  if (line === undefined) return undefined;
  const raw = line.slice("description:".length).trim();
  // Quotes are YAML syntax, not part of the description itself.
  const quoted = /^(["'])(.*)\1$/.exec(raw);
  return quoted ? quoted[2] : raw;
}
|
||||
|
||||
/**
 * Removes a leading `---` frontmatter block and trims the remainder.
 *
 * The closing `---` may be followed by a newline or by the end of the file, so
 * a file that consists only of frontmatter without a trailing newline is
 * stripped as well.
 *
 * @param content Full markdown file content.
 * @returns The trimmed body without frontmatter.
 */
function stripFrontmatter(content: string): string {
  return content.replace(/^---\r?\n[\s\S]*?\r?\n---(?:\r?\n|$)/, "").trim();
}
|
||||
|
||||
/**
 * Shrinks a skill's markdown body to fit the per-skill character limit.
 *
 * Bodies within the limit are returned unchanged. Longer bodies keep the part
 * before a `## Verification` heading (when present), are cut so that the text
 * plus the truncation marker stays within the limit, and get the marker
 * appended.
 *
 * @param content Full markdown file content, frontmatter included.
 * @returns The body, possibly truncated and marked.
 */
function compactSkillContent(content: string): string {
  const body = stripFrontmatter(content);
  if (body.length <= MAX_SKILL_CHARS) return body;
  // Room reserved for the truncation marker appended below.
  const markerReserve = 40;
  const keepBudget = MAX_SKILL_CHARS - markerReserve;
  const preferred = body.split(/\r?\n## Verification\r?\n/)[0]?.trim() ?? body;
  // Compare against the reduced budget so the marker never pushes the result
  // past the limit.
  const truncated = preferred.length > keepBudget ? preferred.slice(0, keepBudget).trimEnd() : preferred;
  return `${truncated}\n\n[skill instructions truncated]`;
}
|
||||
|
||||
/** Result of rendering the skill instructions for a worker. */
export interface RenderedSkillInstructions {
  /** Selected skill names, after the selection cap. */
  names: string[];
  /** De-duplicated directories of the skill files that were loaded. */
  paths: string[];
  /** Rendered markdown block; empty when no skills are selected. */
  block: string;
}
|
||||
|
||||
/**
 * Renders the "Applicable Skills" prompt block for a worker.
 *
 * Selected skills are loaded in order and appended as sections until the total
 * character budget is exhausted. Skills without a readable `SKILL.md` get a
 * short notice instead. Skills that do not fit are counted and reported in a
 * trailing "Omitted skills" section; if that section itself does not fit, it
 * replaces the last rendered section, and the displaced skill is included in
 * the reported count.
 *
 * @param input Skill sources plus the project working directory.
 * @returns Selected names, directories of loaded skills, and the rendered block.
 */
export function renderSkillInstructions(input: RenderSkillInstructionsInput): RenderedSkillInstructions {
  const allNames = collectTaskSkillNames(input);
  const names = allNames.slice(0, MAX_SELECTED_SKILLS);
  if (names.length === 0) return { names, paths: [], block: "" };

  const sections: string[] = [];
  const skillPaths: string[] = [];
  let total = 0;
  // Names beyond the selection cap count as omitted from the start.
  let omittedCount = Math.max(0, allNames.length - names.length);

  // Appends a section if it fits the remaining budget.
  const pushSection = (section: string): boolean => {
    if (total + section.length > MAX_TOTAL_CHARS) return false;
    sections.push(section);
    total += section.length;
    return true;
  };
  const omittedSummary = (count: number): string =>
    `## Omitted skills\n\n[omitted ${count} selected skill(s): skill instruction budget exceeded]`;

  for (const name of names) {
    const safeName = sanitizeSkillName(name);
    const loaded = readSkillMarkdown(input.cwd, name);
    if (!loaded) {
      const missing = `## ${safeName}\n\nSkill '${safeName}' was selected but no SKILL.md file was found. Continue with the task packet and report this missing skill.`;
      if (!pushSection(missing)) omittedCount += 1;
      continue;
    }
    skillPaths.push(path.dirname(loaded.path));
    const description = frontmatterDescription(loaded.content);
    const source = loaded.source === "project" ? `project:skills/${safeName}` : `package:skills/${safeName}`;
    const header = [`## ${safeName}`, description ? `Description: ${description}` : undefined, `Source: ${source}`].filter(Boolean).join("\n");
    const section = `${header}\n\n${compactSkillContent(loaded.content)}`;
    if (!pushSection(section)) omittedCount += 1;
  }

  if (omittedCount > 0 && !pushSection(omittedSummary(omittedCount)) && sections.length > 0) {
    // The summary takes the place of the last rendered skill, so that skill
    // is omitted too and must be part of the reported number.
    sections[sections.length - 1] = omittedSummary(omittedCount + 1);
  }

  return {
    names,
    paths: [...new Set(skillPaths)],
    block: [
      "# Applicable Skills",
      "The following skills were selected for this worker. Follow them when they match the current task. If a selected skill conflicts with the explicit task packet, project AGENTS.md, or user request, follow the stricter/higher-priority instruction and report the conflict.",
      "",
      sections.join("\n\n---\n\n"),
    ].join("\n"),
  };
}
|
||||
189
extensions/pi-crew/src/runtime/stale-reconciler.ts
Normal file
189
extensions/pi-crew/src/runtime/stale-reconciler.ts
Normal file
@@ -0,0 +1,189 @@
|
||||
import type { TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
||||
import { checkProcessLiveness } from "./process-status.ts";
|
||||
|
||||
/**
 * Outcome of reconciling one possibly-stale run.
 */
export interface ReconcileResult {
  /** Identifier of the run that was examined. */
  runId: string;
  /**
   * Classification of what was found:
   * - "result_exists": every task is already in a terminal status
   * - "no_status": the manifest records no usable PID
   * - "healthy": a PID is recorded and the run is not considered stale
   * - "pid_dead" / "pid_alive_stale": the run was judged stale and repaired
   */
  verdict: "healthy" | "result_exists" | "pid_dead" | "pid_alive_stale" | "no_status";
  /** True when repaired task states were produced. */
  repaired: boolean;
  /** Human-readable explanation of the verdict. */
  detail: string;
  /** Repaired task state, returned to a locked caller for persistence. */
  repairedTasks?: TeamTaskState[];
}
|
||||
|
||||
const STALE_ALIVE_PID_MS = 24 * 60 * 60 * 1000; // 24 hours
|
||||
const ACTIVE_EVIDENCE_TTL_MS = 5 * 60 * 1000;
|
||||
|
||||
/**
 * Phase 1: detect a run whose tasks have all reached a terminal status.
 *
 * Despite the name, no file is read; the decision is based purely on the task
 * statuses passed in, and the manifest is not consulted.
 *
 * @returns `found` is true when there is at least one task and every task is
 *   completed, failed, cancelled or skipped. `repaired` is always false.
 */
function checkResultFile(
  manifest: TeamRunManifest,
  tasks: TeamTaskState[],
): { found: boolean; repaired: boolean } {
  const terminalStatuses = new Set<string>(["completed", "failed", "cancelled", "skipped"]);
  const found = tasks.length > 0 && tasks.every((task) => terminalStatuses.has(task.status));
  return { found, repaired: false };
}
|
||||
|
||||
/**
 * Phase 2: determine whether the recorded process is still alive.
 *
 * @param pid Process id from the run manifest, if any.
 * @returns Not alive with detail "no pid recorded" when the pid is missing or
 *   not a positive integer; otherwise the result of `checkProcessLiveness`.
 */
function checkPidLiveness(pid: number | undefined): {
  alive: boolean;
  detail: string;
} {
  const usablePid = pid !== undefined && Number.isInteger(pid) && pid > 0;
  if (!usablePid) return { alive: false, detail: "no pid recorded" };
  const { alive, detail } = checkProcessLiveness(pid);
  return { alive, detail };
}
|
||||
|
||||
/**
 * Phase 3: decide whether a run with a recorded PID is stale.
 *
 * A dead process is always stale. A live process is stale only when the
 * manifest's `updatedAt` is older than the stale threshold; an unparseable
 * `updatedAt` is treated as not stale.
 *
 * @param manifest Run manifest supplying `updatedAt`.
 * @param pidAlive Whether the recorded process is alive.
 * @param now Current time in epoch milliseconds.
 * @returns The verdict and a short machine-readable reason.
 */
function evaluateStaleness(
  manifest: TeamRunManifest,
  pidAlive: boolean,
  now: number,
): { stale: boolean; reason: string } {
  if (!pidAlive) return { stale: true, reason: "pid_dead" };

  const updatedAt = new Date(manifest.updatedAt).getTime();
  if (!Number.isFinite(updatedAt)) return { stale: false, reason: "updated_at_invalid" };

  const ageMs = now - updatedAt;
  return ageMs > STALE_ALIVE_PID_MS
    ? { stale: true, reason: `alive_but_stale_${Math.round(ageMs / 3600_000)}h` }
    : { stale: false, reason: "alive_and_recent" };
}
|
||||
|
||||
/**
 * Reports whether any running or waiting task shows recent signs of life.
 *
 * Evidence is either a heartbeat seen within the evidence window (unless the
 * heartbeat is explicitly marked not alive) or agent activity within the same
 * window.
 *
 * @param tasks Task states of the run.
 * @param now Current time in epoch milliseconds.
 */
function hasRecentActiveEvidence(tasks: TeamTaskState[], now: number): boolean {
  // True when the timestamp parses and lies within the evidence window.
  const withinWindow = (timestamp: string | undefined): boolean => {
    const at = timestamp ? new Date(timestamp).getTime() : Number.NaN;
    return Number.isFinite(at) && now - at <= ACTIVE_EVIDENCE_TTL_MS;
  };
  for (const task of tasks) {
    const active = task.status === "running" || task.status === "waiting";
    if (!active) continue;
    const heartbeatFresh = task.heartbeat?.alive !== false && withinWindow(task.heartbeat?.lastSeenAt);
    if (heartbeatFresh || withinWindow(task.agentProgress?.lastActivityAt)) return true;
  }
  return false;
}
|
||||
|
||||
/**
 * Produces repaired task states for a stale run.
 *
 * Tasks that are running, queued or waiting are returned as cancelled copies
 * with a finish time and an error naming the reason; all other tasks are
 * returned unchanged. The manifest is not modified here.
 *
 * @param manifest Run manifest (unused by the current implementation).
 * @param tasks Current task states.
 * @param reason Short reason embedded in each cancelled task's error.
 * @returns The task list with unfinished tasks cancelled.
 */
function repairStaleRun(
  manifest: TeamRunManifest,
  tasks: TeamTaskState[],
  reason: string,
): TeamTaskState[] {
  const finishedAt = new Date().toISOString();
  const unfinished = new Set<string>(["running", "queued", "waiting"]);
  return tasks.map((task) =>
    unfinished.has(task.status)
      ? { ...task, status: "cancelled" as const, finishedAt, error: `Stale run reconciled: ${reason}` }
      : task,
  );
}
|
||||
|
||||
/**
 * Reconciles a run that may have gone stale.
 *
 * 1. If every task is already terminal, nothing is repaired.
 * 2. If the manifest records no usable PID, the run is kept when a task shows
 *    recent heartbeat/progress, repaired when the manifest has not been
 *    updated within the stale threshold, and otherwise left alone.
 * 3. With a recorded PID, a dead process is repaired immediately and a live
 *    one only when the manifest is older than the stale threshold.
 *
 * Repair cancels unfinished tasks and returns them in `repairedTasks`;
 * persisting them is left to the caller.
 *
 * @param manifest Run manifest.
 * @param tasks Current task states of the run.
 * @param now Current time in epoch milliseconds.
 */
export function reconcileStaleRun(
  manifest: TeamRunManifest,
  tasks: TeamTaskState[],
  now = Date.now(),
): ReconcileResult {
  const { runId } = manifest;
  const countCancelled = (list: TeamTaskState[]): number => list.filter((t) => t.status === "cancelled").length;

  // Phase 1: all tasks already terminal.
  if (checkResultFile(manifest, tasks).found) {
    return { runId, verdict: "result_exists", repaired: false, detail: "All tasks already terminal — no repair needed" };
  }

  // Phase 2: PID liveness.
  const pid = manifest.async?.pid;
  const pidStatus = checkPidLiveness(pid);

  if (pidStatus.detail === "no pid recorded") {
    // A run without an async PID may be a foreground/live run; recent task
    // heartbeat or progress keeps it alive even if the manifest is old.
    if (hasRecentActiveEvidence(tasks, now)) {
      return { runId, verdict: "no_status", repaired: false, detail: "No PID recorded, but recent task heartbeat/progress exists; not repairing" };
    }
    const updatedAt = new Date(manifest.updatedAt).getTime();
    const manifestStale = Number.isFinite(updatedAt) && now - updatedAt > STALE_ALIVE_PID_MS;
    if (!manifestStale) {
      return { runId, verdict: "no_status", repaired: false, detail: "No PID recorded; not stale enough to repair" };
    }
    const repairedTasks = repairStaleRun(manifest, tasks, "no_pid_stale");
    return {
      runId,
      verdict: "no_status",
      repaired: true,
      detail: `No PID; stale ${Math.round((now - updatedAt) / 3600_000)}h; repaired ${countCancelled(repairedTasks)} tasks`,
      repairedTasks,
    };
  }

  // Phase 3: staleness for runs with a recorded PID.
  const staleness = evaluateStaleness(manifest, pidStatus.alive, now);
  if (!staleness.stale) {
    return { runId, verdict: "healthy", repaired: false, detail: `PID ${pid}: ${pidStatus.detail}, ${staleness.reason}` };
  }

  const repairedTasks = repairStaleRun(manifest, tasks, staleness.reason);
  return {
    runId,
    verdict: pidStatus.alive ? "pid_alive_stale" : "pid_dead",
    repaired: true,
    detail: `PID ${pid}: ${pidStatus.detail}; ${staleness.reason}; repaired ${countCancelled(repairedTasks)} tasks`,
    repairedTasks,
  };
}
|
||||
394
extensions/pi-crew/src/runtime/subagent-manager.ts
Normal file
394
extensions/pi-crew/src/runtime/subagent-manager.ts
Normal file
@@ -0,0 +1,394 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import { loadRunManifestById } from "../state/state-store.ts";
|
||||
import type { PiTeamsToolResult } from "../extension/tool-result.ts";
|
||||
import { DEFAULT_SUBAGENT } from "../config/defaults.ts";
|
||||
import { projectCrewRoot } from "../utils/paths.ts";
|
||||
import { DEFAULT_PATHS } from "../config/defaults.ts";
|
||||
import { logInternalError } from "../utils/internal-error.ts";
|
||||
import { redactSecrets } from "../utils/redaction.ts";
|
||||
|
||||
export type SubagentStatus = "queued" | "running" | "completed" | "failed" | "cancelled" | "error" | "blocked" | "stopped";
|
||||
|
||||
export interface SubagentSpawnOptions {
|
||||
cwd: string;
|
||||
type: string;
|
||||
description: string;
|
||||
prompt: string;
|
||||
background: boolean;
|
||||
model?: string;
|
||||
skill?: string | string[] | false;
|
||||
maxTurns?: number;
|
||||
ownerSessionGeneration?: number;
|
||||
}
|
||||
|
||||
export interface SubagentRecord {
|
||||
id: string;
|
||||
runId?: string;
|
||||
type: string;
|
||||
description: string;
|
||||
prompt: string;
|
||||
status: SubagentStatus;
|
||||
startedAt: number;
|
||||
completedAt?: number;
|
||||
result?: string;
|
||||
error?: string;
|
||||
resultConsumed?: boolean;
|
||||
model?: string;
|
||||
skill?: string | string[] | false;
|
||||
background: boolean;
|
||||
ownerSessionGeneration?: number;
|
||||
stuckNotified?: boolean;
|
||||
blockedAt?: number;
|
||||
promise?: Promise<void>;
|
||||
// Phase 1.6: Telemetry baseline fields
|
||||
turnCount?: number;
|
||||
terminated?: boolean;
|
||||
durationMs?: number;
|
||||
}
|
||||
|
||||
type SpawnRunner = (options: SubagentSpawnOptions, signal?: AbortSignal) => Promise<PiTeamsToolResult>;
|
||||
type Notify = (record: SubagentRecord) => void;
|
||||
type NotifyEvent = (type: string, data: Record<string, unknown>) => void;
|
||||
|
||||
interface QueuedSpawn {
|
||||
record: SubagentRecord;
|
||||
options: SubagentSpawnOptions;
|
||||
runner: SpawnRunner;
|
||||
signal?: AbortSignal;
|
||||
}
|
||||
|
||||
/**
 * Computes the JSON file path under the project crew root where a subagent
 * record is persisted.
 *
 * NOTE(review): `id` is interpolated into the file name without validation
 * here — confirm callers only pass trusted identifiers.
 */
function persistedSubagentPath(cwd: string, id: string): string {
  const subagentsDir = path.join(projectCrewRoot(cwd), DEFAULT_PATHS.state.subagentsSubdir);
  return path.join(subagentsDir, `${id}.json`);
}
|
||||
|
||||
/**
 * Returns a copy of the record without its in-memory `promise`, which cannot
 * be serialized to JSON.
 */
function serializableRecord(record: SubagentRecord): SubagentRecord {
  const copy: SubagentRecord = { ...record };
  delete copy.promise;
  return copy;
}
|
||||
|
||||
/**
 * Writes `record` (minus its promise, with secrets redacted) as pretty-printed
 * JSON to its persisted path, creating parent directories as needed.
 *
 * Best-effort: any failure is reported through `logInternalError` and never
 * thrown to the caller.
 */
export function savePersistedSubagentRecord(cwd: string, record: SubagentRecord): void {
  try {
    const target = persistedSubagentPath(cwd, record.id);
    fs.mkdirSync(path.dirname(target), { recursive: true });
    const redacted = redactSecrets(serializableRecord(record));
    const json = JSON.stringify(redacted, null, 2);
    fs.writeFileSync(target, `${json}\n`, "utf-8");
  } catch (error) {
    logInternalError("subagent-manager.save", error, `id=${record.id}`);
  }
}
|
||||
|
||||
/**
 * Loads the persisted record for `id`.
 *
 * @returns The parsed JSON object, or `undefined` when the file is missing,
 * unreadable, not valid JSON, or not a plain (non-array) object. The shape is
 * not validated beyond that.
 */
export function readPersistedSubagentRecord(cwd: string, id: string): SubagentRecord | undefined {
  let parsed: unknown;
  try {
    const raw = fs.readFileSync(persistedSubagentPath(cwd, id), "utf-8");
    parsed = JSON.parse(raw);
  } catch {
    return undefined;
  }
  const isPlainObject = typeof parsed === "object" && parsed !== null && !Array.isArray(parsed);
  return isPlainObject ? (parsed as SubagentRecord) : undefined;
}
|
||||
|
||||
/** Joins the non-empty text items of a tool result with newlines; "" when there is no content. */
function resultText(result: PiTeamsToolResult): string {
  const items = result.content;
  if (items == null) return "";
  const texts = items.map((item) => (item.type === "text" ? item.text : ""));
  return texts.filter((text) => Boolean(text)).join("\n");
}
|
||||
|
||||
/** Extracts `details.runId` from a tool result when it is a string. */
function detailsRunId(result: PiTeamsToolResult): string | undefined {
  const candidate = (result.details as { runId?: unknown } | undefined)?.runId;
  if (typeof candidate !== "string") return undefined;
  return candidate;
}
|
||||
|
||||
/**
 * Sums the turn counts recorded on every task of run `runId`.
 *
 * Per task, `usage.turns` is preferred and `agentProgress.turns` is the
 * fallback; non-finite or non-numeric values are ignored.
 *
 * @returns The total, or `undefined` when there is no run id, the manifest
 * cannot be loaded, or no task reports a usable turn count.
 */
function totalRunTurns(cwd: string, runId: string | undefined): number | undefined {
  const loaded = runId ? loadRunManifestById(cwd, runId) : undefined;
  if (!loaded) return undefined;
  const counts: number[] = [];
  for (const task of loaded.tasks) {
    const turns = task.usage?.turns ?? task.agentProgress?.turns;
    if (typeof turns === "number" && Number.isFinite(turns)) counts.push(turns);
  }
  if (counts.length === 0) return undefined;
  return counts.reduce((sum, value) => sum + value, 0);
}
|
||||
|
||||
/**
 * Tracks spawned subagents in memory, persists their records to disk, and
 * limits how many background subagents run at once.
 *
 * Foreground spawns always start immediately. Background spawns beyond
 * `maxConcurrent` are queued and started as running ones finish. When a
 * runner result carries a run id, the manager polls the run manifest until
 * the run completes, fails, is cancelled, or becomes blocked.
 *
 * Listener callbacks (`onComplete`, `onEvent`) are isolated: an exception
 * thrown by a listener is logged and never alters record state, stalls the
 * queue, or escapes from a timer.
 */
export class SubagentManager {
  private readonly records = new Map<string, SubagentRecord>();
  private readonly cwdByRecord = new Map<string, string>();
  private readonly controllers = new Map<string, AbortController>();
  private readonly controllerCleanup = new Map<string, () => void>();
  private queue: QueuedSpawn[] = [];
  private runningBackground = 0;
  private counter = 0;
  private maxConcurrent: number;
  private readonly onComplete?: Notify;
  private readonly onEvent?: NotifyEvent;
  private readonly pollIntervalMs: number;

  /**
   * @param maxConcurrent Maximum number of concurrently running background subagents.
   * @param onComplete Called when a record reaches a terminal state or first becomes blocked.
   * @param pollIntervalMs Delay between run-manifest polls.
   * @param onEvent Called for auxiliary events such as "subagent.stuck-blocked".
   */
  constructor(maxConcurrent = 4, onComplete?: Notify, pollIntervalMs = 1000, onEvent?: NotifyEvent) {
    // NaN would make every comparison against the limit false, so nothing
    // would ever be queued or drained; fall back to the default instead.
    this.maxConcurrent = Number.isNaN(maxConcurrent) ? 4 : maxConcurrent;
    this.onComplete = onComplete;
    this.onEvent = onEvent;
    this.pollIntervalMs = pollIntervalMs;
  }

  /**
   * Registers a new subagent and either starts it or, for a background spawn
   * with no free slot, queues it.
   *
   * @returns The live record; it is mutated as the subagent progresses.
   */
  spawn(options: SubagentSpawnOptions, runner: SpawnRunner, signal?: AbortSignal): SubagentRecord {
    const record: SubagentRecord = {
      id: `agent_${Date.now().toString(36)}_${(++this.counter).toString(36)}`,
      type: options.type,
      description: options.description,
      prompt: options.prompt,
      status: options.background && this.runningBackground >= this.maxConcurrent ? "queued" : "running",
      startedAt: Date.now(),
      model: options.model,
      skill: options.skill,
      background: options.background,
      ownerSessionGeneration: options.ownerSessionGeneration,
    };
    this.records.set(record.id, record);
    this.cwdByRecord.set(record.id, options.cwd);
    savePersistedSubagentRecord(options.cwd, record);
    if (record.status === "queued") {
      this.queue.push({ record, options, runner, signal });
      return record;
    }
    this.start(record, options, runner, signal);
    return record;
  }

  /** Returns the live record for `id`, if this manager created it. */
  getRecord(id: string): SubagentRecord | undefined {
    return this.records.get(id);
  }

  /** Returns all known records, most recently started first. */
  listAgents(): SubagentRecord[] {
    return [...this.records.values()].sort((a, b) => b.startedAt - a.startedAt);
  }

  /**
   * Stops a queued, running, or blocked subagent.
   *
   * @returns `true` when the record was stopped, `false` when it is unknown
   * or already in another state.
   */
  abort(id: string): boolean {
    const record = this.records.get(id);
    if (!record) return false;
    if (record.status === "queued") {
      this.queue = this.queue.filter((entry) => entry.record.id !== id);
      this.markStopped(record);
      return true;
    }
    if (record.status !== "running" && record.status !== "blocked") return false;
    this.controllers.get(id)?.abort();
    this.markStopped(record);
    return true;
  }

  /** Stops every queued, running, or blocked subagent and returns how many were stopped. */
  abortAll(): number {
    let count = 0;
    for (const entry of this.queue) {
      this.markStopped(entry.record);
      count++;
    }
    this.queue = [];
    for (const record of this.records.values()) {
      if (record.status === "running" || record.status === "blocked") {
        this.controllers.get(record.id)?.abort();
        this.markStopped(record);
        count++;
      }
    }
    return count;
  }

  /** Resolves once no record is running or queued (blocked records are not awaited). */
  async waitForAll(): Promise<void> {
    while (true) {
      this.drainQueue();
      const pending = this.listAgents().filter((record) => record.status === "running" || record.status === "queued").map((record) => record.promise).filter((promise): promise is Promise<void> => Boolean(promise));
      if (!pending.length) break;
      await Promise.allSettled(pending);
    }
  }

  /**
   * Waits until record `id` is neither running nor queued.
   *
   * @returns The record, or `undefined` when the id is unknown.
   */
  async waitForRecord(id: string): Promise<SubagentRecord | undefined> {
    while (true) {
      const record = this.records.get(id);
      if (!record) return undefined;
      if (record.status !== "running" && record.status !== "queued") return record;
      if (record.promise) await record.promise;
      // Queued records have no promise yet; poll until they start.
      else await new Promise((resolve) => setTimeout(resolve, 100));
    }
  }

  /**
   * Updates the background concurrency limit (floored, minimum 1) and starts
   * queued spawns that now fit. A NaN value is ignored because it would
   * permanently stall the queue.
   */
  setMaxConcurrent(value: number): void {
    if (!Number.isNaN(value)) this.maxConcurrent = Math.max(1, Math.floor(value));
    this.drainQueue();
  }

  /** Marks the record running, invokes the runner, and tracks it to a terminal or blocked state. */
  private start(record: SubagentRecord, options: SubagentSpawnOptions, runner: SpawnRunner, signal?: AbortSignal): void {
    if (options.background) this.runningBackground++;
    record.status = "running";
    record.startedAt = Date.now();
    record.completedAt = undefined;
    const runSignal = this.createRunSignal(record.id, signal);
    savePersistedSubagentRecord(options.cwd, record);
    record.promise = (async () => {
      try {
        const result = await runner(options, runSignal);
        // abort() may have stopped the record while the runner was in flight.
        if (record.status === "stopped") return;
        record.runId = detailsRunId(result);
        record.result = resultText(result);
        savePersistedSubagentRecord(options.cwd, record);
        if (result.isError) {
          record.status = "error";
          record.error = record.result;
          return;
        }
        if (record.runId) await this.pollRunToTerminal(options.cwd, record);
        else record.status = "completed";
      } catch (error) {
        if (record.status === "stopped" || runSignal.aborted) {
          record.status = "stopped";
          return;
        }
        record.status = "error";
        record.error = error instanceof Error ? error.message : String(error);
      } finally {
        this.cleanupRunSignal(record.id);
        if (options.background) this.runningBackground = Math.max(0, this.runningBackground - 1);
        // A blocked run is not finished, so it gets no completion timestamp.
        if (record.status !== "blocked") record.completedAt = record.completedAt ?? Date.now();
        savePersistedSubagentRecord(options.cwd, record);
        if (record.status === "completed" || record.status === "failed" || record.status === "cancelled" || record.status === "error" || record.status === "stopped") {
          // Phase 1.6: Populate telemetry fields
          record.turnCount = record.turnCount ?? totalRunTurns(options.cwd, record.runId);
          record.durationMs = record.completedAt ? Math.max(0, record.completedAt - record.startedAt) : undefined;
          savePersistedSubagentRecord(options.cwd, record);
          this.notifyComplete(record);
        }
        this.drainQueue();
      }
    })();
  }

  /** Invokes `onComplete`, logging instead of propagating listener errors. */
  private notifyComplete(record: SubagentRecord): void {
    try {
      this.onComplete?.(record);
    } catch (error) {
      logInternalError("subagent-manager.on-complete", error, `id=${record.id}`);
    }
  }

  /** Invokes `onEvent`, logging instead of propagating listener errors. */
  private notifyEvent(type: string, data: Record<string, unknown>): void {
    try {
      this.onEvent?.(type, data);
    } catch (error) {
      logInternalError("subagent-manager.on-event", error, `type=${type}`);
    }
  }

  /** Sets the record to "stopped" with a completion timestamp and persists it. */
  private markStopped(record: SubagentRecord): void {
    record.status = "stopped";
    record.completedAt = Date.now();
    const cwd = this.cwdByRecord.get(record.id);
    if (cwd) savePersistedSubagentRecord(cwd, record);
  }

  /** Creates the per-run abort controller and links it to the caller's signal, if any. */
  private createRunSignal(id: string, signal?: AbortSignal): AbortSignal {
    const controller = new AbortController();
    this.controllers.set(id, controller);
    if (signal?.aborted) {
      controller.abort();
      return controller.signal;
    }
    if (signal) {
      const abort = (): void => controller.abort();
      signal.addEventListener("abort", abort, { once: true });
      this.controllerCleanup.set(id, () => signal.removeEventListener("abort", abort));
    }
    return controller.signal;
  }

  /** Detaches the caller-signal listener and forgets the run's controller. */
  private cleanupRunSignal(id: string): void {
    this.controllerCleanup.get(id)?.();
    this.controllerCleanup.delete(id);
    this.controllers.delete(id);
  }

  /** Starts queued spawns while background slots are free; skips entries no longer queued. */
  private drainQueue(): void {
    while (this.queue.length > 0 && this.runningBackground < this.maxConcurrent) {
      const next = this.queue.shift();
      if (!next || next.record.status !== "queued") continue;
      this.start(next.record, next.options, next.runner, next.signal);
    }
  }

  /**
   * Polls the run manifest until the run is completed, failed, cancelled, or
   * blocked, or until the record leaves the running/blocked states (e.g. it
   * was aborted). On the first blocked observation it notifies listeners and
   * hands further tracking to timer-based polling.
   */
  private async pollRunToTerminal(cwd: string, record: SubagentRecord): Promise<void> {
    while (record.runId && (record.status === "running" || record.status === "blocked")) {
      const loaded = loadRunManifestById(cwd, record.runId);
      if (!loaded) {
        await new Promise((resolve) => setTimeout(resolve, this.pollIntervalMs));
        continue;
      }
      if (loaded.manifest.status === "completed") {
        record.status = "completed";
        record.error = undefined;
        record.turnCount = record.turnCount ?? totalRunTurns(cwd, record.runId);
        record.completedAt = Date.now();
        savePersistedSubagentRecord(cwd, record);
        return;
      }
      if (loaded.manifest.status === "failed" || loaded.manifest.status === "cancelled") {
        record.status = loaded.manifest.status;
        record.error = loaded.manifest.summary;
        record.turnCount = record.turnCount ?? totalRunTurns(cwd, record.runId);
        record.completedAt = Date.now();
        savePersistedSubagentRecord(cwd, record);
        return;
      }
      if (loaded.manifest.status === "blocked") {
        record.status = "blocked";
        record.error = undefined;
        if (!record.blockedAt) {
          record.blockedAt = Date.now();
          record.stuckNotified = false;
          record.completedAt = undefined;
          this.notifyComplete(record);
          this.scheduleStuckBlockedNotify(cwd, record);
          this.scheduleBlockedTerminalPoll(cwd, record);
        }
        savePersistedSubagentRecord(cwd, record);
        return;
      }
      await new Promise((resolve) => setTimeout(resolve, this.pollIntervalMs));
    }
  }

  /**
   * Keeps checking a blocked record's manifest on an unref'd timer and
   * finalises the record once the run is completed, failed, or cancelled.
   */
  private scheduleBlockedTerminalPoll(cwd: string, record: SubagentRecord): void {
    const poll = (): void => {
      const current = this.records.get(record.id);
      if (!current || current.status !== "blocked" || !current.runId) return;
      const loaded = loadRunManifestById(cwd, current.runId);
      if (!loaded || loaded.manifest.status === "blocked" || loaded.manifest.status === "running" || loaded.manifest.status === "planning" || loaded.manifest.status === "queued") {
        const timer = setTimeout(poll, this.pollIntervalMs);
        timer.unref();
        return;
      }
      // Another writer may have flagged the result as consumed on disk.
      const persisted = readPersistedSubagentRecord(cwd, current.id);
      current.resultConsumed = current.resultConsumed || persisted?.resultConsumed;
      if (loaded.manifest.status === "completed") {
        current.status = "completed";
        current.error = undefined;
      } else if (loaded.manifest.status === "failed" || loaded.manifest.status === "cancelled") {
        current.status = loaded.manifest.status;
        current.error = loaded.manifest.summary;
      } else return;
      current.completedAt = Date.now();
      current.turnCount = current.turnCount ?? totalRunTurns(cwd, current.runId);
      current.durationMs = Math.max(0, current.completedAt - current.startedAt);
      savePersistedSubagentRecord(cwd, current);
      this.notifyComplete(current);
    };
    const timer = setTimeout(poll, this.pollIntervalMs);
    timer.unref();
  }

  /**
   * Emits "subagent.stuck-blocked" once if the record is still blocked after
   * `DEFAULT_SUBAGENT.stuckBlockedNotifyMs` (immediately when that is <= 0).
   */
  private scheduleStuckBlockedNotify(cwd: string, record: SubagentRecord): void {
    const threshold = DEFAULT_SUBAGENT.stuckBlockedNotifyMs;
    const fire = (): void => {
      const current = this.records.get(record.id);
      if (!current || current.status !== "blocked" || !current.blockedAt || current.stuckNotified) return;
      current.stuckNotified = true;
      this.notifyEvent("subagent.stuck-blocked", {
        event: "subagent.stuck-blocked",
        id: current.id,
        runId: current.runId,
        durationMs: Math.max(0, Date.now() - current.blockedAt),
        ownerSessionGeneration: current.ownerSessionGeneration,
      });
      savePersistedSubagentRecord(cwd, current);
    };
    if (threshold <= 0) {
      fire();
      return;
    }
    const timer = setTimeout(fire, threshold);
    timer.unref();
  }
}
|
||||
59
extensions/pi-crew/src/runtime/supervisor-contact.ts
Normal file
59
extensions/pi-crew/src/runtime/supervisor-contact.ts
Normal file
@@ -0,0 +1,59 @@
|
||||
import type { TeamRunManifest } from "../state/types.ts";
|
||||
import { appendEvent } from "../state/event-log.ts";
|
||||
import { logInternalError } from "../utils/internal-error.ts";
|
||||
|
||||
/** Data recorded when a child task contacts its supervisor. */
export interface SupervisorContactPayload {
  /** Run the contacting task belongs to. */
  runId: string;
  /** Contacting task. */
  taskId: string;
  /** Why the child is reaching out; unrecognised reasons are parsed as "custom". */
  reason:
    | "decision_needed"
    | "clarification"
    | "approval"
    | "error_escalation"
    | "custom";
  /** Free-form message from the child. */
  message: string;
  /** Optional structured details supplied by the child. */
  data?: Record<string, unknown>;
  /** ISO-8601 time at which the contact was recorded. */
  timestamp: string;
}
|
||||
|
||||
/**
 * Append a "supervisor.contact" event to the run's event log.
 *
 * The payload is stamped with the current time before it is written. The
 * write is best-effort: a failure is reported through `logInternalError`
 * and is not thrown to the caller.
 */
export function recordSupervisorContact(manifest: TeamRunManifest, payload: Omit<SupervisorContactPayload, "timestamp">): void {
  const stamped: SupervisorContactPayload = { ...payload, timestamp: new Date().toISOString() };
  try {
    const event = {
      type: "supervisor.contact",
      runId: manifest.runId,
      taskId: payload.taskId,
      data: stamped as unknown as Record<string, unknown>,
    };
    appendEvent(manifest.eventsPath, event);
  } catch (error) {
    const context = `runId=${manifest.runId} taskId=${payload.taskId}`;
    logInternalError("supervisor-contact.record", error, context);
  }
}
|
||||
|
||||
/**
 * Parse a supervisor contact request from one line of child Pi stdout.
 *
 * A line qualifies when it is a JSON object whose `type` is
 * "supervisor_contact" or "crew_supervisor_contact".
 *
 * Normalisation of the fields:
 * - `taskId`: kept when a string, otherwise "".
 * - `reason`: kept when it is a known reason, otherwise "custom".
 * - `message`: strings are kept; missing/null becomes ""; objects and arrays
 *   are serialised as JSON (rather than collapsing to "[object Object]");
 *   other primitives are stringified.
 * - `data`: kept only when it is a plain (non-array) object.
 *
 * @returns The parsed request, or `undefined` when the line is blank, not
 * valid JSON, not an object, or not a supervisor contact message.
 */
export function parseSupervisorContactFromLine(line: string): Omit<SupervisorContactPayload, "timestamp" | "runId"> | undefined {
  if (!line.trim()) return undefined;
  let parsed: unknown;
  try {
    parsed = JSON.parse(line);
  } catch {
    return undefined;
  }
  if (!parsed || typeof parsed !== "object" || Array.isArray(parsed)) return undefined;
  const record = parsed as Record<string, unknown>;
  if (record.type !== "supervisor_contact" && record.type !== "crew_supervisor_contact") return undefined;

  const knownReasons = ["decision_needed", "clarification", "approval", "error_escalation", "custom"];
  const rawMessage = record.message;
  let message: string;
  if (typeof rawMessage === "string") message = rawMessage;
  else if (rawMessage === undefined || rawMessage === null) message = "";
  // The value came out of JSON.parse, so it is always JSON-serialisable.
  else if (typeof rawMessage === "object") message = JSON.stringify(rawMessage);
  else message = String(rawMessage);

  return {
    taskId: typeof record.taskId === "string" ? record.taskId : "",
    reason: typeof record.reason === "string" && knownReasons.includes(record.reason)
      ? record.reason as SupervisorContactPayload["reason"]
      : "custom",
    message,
    data: record.data && typeof record.data === "object" && !Array.isArray(record.data) ? record.data as Record<string, unknown> : undefined,
  };
}
|
||||
38
extensions/pi-crew/src/runtime/task-display.ts
Normal file
38
extensions/pi-crew/src/runtime/task-display.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import type { TeamTaskState } from "../state/types.ts";
|
||||
import type { CrewAgentRecord, CrewRuntimeKind } from "./crew-agent-runtime.ts";
|
||||
import { recordFromTask } from "./crew-agent-records.ts";
|
||||
import type { TeamRunManifest } from "../state/types.ts";
|
||||
|
||||
/** A task gets an agent record unless it is still queued or was skipped. */
export function shouldMaterializeAgent(task: TeamTaskState): boolean {
  const { status } = task;
  return !(status === "queued" || status === "skipped");
}
|
||||
|
||||
/** Builds agent records, in task order, for every task that should be materialized. */
export function recordsForMaterializedTasks(manifest: TeamRunManifest, tasks: TeamTaskState[], runtime: CrewRuntimeKind): CrewAgentRecord[] {
  const materialized = tasks.filter((task) => shouldMaterializeAgent(task));
  const records: CrewAgentRecord[] = [];
  for (const task of materialized) records.push(recordFromTask(manifest, task, runtime));
  return records;
}
|
||||
|
||||
/**
 * Indexes tasks by both their id and, when present, their step id.
 *
 * Later entries overwrite earlier ones that share a key.
 */
export function taskById(tasks: TeamTaskState[]): Map<string, TeamTaskState> {
  const entries = tasks.flatMap((task): Array<[string, TeamTaskState]> =>
    task.stepId ? [[task.id, task], [task.stepId, task]] : [[task.id, task]],
  );
  return new Map<string, TeamTaskState>(entries);
}
|
||||
|
||||
/**
 * Explains why a queued task has not started.
 *
 * @returns `undefined` for non-queued tasks, "ready" when every dependency is
 * completed, otherwise "waiting for <ids>" listing the unfinished ones.
 */
export function waitingReason(task: TeamTaskState, tasks: TeamTaskState[]): string | undefined {
  if (task.status !== "queued") return undefined;
  const lookup = taskById(tasks);
  const unfinished: string[] = [];
  for (const dependency of task.dependsOn) {
    // Dependencies may be step ids; report them by task id when resolvable.
    const id = lookup.get(dependency)?.id ?? dependency;
    if (lookup.get(id)?.status !== "completed") unfinished.push(id);
  }
  return unfinished.length === 0 ? "ready" : `waiting for ${unfinished.join(", ")}`;
}
|
||||
|
||||
/**
 * Renders one bullet line per task: status icon, id, status, role->agent and,
 * for queued tasks that are not ready, what they are waiting for.
 */
export function formatTaskGraphLines(tasks: TeamTaskState[]): string[] {
  if (tasks.length === 0) return ["- (none)"];
  const iconFor = (status: string): string => {
    switch (status) {
      case "completed":
        return "✓";
      case "running":
        return "⠋";
      case "failed":
        return "✗";
      case "cancelled":
      case "skipped":
        return "■";
      default:
        return "◦";
    }
  };
  const lines: string[] = [];
  for (const task of tasks) {
    const wait = waitingReason(task, tasks);
    const suffix = wait && wait !== "ready" ? ` (${wait})` : "";
    lines.push(`- ${iconFor(task.status)} ${task.id} [${task.status}] ${task.role}->${task.agent}${suffix}`);
  }
  return lines;
}
|
||||
122
extensions/pi-crew/src/runtime/task-graph-scheduler.ts
Normal file
122
extensions/pi-crew/src/runtime/task-graph-scheduler.ts
Normal file
@@ -0,0 +1,122 @@
|
||||
import type { TeamTaskState } from "../state/types.ts";
|
||||
|
||||
/** Task ids grouped by scheduling state, each list in original task order. */
export interface TaskGraphSchedulerSnapshot {
  /** Queued tasks whose dependencies are all completed. */
  ready: Array<string>;
  /** Queued tasks still waiting on a dependency. */
  blocked: Array<string>;
  /** Tasks currently running. */
  running: Array<string>;
  /** Completed or skipped tasks. */
  done: Array<string>;
  /** Failed tasks. */
  failed: Array<string>;
  /** Cancelled tasks. */
  cancelled: Array<string>;
}
|
||||
|
||||
/** Lookup tables derived from a task list for dependency resolution. */
export interface TaskGraphIndex {
  /** Step ids of tasks whose status is "completed". */
  doneSteps: Set<string>;
  /** Task id -> task. */
  idMap: Map<string, TeamTaskState>;
  /** Step id -> task id, for tasks that have a step id. */
  stepToTaskId: Map<string, string>;
}
|
||||
|
||||
/** Builds the dependency lookup tables for `tasks` in a single pass. */
export function buildTaskGraphIndex(tasks: TeamTaskState[]): TaskGraphIndex {
  const doneSteps = new Set<string>();
  const idMap = new Map<string, TeamTaskState>();
  const stepToTaskId = new Map<string, string>();
  for (const task of tasks) {
    idMap.set(task.id, task);
    if (task.stepId === undefined) continue;
    stepToTaskId.set(task.stepId, task.id);
    if (task.status === "completed") doneSteps.add(task.stepId);
  }
  return { doneSteps, idMap, stepToTaskId };
}
|
||||
|
||||
/** Indexes tasks by id; a later task replaces an earlier one with the same id. */
function taskById(tasks: TeamTaskState[]): Map<string, TeamTaskState> {
  const lookup = new Map<string, TeamTaskState>();
  for (const task of tasks) lookup.set(task.id, task);
  return lookup;
}
|
||||
|
||||
/**
 * True when every dependency of `task` is completed.
 *
 * A dependency may name a completed step directly, or a step/task id that
 * resolves (via `stepMap`, then `idMap`) to a task with status "completed".
 */
function dependencySatisfied(task: TeamTaskState, doneStepIds: Set<string>, idMap: Map<string, TeamTaskState>, stepMap: Map<string, string>): boolean {
  for (const dependency of task.dependsOn) {
    if (doneStepIds.has(dependency)) continue;
    const resolvedId = stepMap.get(dependency) ?? dependency;
    if (idMap.get(resolvedId)?.status !== "completed") return false;
  }
  return true;
}
|
||||
|
||||
/**
 * Returns a copy of `task` whose `graph.queue` reflects its status:
 * queued -> "ready" or "blocked" (by dependencies), running -> "running",
 * completed/skipped -> "done", anything else -> "blocked".
 * Tasks without a `graph` keep it unset.
 */
function withQueue(task: TeamTaskState, index: TaskGraphIndex): TeamTaskState {
  let queue: "ready" | "blocked" | "running" | "done";
  switch (task.status) {
    case "queued":
      queue = dependencySatisfied(task, index.doneSteps, index.idMap, index.stepToTaskId) ? "ready" : "blocked";
      break;
    case "running":
      queue = "running";
      break;
    case "completed":
    case "skipped":
      queue = "done";
      break;
    default:
      queue = "blocked";
  }
  return { ...task, graph: task.graph ? { ...task.graph, queue } : task.graph };
}
|
||||
|
||||
/** Returns the supplied index, or builds one from `tasks` when none was given. */
function ensureIndex(tasks: TeamTaskState[], index?: TaskGraphIndex): TaskGraphIndex {
  if (index == null) return buildTaskGraphIndex(tasks);
  return index;
}
|
||||
|
||||
/** Returns copies of all tasks with `graph.queue` recomputed against the given (or a fresh) index. */
export function refreshTaskGraphQueues(tasks: TeamTaskState[], index?: TaskGraphIndex): TeamTaskState[] {
  const lookup = ensureIndex(tasks, index);
  const refreshed: TeamTaskState[] = [];
  for (const task of tasks) refreshed.push(withQueue(task, lookup));
  return refreshed;
}
|
||||
|
||||
/** Returns up to `maxCount` queued tasks whose dependencies are satisfied, in task order. */
export function getReadyTasks(tasks: TeamTaskState[], maxCount = 1, index?: TaskGraphIndex): TeamTaskState[] {
  const limit = Math.max(0, maxCount);
  const refreshed = refreshTaskGraphQueues(tasks, index);
  const ready = refreshed.filter((task) => task.status === "queued" && task.graph?.queue === "ready");
  return ready.slice(0, limit);
}
|
||||
|
||||
/**
 * Returns refreshed tasks with `taskId` set to "running".
 *
 * An existing `startedAt` is preserved; otherwise `now` is used.
 */
export function markTaskRunning(tasks: TeamTaskState[], taskId: string, now = new Date(), index?: TaskGraphIndex): TeamTaskState[] {
  const lookup = ensureIndex(tasks, index);
  const refreshed = refreshTaskGraphQueues(tasks, lookup);
  return refreshed.map((task): TeamTaskState => {
    if (task.id !== taskId) return task;
    const started: TeamTaskState = { ...task, status: "running", startedAt: task.startedAt ?? now.toISOString() };
    return withQueue(started, lookup);
  });
}
|
||||
|
||||
/**
 * Returns tasks with `taskId` set to "completed" and every task's
 * `graph.queue` recomputed, so dependents of the finished task become
 * "ready".
 *
 * An existing `finishedAt` is preserved; otherwise `now` is used.
 *
 * @param index Accepted for backward compatibility only. Any index built
 * before this call describes the task as not yet completed, so using it
 * would leave dependents "blocked"; a fresh index is always built from the
 * updated tasks instead.
 */
export function markTaskDone(tasks: TeamTaskState[], taskId: string, now = new Date(), index?: TaskGraphIndex): TeamTaskState[] {
  void index; // intentionally unused — see the @param note above
  const updated = tasks.map((task): TeamTaskState =>
    task.id === taskId ? { ...task, status: "completed", finishedAt: task.finishedAt ?? now.toISOString() } : task,
  );
  return refreshTaskGraphQueues(updated);
}
|
||||
|
||||
/**
 * Cancels `rootTaskId` and every task reachable through `graph.children`.
 *
 * Completed tasks are left untouched; every other task in the subtree gets
 * status "cancelled", `reason` as its error and a `finishedAt` timestamp
 * (existing timestamps are preserved). Queues are refreshed afterwards.
 */
export function cancelTaskSubtree(tasks: TeamTaskState[], rootTaskId: string, reason = "Cancelled by task graph scheduler.", now = new Date()): TeamTaskState[] {
  const lookup = taskById(tasks);
  const subtree = new Set<string>();
  const pending: string[] = [rootTaskId];
  // Depth-first walk; the visited set guards against cycles in the graph.
  while (pending.length > 0) {
    const id = pending.pop();
    if (!id || subtree.has(id)) continue;
    subtree.add(id);
    pending.push(...(lookup.get(id)?.graph?.children ?? []));
  }
  return refreshTaskGraphQueues(tasks.map((task): TeamTaskState => {
    const affected = subtree.has(task.id) && task.status !== "completed";
    return affected ? { ...task, status: "cancelled", error: reason, finishedAt: task.finishedAt ?? now.toISOString() } : task;
  }));
}
|
||||
|
||||
/**
 * Marks `rootTaskId` as failed and skips its still-queued descendants.
 *
 * Descendants are found through `graph.children`. Only descendants whose
 * status is "queued" are changed (to "skipped", with an error naming the
 * failed task). Queues are refreshed afterwards.
 */
export function failTaskAndBlockChildren(tasks: TeamTaskState[], rootTaskId: string, reason: string, now = new Date()): TeamTaskState[] {
  const lookup = taskById(tasks);
  const descendants = new Set<string>();
  const pending: string[] = [...(lookup.get(rootTaskId)?.graph?.children ?? [])];
  // Depth-first walk; the visited set guards against cycles in the graph.
  while (pending.length > 0) {
    const id = pending.pop();
    if (!id || descendants.has(id)) continue;
    descendants.add(id);
    pending.push(...(lookup.get(id)?.graph?.children ?? []));
  }
  return refreshTaskGraphQueues(tasks.map((task): TeamTaskState => {
    if (task.id === rootTaskId) {
      return { ...task, status: "failed", error: reason, finishedAt: task.finishedAt ?? now.toISOString() };
    }
    const shouldSkip = descendants.has(task.id) && task.status === "queued";
    if (!shouldSkip) return task;
    return { ...task, status: "skipped", error: `Blocked by failed task '${rootTaskId}'.`, finishedAt: task.finishedAt ?? now.toISOString() };
  }));
}
|
||||
|
||||
/** Groups task ids by scheduling state after refreshing queues; each list keeps task order. */
export function taskGraphSnapshot(tasks: TeamTaskState[], index?: TaskGraphIndex): TaskGraphSchedulerSnapshot {
  const snapshot: TaskGraphSchedulerSnapshot = { ready: [], blocked: [], running: [], done: [], failed: [], cancelled: [] };
  for (const task of refreshTaskGraphQueues(tasks, index)) {
    switch (task.status) {
      case "queued":
        // Queued tasks without a graph have no queue value and are not listed.
        if (task.graph?.queue === "ready") snapshot.ready.push(task.id);
        else if (task.graph?.queue === "blocked") snapshot.blocked.push(task.id);
        break;
      case "running":
        snapshot.running.push(task.id);
        break;
      case "completed":
      case "skipped":
        snapshot.done.push(task.id);
        break;
      case "failed":
        snapshot.failed.push(task.id);
        break;
      case "cancelled":
        snapshot.cancelled.push(task.id);
        break;
      default:
        break;
    }
  }
  return snapshot;
}
|
||||
127
extensions/pi-crew/src/runtime/task-output-context.ts
Normal file
127
extensions/pi-crew/src/runtime/task-output-context.ts
Normal file
@@ -0,0 +1,127 @@
|
||||
import * as fs from "node:fs";
|
||||
import * as path from "node:path";
|
||||
import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../state/types.ts";
|
||||
import { writeArtifact } from "../state/artifact-store.ts";
|
||||
import { resolveRealContainedPath } from "../utils/safe-paths.ts";
|
||||
import type { WorkflowStep } from "../workflows/workflow-config.ts";
|
||||
|
||||
/** Inputs gathered for a task from its dependencies and from shared run artifacts. */
export interface DependencyOutputContext {
  /** One entry per resolved dependency, in `dependsOn` order. */
  dependencies: {
    taskId: string;
    title: string;
    status: string;
    /** Result artifact text (possibly truncated), when readable. */
    result?: string;
    /** Path of the dependency's result artifact, when it has one. */
    resultPath?: string;
  }[];
  /** Shared artifacts the step declared as reads and that had non-blank content. */
  sharedReads: {
    name: string;
    path: string;
    content: string;
  }[];
}
|
||||
|
||||
/**
 * Reports whether `filePath` exists.
 *
 * When `baseDir` is given the path is first resolved through
 * `resolveRealContainedPath`; any error from that resolution counts as
 * "does not exist".
 */
function containedExists(filePath: string, baseDir?: string): boolean {
  let target = filePath;
  try {
    if (baseDir) target = resolveRealContainedPath(baseDir, filePath);
    return fs.existsSync(target);
  } catch {
    return false;
  }
}
|
||||
|
||||
/**
 * Reads a UTF-8 text file, returning at most the first `maxBytes` bytes.
 *
 * Files within the limit are returned whole. For larger files only the first
 * `maxBytes` bytes are read from disk (the rest of the file is never loaded)
 * and a "...(truncated N bytes)" note is appended, where N is the number of
 * bytes not read. A multi-byte character cut by the limit is dropped rather
 * than decoded as a replacement character.
 *
 * @param filePath File to read.
 * @param maxBytes Byte budget for the returned content.
 * @param baseDir When given, the path is first resolved through
 * `resolveRealContainedPath`.
 * @returns The (possibly truncated) text, or `undefined` on any error.
 */
function readIfSmall(filePath: string, maxBytes = 24_000, baseDir?: string): string | undefined {
  try {
    const safePath = baseDir ? resolveRealContainedPath(baseDir, filePath) : filePath;
    const stat = fs.statSync(safePath);
    if (stat.size <= maxBytes) return fs.readFileSync(safePath, "utf-8");
    const fd = fs.openSync(safePath, "r");
    try {
      const buffer = Buffer.alloc(maxBytes);
      const bytesRead = fs.readSync(fd, buffer, 0, maxBytes, 0);
      // `stream: true` withholds an incomplete trailing sequence;
      // `ignoreBOM: true` keeps a BOM, matching readFileSync's decoding.
      const head = new TextDecoder("utf-8", { ignoreBOM: true }).decode(buffer.subarray(0, bytesRead), { stream: true });
      return `${head}\n\n...(truncated ${stat.size - bytesRead} bytes)`;
    } finally {
      fs.closeSync(fd);
    }
  } catch {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Normalises a shared artifact name to a forward-slash relative path.
 *
 * Backslashes become slashes and a leading "./" is removed.
 *
 * @throws Error when the result is empty, absolute, or contains a ".." segment.
 */
function safeSharedName(name: string): string {
  const normalized = name.split("\\").join("/").replace(/^\.\/+/, "");
  const segments = normalized.split("/");
  const invalid = normalized === "" || segments.includes("..") || path.isAbsolute(normalized);
  if (invalid) throw new Error(`Invalid shared artifact name: ${name}`);
  return normalized;
}
|
||||
|
||||
/**
 * Resolves a shared artifact name to an absolute path under
 * `<artifactsRoot>/shared`.
 *
 * The name is validated by `safeSharedName`, then the resolved path is
 * checked again to confirm it stays inside the shared root. The containment
 * check compares whole path segments, so file names that merely begin with
 * two dots (e.g. "..notes.md") are accepted.
 *
 * @throws Error when the name is invalid or resolves outside the shared root.
 */
export function sharedPath(manifest: TeamRunManifest, name: string): string {
  const sharedRoot = path.resolve(manifest.artifactsRoot, "shared");
  const resolved = path.resolve(sharedRoot, safeSharedName(name));
  const relative = path.relative(sharedRoot, resolved);
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesRoot) throw new Error(`Invalid shared artifact name: ${name}`);
  return resolved;
}
|
||||
|
||||
/**
 * Gathers what a task needs from earlier work.
 *
 * - `dependencies`: for each entry of `task.dependsOn` that resolves to a
 *   task (by step id first, then by task id), its id, title, status and the
 *   text of its result artifact (up to 24,000 bytes).
 * - `sharedReads`: for each name in `step.reads`, the shared artifact's
 *   content; entries with blank or unreadable content are dropped.
 */
export function collectDependencyOutputContext(manifest: TeamRunManifest, tasks: TeamTaskState[], task: TeamTaskState, step: WorkflowStep): DependencyOutputContext {
  const byStep = new Map<string, TeamTaskState>();
  const byId = new Map<string, TeamTaskState>();
  for (const item of tasks) {
    if (item.stepId) byStep.set(item.stepId, item);
    byId.set(item.id, item);
  }

  const dependencies: DependencyOutputContext["dependencies"] = [];
  for (const dep of task.dependsOn) {
    const found = byStep.get(dep) ?? byId.get(dep);
    if (!found) continue;
    dependencies.push({
      taskId: found.id,
      title: found.title,
      status: found.status,
      resultPath: found.resultArtifact?.path,
      result: found.resultArtifact ? readIfSmall(found.resultArtifact.path, 24_000, manifest.artifactsRoot) : undefined,
    });
  }

  const sharedReads: DependencyOutputContext["sharedReads"] = [];
  const readNames = step.reads === false ? [] : step.reads ?? [];
  for (const name of readNames) {
    const filePath = sharedPath(manifest, name);
    const content = readIfSmall(filePath, 24_000, path.resolve(manifest.artifactsRoot, "shared")) ?? "";
    if (content.trim().length > 0) sharedReads.push({ name, path: filePath, content });
  }

  return { dependencies, sharedReads };
}
|
||||
|
||||
/**
 * Renders the context as Markdown: a "Dependency Outputs" section followed by
 * a "Shared Run Context Reads" section, each omitted when empty. The result
 * is trimmed; an empty context renders as "".
 */
export function renderDependencyOutputContext(context: DependencyOutputContext): string {
  const dependencyLines = context.dependencies.flatMap((dep) => [
    `## ${dep.taskId} (${dep.title})`,
    `Status: ${dep.status}`,
    dep.resultPath ? `Result artifact: ${dep.resultPath}` : "",
    "",
    dep.result?.trim() || "(no result output)",
    "",
  ]);
  const sharedLines = context.sharedReads.flatMap((read) => [
    `## shared/${read.name}`,
    `Path: ${read.path}`,
    "",
    read.content.trim(),
    "",
  ]);
  const lines = [
    ...(context.dependencies.length ? ["# Dependency Outputs", "", ...dependencyLines] : []),
    ...(context.sharedReads.length ? ["# Shared Run Context Reads", "", ...sharedLines] : []),
  ];
  return lines.join("\n").trim();
}
|
||||
|
||||
/**
 * Copies a task's result artifact into the run's shared area.
 *
 * The shared name is `step.output`, or `<task id>.md` when unset. Up to
 * 80,000 bytes of the result are copied and a trailing newline is ensured.
 *
 * @returns The written artifact, or `undefined` when `step.output` is
 * `false` or the task has no readable, non-empty result.
 */
export function writeTaskSharedOutput(manifest: TeamRunManifest, step: WorkflowStep, task: TeamTaskState): ArtifactDescriptor | undefined {
  if (step.output === false) return undefined;
  const name = safeSharedName(step.output || `${task.id}.md`);
  const resultArtifact = task.resultArtifact;
  const source = resultArtifact ? readIfSmall(resultArtifact.path, 80_000, manifest.artifactsRoot) : undefined;
  if (!source) return undefined;
  const content = source.endsWith("\n") ? source : `${source}\n`;
  return writeArtifact(manifest.artifactsRoot, {
    kind: "metadata",
    relativePath: `shared/${name}`,
    producer: task.id,
    content,
  });
}
|
||||
|
||||
/** Persists the task's gathered input context as `metadata/<task id>.inputs.json`. */
export function writeTaskInputsArtifact(manifest: TeamRunManifest, task: TeamTaskState, context: DependencyOutputContext): ArtifactDescriptor {
  const serialized = JSON.stringify(context, null, 2);
  const relativePath = `metadata/${task.id}.inputs.json`;
  return writeArtifact(manifest.artifactsRoot, {
    kind: "metadata",
    relativePath,
    producer: task.id,
    content: `${serialized}\n`,
  });
}
|
||||
|
||||
/**
 * Builds a plain-text report with one section per task: a header, a status
 * line, optional role/artifact/usage lines, then the result text (up to
 * 40,000 bytes) or, when there is none, the status again.
 *
 * Status precedence: SKIPPED, FAILED (with exit code and error when known),
 * missing result artifact, empty output, otherwise the upper-cased task
 * status. Empty lines are removed from each section; sections are separated
 * by a blank line.
 */
export function aggregateTaskOutputs(tasks: TeamTaskState[], manifest?: TeamRunManifest): string {
  const root = manifest?.artifactsRoot;
  const sections = tasks.map((task, index) => {
    const body = task.resultArtifact ? readIfSmall(task.resultArtifact.path, 40_000, root) : undefined;
    const text = body?.trim() ?? "";
    const hasBody = text.length > 0;
    const expectedMissing = Boolean(task.resultArtifact) && !containedExists(task.resultArtifact!.path, root);

    let status: string;
    if (task.status === "skipped") {
      status = "SKIPPED";
    } else if (task.status === "failed") {
      const exit = task.exitCode !== undefined ? ` (exit code ${task.exitCode ?? "null"})` : "";
      const detail = task.error ? `: ${task.error}` : "";
      status = `FAILED${exit}${detail}`;
    } else if (expectedMissing) {
      status = `EMPTY OUTPUT (expected result artifact missing: ${task.resultArtifact?.path})`;
    } else if (!hasBody) {
      status = "EMPTY OUTPUT (no textual response returned)";
    } else {
      status = task.status.toUpperCase();
    }

    const lines = [
      `=== Task ${index + 1}: ${task.id} (${task.agent}) ===`,
      `Status: ${status}`,
      task.role ? `Role: ${task.role}` : "",
      task.resultArtifact?.path ? `Result artifact: ${task.resultArtifact.path}` : "",
      task.logArtifact?.path ? `Log artifact: ${task.logArtifact.path}` : "",
      task.transcriptArtifact?.path ? `Transcript: ${task.transcriptArtifact.path}` : "",
      task.usage ? `Usage: ${JSON.stringify(task.usage)}` : "",
      "",
      hasBody ? text : status,
    ];
    // Dropping falsy entries also removes the blank separator line above.
    return lines.filter(Boolean).join("\n");
  });
  return sections.join("\n\n");
}
|
||||
93
extensions/pi-crew/src/runtime/task-packet.ts
Normal file
93
extensions/pi-crew/src/runtime/task-packet.ts
Normal file
@@ -0,0 +1,93 @@
|
||||
import * as path from "node:path";
|
||||
import type { TeamRunManifest, TaskPacket, TaskScope, VerificationContract } from "../state/types.ts";
|
||||
import type { WorkflowStep } from "../workflows/workflow-config.ts";
|
||||
|
||||
/** Everything `buildTaskPacket` receives when assembling the packet for one task. */
export interface BuildTaskPacketInput {
  /** Run manifest; `buildTaskPacket` reads its `goal`, `cwd` and `workspaceMode`. */
  manifest: TeamRunManifest;
  /** Workflow step whose `task` template, `reads` and `verify` settings shape the packet. */
  step: WorkflowStep;
  /** Identifier of the task the packet is built for. */
  taskId: string;
  /** Working directory assigned to the task. */
  cwd: string;
  /** Path of the task's worktree, if any; copied into the packet's `worktree` field. */
  worktreePath?: string;
}
|
||||
|
||||
/** Outcome of `validateTaskPacket`. */
export interface TaskPacketValidationResult {
  /** `true` exactly when `errors` is empty. */
  valid: boolean;
  /** One human-readable message per violated rule, in the order the rules are checked. */
  errors: string[];
}
|
||||
|
||||
/**
 * Derives a task scope from the number of paths a workflow step declares in `reads`.
 *
 * @param step Workflow step; `reads` may be an array, `false`, or absent.
 * @returns `"single_file"` for exactly one read, `"module"` for several, and
 *          `"workspace"` when there are none (including `reads === false`).
 */
export function inferTaskScope(step: WorkflowStep): TaskScope {
  const declared = step.reads;
  const readCount = declared === false ? 0 : (declared ?? []).length;
  switch (readCount) {
    case 0:
      return "workspace";
    case 1:
      return "single_file";
    default:
      return "module";
  }
}
|
||||
|
||||
/**
 * Builds the verification contract used when a step does not supply its own.
 *
 * @param step Workflow step; a truthy `verify` raises the required green level to `"targeted"`.
 * @returns A contract with no commands that always allows manual evidence.
 */
export function defaultVerificationContract(step: WorkflowStep): VerificationContract {
  const requiredGreenLevel = step.verify ? "targeted" : "none";
  return {
    requiredGreenLevel,
    commands: [],
    allowManualEvidence: true,
  };
}
|
||||
|
||||
/**
 * Assembles the task packet handed to a worker for one workflow step.
 *
 * The objective is the step's task template with every `{goal}` placeholder replaced by
 * the run goal. Scope and scope path come from the step's declared `reads`; the branch
 * policy depends on whether the run uses worktrees. The remaining policies, constraints
 * and expected artifacts are fixed texts.
 *
 * @param input Manifest, step and workspace details for the task.
 * @returns A fully populated packet with an empty `acceptanceTests` list.
 */
export function buildTaskPacket(input: BuildTaskPacketInput): TaskPacket {
  const { manifest, step } = input;
  const scope = inferTaskScope(step);
  const reads = step.reads === false ? [] : step.reads ?? [];
  const scopePath = reads.length === 1 ? reads[0] : reads.length > 1 ? reads.join(", ") : undefined;
  const usesWorktree = manifest.workspaceMode === "worktree";
  return {
    // A replacer function inserts the goal verbatim. A plain string replacement would
    // interpret `$&`, `$$`, "$`" and `$'` inside the goal and corrupt the objective.
    objective: step.task.replaceAll("{goal}", () => manifest.goal),
    scope,
    scopePath,
    // basename is "" for a filesystem root, so fall back to the full path.
    repo: path.basename(manifest.cwd) || manifest.cwd,
    worktree: input.worktreePath,
    branchPolicy: usesWorktree
      ? "Use the assigned task worktree and avoid modifying the leader checkout."
      : "Use the current checkout; do not create branches unless explicitly requested.",
    acceptanceTests: [],
    commitPolicy: "Do not commit unless explicitly requested by the user or workflow.",
    reportingContract: "Report intended/changed files, verification evidence, blockers, conflict risks, and next recommended action.",
    escalationPolicy: "Stop and report if scope is ambiguous, destructive action is needed, permissions are missing, verification cannot be completed, or edits may overlap with another worker/task.",
    constraints: [
      "Stay within the assigned task scope.",
      "Do not claim completion without verification evidence.",
      "Use mailbox/API state for coordination when available.",
      "Do not make overlapping edits to the same file/symbol without explicit leader sequencing or ownership guidance.",
    ],
    expectedArtifacts: ["prompt", "result", "verification"],
    verification: defaultVerificationContract(step),
  };
}
|
||||
|
||||
/**
 * Checks a task packet for blank required fields and empty list entries.
 *
 * Rules, in reporting order: the six required text fields must be non-blank; `scopePath`
 * is required for the `module`, `single_file` and `custom` scopes; `constraints` and
 * `expectedArtifacts` must be non-empty and free of blank entries; `acceptanceTests` may
 * be empty but must not contain blank entries.
 *
 * @param packet Packet to validate.
 * @returns `valid` plus the list of violation messages (empty when valid).
 */
export function validateTaskPacket(packet: TaskPacket): TaskPacketValidationResult {
  const errors: string[] = [];
  const isBlank = (text: string): boolean => text.trim().length === 0;

  const requiredText: Array<[string, string]> = [
    ["objective", packet.objective],
    ["repo", packet.repo],
    ["branchPolicy", packet.branchPolicy],
    ["commitPolicy", packet.commitPolicy],
    ["reportingContract", packet.reportingContract],
    ["escalationPolicy", packet.escalationPolicy],
  ];
  for (const [field, text] of requiredText) {
    if (isBlank(text)) errors.push(`${field} must not be empty`);
  }

  const scopeNeedsPath = packet.scope === "module" || packet.scope === "single_file" || packet.scope === "custom";
  if (scopeNeedsPath && !packet.scopePath?.trim()) {
    errors.push(`scopePath is required for scope '${packet.scope}'`);
  }

  // Reports every blank entry of a list together with its index.
  const reportBlankEntries = (field: string, entries: readonly string[]): void => {
    for (let index = 0; index < entries.length; index++) {
      if (isBlank(entries[index])) errors.push(`${field} contains an empty value at index ${index}`);
    }
  };

  if (packet.constraints.length === 0) errors.push("constraints must contain at least one entry");
  reportBlankEntries("constraints", packet.constraints);

  if (packet.expectedArtifacts.length === 0) errors.push("expectedArtifacts must contain at least one entry");
  reportBlankEntries("expectedArtifacts", packet.expectedArtifacts);

  reportBlankEntries("acceptanceTests", packet.acceptanceTests);

  return { valid: errors.length === 0, errors };
}
|
||||
|
||||
/**
 * Renders a task packet as a small Markdown document: a `# Task Packet` heading followed
 * by the packet as 2-space indented JSON inside a fenced `json` code block.
 *
 * @param packet Packet to serialize.
 * @returns The Markdown text, ending with a newline.
 */
export function renderTaskPacket(packet: TaskPacket): string {
  const fence = "```";
  // An unserializable packet renders as an empty block rather than the text "undefined".
  const json = JSON.stringify(packet, null, 2) ?? "";
  return `# Task Packet\n\n${fence}json\n${json}\n${fence}\n`;
}
|
||||
387
extensions/pi-crew/src/runtime/task-runner.ts
Normal file
387
extensions/pi-crew/src/runtime/task-runner.ts
Normal file
@@ -0,0 +1,387 @@
|
||||
import * as fs from "node:fs";
|
||||
import type { AgentConfig } from "../agents/agent-config.ts";
|
||||
import type { CrewLimitsConfig, CrewRuntimeConfig } from "../config/config.ts";
|
||||
import type { ArtifactDescriptor, OperationTerminalEvidence, TeamRunManifest, TeamTaskState, UsageState } from "../state/types.ts";
|
||||
import { writeArtifact } from "../state/artifact-store.ts";
|
||||
import { appendEvent } from "../state/event-log.ts";
|
||||
import { saveRunManifest } from "../state/state-store.ts";
|
||||
import { createTaskClaim } from "../state/task-claims.ts";
|
||||
import { createWorkerHeartbeat, touchWorkerHeartbeat } from "./worker-heartbeat.ts";
|
||||
import type { WorkflowStep } from "../workflows/workflow-config.ts";
|
||||
import { captureWorktreeDiff, captureWorktreeDiffStat, prepareTaskWorkspace } from "../worktree/worktree-manager.ts";
|
||||
import { buildConfiguredModelRouting, formatModelAttemptNote, isRetryableModelFailure, type ModelAttemptSummary } from "./model-fallback.ts";
|
||||
import { parsePiJsonOutput, type ParsedPiJsonOutput } from "./pi-json-output.ts";
|
||||
import { runChildPi } from "./child-pi.ts";
|
||||
import { buildTaskPacket } from "./task-packet.ts";
|
||||
import { createVerificationEvidence } from "./green-contract.ts";
|
||||
import { createStartupEvidence } from "./worker-startup.ts";
|
||||
import { permissionForRole } from "./role-permission.ts";
|
||||
import { collectDependencyOutputContext, renderDependencyOutputContext, writeTaskInputsArtifact, writeTaskSharedOutput } from "./task-output-context.ts";
|
||||
import { appendCrewAgentEvent, appendCrewAgentOutput, emptyCrewAgentProgress, recordFromTask, upsertCrewAgent } from "./crew-agent-records.ts";
|
||||
import { parseSessionUsage } from "./session-usage.ts";
|
||||
import type { CrewAgentProgress, CrewRuntimeKind } from "./crew-agent-runtime.ts";
|
||||
import { shouldAppendProgressEventUpdate, type ProgressEventSummary } from "./progress-event-coalescer.ts";
|
||||
import { coordinationBridgeInstructions, renderTaskPrompt } from "./task-runner/prompt-builder.ts";
|
||||
import { buildWorkerPromptPipeline } from "./task-runner/prompt-pipeline.ts";
|
||||
import { buildWorkerCapabilityInventory } from "./task-runner/capabilities.ts";
|
||||
import { applyAgentProgressEvent, applyUsageToProgress, progressEventSummary, shouldFlushProgressEvent } from "./task-runner/progress.ts";
|
||||
import { checkpointTask, persistSingleTaskUpdate, updateTask } from "./task-runner/state-helpers.ts";
|
||||
import { cleanResultText, isFinalChildEvent } from "./task-runner/result-utils.ts";
|
||||
import { evaluateCompletionMutationGuard } from "./completion-guard.ts";
|
||||
import { cancellationReasonFromSignal } from "./cancellation.ts";
|
||||
import { appendTaskAttentionEvent } from "./attention-events.ts";
|
||||
import { parseSupervisorContactFromLine, recordSupervisorContact } from "./supervisor-contact.ts";
|
||||
import { renderSkillInstructions } from "./skill-instructions.ts";
|
||||
|
||||
/** Arguments accepted by `runTeamTask`. */
export interface TaskRunnerInput {
  /** Manifest of the run the task belongs to. */
  manifest: TeamRunManifest;
  /** All task states of the run; the updated list is returned by `runTeamTask`. */
  tasks: TeamTaskState[];
  /** The task to execute. */
  task: TeamTaskState;
  /** Workflow step that defines the task. */
  step: WorkflowStep;
  /** Agent configuration used for the worker. */
  agent: AgentConfig;
  /** Abort signal forwarded to the worker runtime. */
  signal?: AbortSignal;
  /** When `runtimeKind` is omitted: `true` selects "child-process", `false` selects "scaffold". */
  executeWorkers: boolean;
  /** Explicit runtime selection; takes precedence over `executeWorkers`. */
  runtimeKind?: CrewRuntimeKind;
  /** Runtime configuration; `completionMutationGuard` is read here and the whole object is forwarded to live sessions. */
  runtimeConfig?: CrewRuntimeConfig;
  /** Forwarded to the live-session runtime. */
  parentContext?: string;
  /** Forwarded to model routing and to the live-session runtime. */
  parentModel?: unknown;
  /** Forwarded to model routing and to the live-session runtime. */
  modelRegistry?: unknown;
  /** Model override passed to model routing and recorded in the capability inventory. */
  modelOverride?: string;
  /** Team-role model passed to model routing and recorded in the capability inventory. */
  teamRoleModel?: string;
  /** Team-role skills passed to skill rendering; `false` is recorded as "skills disabled". */
  teamRoleSkills?: string[] | false;
  /** Skill override passed to skill rendering; `false` is recorded as "skills disabled". */
  skillOverride?: string[] | false;
  /** Limits; `maxTaskDepth` is forwarded to the child process as `maxDepth`. */
  limits?: CrewLimitsConfig;
  /** Pre-rendered dependency context; when omitted it is rendered from the collected dependency outputs. */
  dependencyContextText?: string;
  /** Pre-rendered skill block; when provided, skill rendering is skipped. */
  skillBlock?: string;
  /** Skill names; fall back to the rendered skills when omitted. */
  skillNames?: string[];
  /** Skill paths passed to the child process; fall back to the rendered skills when omitted. */
  skillPaths?: string[];
  /** Optional callback for JSON events from child Pi. Used for overflow recovery tracking. */
  onJsonEvent?: (taskId: string, runId: string, event: unknown) => void;
}
|
||||
|
||||
/**
 * Runs a single team task from start to finish and records everything it produced.
 *
 * Phases, in order:
 *  1. Prepare the workspace, build the task packet and collect dependency context.
 *  2. Mark the task as running, persist it and emit `task.started`.
 *  3. Render the prompt and write the prompt, inputs, skills and coordination artifacts.
 *  4. Execute through the selected runtime: a child Pi process (with model fallback),
 *     a live session, or a scaffold placeholder that only writes a result note.
 *  5. Capture the worktree diff, when a worktree is in use.
 *  6. Apply the completion mutation guard (modes "off", "warn", "fail"; default "warn").
 *  7. Finalize the task state, write the remaining metadata artifacts, save the manifest
 *     and emit `task.completed` or `task.failed`.
 *
 * Errors thrown by the helpers called here are not caught by this function.
 *
 * @param input Task, run state and runtime options.
 * @returns The updated manifest (with the new artifacts appended) and the updated task list.
 */
export async function runTeamTask(input: TaskRunnerInput): Promise<{ manifest: TeamRunManifest; tasks: TeamTaskState[] }> {
  let manifest = input.manifest;

  // ---- 1. Workspace, task packet, dependency context -------------------------------------------
  const workspace = prepareTaskWorkspace(manifest, input.task);
  const worktree = workspace.worktreePath && workspace.branch
    ? { path: workspace.worktreePath, branch: workspace.branch, reused: workspace.reused ?? false }
    : input.task.worktree;
  const taskPacket = buildTaskPacket({
    manifest,
    step: input.step,
    taskId: input.task.id,
    cwd: workspace.cwd,
    worktreePath: worktree?.path,
  });
  const dependencyContext = collectDependencyOutputContext(manifest, input.tasks, input.task, input.step);
  const dependencyContextText = input.dependencyContextText ?? renderDependencyOutputContext(dependencyContext);

  // ---- 2. Mark the task as running ---------------------------------------------------------------
  let task: TeamTaskState = {
    ...input.task,
    cwd: workspace.cwd,
    worktree,
    taskPacket,
    status: "running",
    startedAt: new Date().toISOString(),
    claim: createTaskClaim(`task-runner:${input.task.id}`),
    heartbeat: createWorkerHeartbeat(input.task.id),
    agentProgress: input.task.agentProgress ?? emptyCrewAgentProgress(),
    ...(dependencyContextText ? { dependencyContextText } : {}),
  } as TeamTaskState;
  let tasks = updateTask(input.tasks, task);
  const runtimeKind = input.runtimeKind ?? (input.executeWorkers ? "child-process" : "scaffold");
  tasks = persistSingleTaskUpdate(manifest, tasks, task);
  if (runtimeKind === "child-process") {
    const started = checkpointTask(manifest, tasks, task, "started");
    task = started.task;
    tasks = started.tasks;
  }
  upsertCrewAgent(manifest, recordFromTask(manifest, task, runtimeKind));
  appendEvent(manifest.eventsPath, {
    type: "task.started",
    runId: manifest.runId,
    taskId: task.id,
    data: {
      role: task.role,
      agent: task.agent,
      runtime: runtimeKind,
      cwd: task.cwd,
      worktreePath: workspace.worktreePath,
      worktreeBranch: workspace.branch,
      worktreeReused: workspace.reused,
    },
  });

  // ---- 3. Prompt and up-front artifacts ----------------------------------------------------------
  const permissionMode = permissionForRole(task.role);
  // Skills are rendered only when the caller did not supply a pre-rendered block.
  const renderedSkills = input.skillBlock === undefined
    ? renderSkillInstructions({
        cwd: task.cwd,
        role: task.role,
        agent: input.agent,
        teamRole: { skills: input.teamRoleSkills },
        step: input.step,
        override: input.skillOverride,
      })
    : undefined;
  const skillBlock = input.skillBlock ?? renderedSkills?.block;
  const skillNames = input.skillNames ?? renderedSkills?.names;
  const skillPaths = input.skillPaths ?? renderedSkills?.paths;

  const prompt = renderTaskPrompt(manifest, input.step, task, input.agent, skillBlock);
  const promptArtifact = writeArtifact(manifest.artifactsRoot, {
    kind: "prompt",
    relativePath: `prompts/${task.id}.md`,
    content: `${prompt}\n`,
    producer: task.id,
  });

  // Outcome of the run; filled in by whichever runtime branch executes below.
  let resultArtifact: ArtifactDescriptor;
  let logArtifact: ArtifactDescriptor | undefined;
  let transcriptArtifact: ArtifactDescriptor | undefined;
  let exitCode: number | null = 0;
  let error: string | undefined;
  let modelAttempts: ModelAttemptSummary[] | undefined;
  let parsedOutput: ParsedPiJsonOutput | undefined;
  let finalStdout = "";
  let transcriptPath: string | undefined;
  let terminalEvidence: OperationTerminalEvidence[] = [];

  // Default startup evidence; replaced per attempt (child process) or by the live session.
  let startupEvidence = createStartupEvidence({
    command: runtimeKind === "child-process" ? "pi" : runtimeKind === "live-session" ? "live-session" : "safe-scaffold",
    startedAt: new Date(task.startedAt ?? new Date().toISOString()),
    finishedAt: new Date(),
    promptSentAt: new Date(task.startedAt ?? new Date().toISOString()),
    promptAccepted: true,
    exitCode: 0,
  });

  // All per-task metadata artifacts share the same kind, producer and path prefix.
  const writeMetadataArtifact = (suffix: string, content: string): ArtifactDescriptor =>
    writeArtifact(manifest.artifactsRoot, {
      kind: "metadata",
      relativePath: `metadata/${task.id}.${suffix}`,
      content,
      producer: task.id,
    });

  const inputsArtifact = writeTaskInputsArtifact(manifest, task, dependencyContext);
  const skillArtifact = skillBlock
    ? writeMetadataArtifact(
        "skills.md",
        [`Selected skills: ${skillNames?.join(", ") ?? "(none)"}`, `Skill paths passed to child Pi: ${(skillPaths ?? []).length}`, "", skillBlock, ""].join("\n"),
      )
    : undefined;
  const coordinationArtifact = writeMetadataArtifact("coordination-bridge.md", `${coordinationBridgeInstructions(task)}\n`);

  // ---- 4. Execute through the selected runtime ---------------------------------------------------
  if (runtimeKind === "child-process") {
    const modelRoutingPlan = buildConfiguredModelRouting({
      overrideModel: input.modelOverride,
      stepModel: input.step.model,
      teamRoleModel: input.teamRoleModel,
      agentModel: input.agent.model,
      fallbackModels: input.agent.fallbackModels,
      parentModel: input.parentModel,
      modelRegistry: input.modelRegistry,
      cwd: task.cwd,
    });
    const candidates = modelRoutingPlan.candidates;
    // With no candidate models, make one attempt with the model left unspecified.
    const attemptModels = candidates.length > 0 ? candidates : [undefined];
    const logs: string[] = [];
    let finalStderr = "";
    modelAttempts = [];
    transcriptPath = `${manifest.artifactsRoot}/transcripts/${task.id}.jsonl`;
    let finalCheckpointWritten = false;
    let lastAgentRecordPersistedAt = 0;
    let lastHeartbeatPersistedAt = 0;
    let lastRunProgressPersistedAt = 0;
    let lastRunProgressSummary: ProgressEventSummary | undefined;

    // Persists a refreshed heartbeat, at most once per second unless forced.
    const persistHeartbeat = (force = false): void => {
      const now = Date.now();
      if (!force && now - lastHeartbeatPersistedAt < 1000) return;
      lastHeartbeatPersistedAt = now;
      task = { ...task, heartbeat: touchWorkerHeartbeat(task.heartbeat ?? createWorkerHeartbeat(task.id)) };
      tasks = persistSingleTaskUpdate(manifest, tasks, task);
    };

    // Updates the agent record (throttled to 500 ms unless forced or flagged for flush)
    // and appends a coalesced `task.progress` event when the coalescer allows it.
    const persistChildProgress = (event: unknown, force = false): void => {
      const now = Date.now();
      if (force || shouldFlushProgressEvent(event) || now - lastAgentRecordPersistedAt >= 500) {
        upsertCrewAgent(manifest, recordFromTask(manifest, task, "child-process"));
        lastAgentRecordPersistedAt = now;
      }
      const summary = progressEventSummary(task, event);
      const decision = shouldAppendProgressEventUpdate({
        previous: lastRunProgressSummary,
        next: summary,
        nowMs: now,
        lastAppendMs: lastRunProgressPersistedAt || undefined,
        minIntervalMs: 1000,
        force,
      });
      if (decision.shouldAppend) {
        appendEvent(manifest.eventsPath, {
          type: "task.progress",
          runId: manifest.runId,
          taskId: task.id,
          data: { ...summary, coalesceReason: decision.reason },
        });
        lastRunProgressSummary = summary;
        lastRunProgressPersistedAt = now;
      }
    };

    // Try each candidate model in order; stop on success or on a non-retryable failure.
    for (let i = 0; i < attemptModels.length; i++) {
      const model = attemptModels[i];
      const attemptStartedAt = new Date();
      const pendingAttempt: ModelAttemptSummary = { model: model ?? "default", success: false };
      task = { ...task, modelAttempts: [...modelAttempts, pendingAttempt] };
      tasks = updateTask(tasks, task);
      upsertCrewAgent(manifest, recordFromTask(manifest, task, "child-process"));

      const childResult = await runChildPi({
        cwd: task.cwd,
        task: prompt,
        agent: input.agent,
        model,
        signal: input.signal,
        transcriptPath,
        maxDepth: input.limits?.maxTaskDepth,
        skillPaths,
        onSpawn: (pid) => {
          const spawned = checkpointTask(manifest, tasks, task, "child-spawned", pid);
          task = spawned.task;
          tasks = spawned.tasks;
        },
        onStdoutLine: (line) => {
          appendCrewAgentOutput(manifest, task.id, line);
          persistHeartbeat();
          // A stdout line may carry a supervisor contact request from the child Pi.
          const contact = parseSupervisorContactFromLine(line);
          if (contact) {
            recordSupervisorContact(manifest, { runId: manifest.runId, ...contact });
          }
        },
        onJsonEvent: (event) => {
          appendCrewAgentEvent(manifest, task.id, event);
          persistHeartbeat();
          task = {
            ...task,
            agentProgress: applyAgentProgressEvent(task.agentProgress ?? emptyCrewAgentProgress(), event, task.startedAt),
          };
          tasks = updateTask(tasks, task);
          // Feed the caller's overflow recovery tracker; its failures must not affect the task.
          if (input.onJsonEvent) {
            try {
              input.onJsonEvent(task.id, manifest.runId, event);
            } catch {
              /* overflow tracking errors should not affect task */
            }
          }
          // Checkpoint once, on the first final event seen from the child.
          if (!finalCheckpointWritten && isFinalChildEvent(event)) {
            finalCheckpointWritten = true;
            const finalCheckpoint = checkpointTask(manifest, tasks, task, "child-stdout-final");
            task = finalCheckpoint.task;
            tasks = finalCheckpoint.tasks;
          }
          persistChildProgress(event);
        },
      });

      const evidenceStatus = childResult.exitStatus?.cancelled
        ? "cancelled"
        : childResult.error || (childResult.exitCode && childResult.exitCode !== 0)
          ? "failed"
          : "completed";
      terminalEvidence = [
        ...terminalEvidence,
        {
          operation: "worker",
          status: evidenceStatus,
          startedAt: attemptStartedAt.toISOString(),
          finishedAt: new Date().toISOString(),
          ...(input.signal?.aborted ? { reason: cancellationReasonFromSignal(input.signal) } : {}),
          ...(childResult.exitStatus ? { exitStatus: childResult.exitStatus } : {}),
        },
      ];
      if (evidenceStatus === "cancelled") {
        appendEvent(manifest.eventsPath, {
          type: "worker.cancelled",
          runId: manifest.runId,
          taskId: task.id,
          message: input.signal?.aborted ? cancellationReasonFromSignal(input.signal).message : "Worker cancelled.",
          data: { terminalEvidence: terminalEvidence.at(-1) },
        });
      }
      startupEvidence = createStartupEvidence({
        command: "pi",
        startedAt: attemptStartedAt,
        finishedAt: new Date(),
        promptSentAt: attemptStartedAt,
        promptAccepted: childResult.exitCode === 0 && !childResult.error,
        stderr: childResult.stderr,
        error: childResult.error,
        exitCode: childResult.exitCode,
      });

      exitCode = childResult.exitCode;
      finalStdout = childResult.stdout;
      finalStderr = childResult.stderr;
      // Prefer the transcript file over captured stdout when it exists.
      parsedOutput = parsePiJsonOutput(fs.existsSync(transcriptPath) ? fs.readFileSync(transcriptPath, "utf-8") : childResult.stdout);
      error = childResult.error
        || (childResult.exitCode && childResult.exitCode !== 0 ? childResult.stderr || `Child Pi exited with ${childResult.exitCode}` : undefined);
      persistHeartbeat(true);
      persistChildProgress({ type: "attempt_finished" }, true);

      const attempt: ModelAttemptSummary = { model: model ?? "default", success: !error, exitCode, error };
      modelAttempts.push(attempt);
      task = { ...task, modelAttempts: [...modelAttempts] };
      tasks = updateTask(tasks, task);
      logs.push(
        `MODEL ATTEMPT ${i + 1}: ${attempt.model}`,
        `success=${attempt.success}`,
        `exitCode=${attempt.exitCode ?? "null"}`,
        attempt.error ? `error=${attempt.error}` : "",
        "",
      );

      if (!error) break;
      const nextModel = attemptModels[i + 1];
      if (!nextModel || !isRetryableModelFailure(error)) break;
      logs.push(formatModelAttemptNote(attempt, nextModel), "");
    }

    resultArtifact = writeArtifact(manifest.artifactsRoot, {
      kind: "result",
      relativePath: `results/${task.id}.txt`,
      content: cleanResultText(parsedOutput?.finalText) ?? cleanResultText(finalStdout) ?? cleanResultText(finalStderr) ?? "(no output)",
      producer: task.id,
    });
    logArtifact = writeArtifact(manifest.artifactsRoot, {
      kind: "log",
      relativePath: `logs/${task.id}.log`,
      content: [
        ...logs,
        `finalExitCode=${exitCode ?? "null"}`,
        `jsonEvents=${parsedOutput?.jsonEvents ?? 0}`,
        parsedOutput?.usage ? `usage=${JSON.stringify(parsedOutput.usage)}` : "",
        "",
        "STDOUT:",
        finalStdout,
        "",
        "STDERR:",
        finalStderr,
      ].join("\n"),
      producer: task.id,
    });

    // Record which model was used: the first successful attempt, otherwise the last one.
    const successfulAttemptIndex = modelAttempts.findIndex((attempt) => attempt.success);
    const usedAttempt = successfulAttemptIndex === -1 ? Math.max(0, modelAttempts.length - 1) : successfulAttemptIndex;
    const resolvedModel = modelAttempts[usedAttempt]?.model ?? candidates[0] ?? "default";
    const fallbackReason = usedAttempt > 0 ? modelAttempts[usedAttempt - 1]?.error : undefined;
    task = {
      ...task,
      modelRouting: {
        requested: modelRoutingPlan.requested,
        resolved: resolvedModel,
        fallbackChain: candidates,
        reason: fallbackReason ?? modelRoutingPlan.reason,
        usedAttempt,
      },
    };
    tasks = updateTask(tasks, task);

    // Usage from the parsed output wins; the session transcript is the fallback source.
    const sessionUsage = parseSessionUsage(transcriptPath);
    const effectiveUsage = parsedOutput?.usage ?? sessionUsage;
    if (effectiveUsage) {
      parsedOutput = { ...(parsedOutput ?? { jsonEvents: 0, textEvents: [] }), usage: effectiveUsage };
      task = { ...task, usage: effectiveUsage, agentProgress: applyUsageToProgress(task.agentProgress, effectiveUsage) };
      tasks = updateTask(tasks, task);
      upsertCrewAgent(manifest, recordFromTask(manifest, task, "child-process"));
    }

    if (fs.existsSync(transcriptPath)) {
      transcriptArtifact = writeArtifact(manifest.artifactsRoot, {
        kind: "log",
        relativePath: `transcripts/${task.id}.jsonl`,
        content: fs.readFileSync(transcriptPath, "utf-8"),
        producer: task.id,
      });
    }
    task = {
      ...task,
      resultArtifact,
      ...(logArtifact ? { logArtifact } : {}),
      ...(transcriptArtifact ? { transcriptArtifact } : {}),
    };
    tasks = updateTask(tasks, task);
    const artifactCheckpoint = checkpointTask(manifest, tasks, task, "artifact-written");
    task = artifactCheckpoint.task;
    tasks = artifactCheckpoint.tasks;
  } else if (runtimeKind === "live-session") {
    // The live executor is loaded on demand and returns the same outcome fields.
    const { runLiveTask } = await import("./task-runner/live-executor.ts");
    const live = await runLiveTask({
      manifest,
      tasks,
      task,
      step: input.step,
      agent: input.agent,
      prompt,
      signal: input.signal,
      runtimeConfig: input.runtimeConfig,
      parentContext: input.parentContext,
      parentModel: input.parentModel,
      modelRegistry: input.modelRegistry,
      modelOverride: input.modelOverride,
      teamRoleModel: input.teamRoleModel,
    });
    task = live.task;
    tasks = live.tasks;
    startupEvidence = live.startupEvidence;
    exitCode = live.exitCode;
    error = live.error;
    parsedOutput = live.parsedOutput;
    resultArtifact = live.resultArtifact;
    logArtifact = live.logArtifact;
    transcriptArtifact = live.transcriptArtifact;
  } else {
    // Scaffold mode: no worker runs; only a placeholder result is written.
    resultArtifact = writeArtifact(manifest.artifactsRoot, {
      kind: "result",
      relativePath: `results/${task.id}.md`,
      content: [
        `# ${task.id}`,
        "",
        "Worker execution is disabled in this scaffold-safe run.",
        "The prompt artifact contains the exact task that will be sent to a child Pi worker when execution is enabled.",
      ].join("\n"),
      producer: task.id,
    });
  }

  // ---- 5. Worktree diff ----------------------------------------------------------------------------
  const diffArtifact = workspace.worktreePath
    ? writeArtifact(manifest.artifactsRoot, {
        kind: "diff",
        relativePath: `diffs/${task.id}.diff`,
        content: captureWorktreeDiff(workspace.worktreePath),
        producer: task.id,
      })
    : undefined;
  const diffStatArtifact = workspace.worktreePath
    ? writeMetadataArtifact(
        "diff-stat.json",
        `${JSON.stringify({ ...captureWorktreeDiffStat(workspace.worktreePath), syntheticPaths: workspace.syntheticPaths ?? [], nodeModulesLinked: workspace.nodeModulesLinked ?? false }, null, 2)}\n`,
      )
    : undefined;

  // ---- 6. Completion mutation guard ----------------------------------------------------------------
  const mutationGuardMode = input.runtimeConfig?.completionMutationGuard ?? "warn";
  // The guard is evaluated only for runs that have not already failed.
  const mutationGuard = !error && mutationGuardMode !== "off"
    ? evaluateCompletionMutationGuard({
        role: task.role,
        taskText: `${task.title}\n${input.step.task}`,
        transcriptPath: runtimeKind === "child-process" ? transcriptPath : transcriptArtifact?.path,
        stdout: finalStdout,
      })
    : undefined;
  if (mutationGuard?.reason === "no_mutation_observed") {
    appendTaskAttentionEvent({
      manifest,
      taskId: task.id,
      message: "Implementation-style task completed without an observed mutation tool call.",
      data: {
        activityState: "needs_attention",
        reason: "completion_guard",
        taskId: task.id,
        agentName: task.agent,
        observedTools: mutationGuard.observedTools,
        suggestedAction: mutationGuardMode === "fail"
          ? "Review the worker output and rerun with a concrete implementation task."
          : "Review the worker output; set runtime.completionMutationGuard='fail' to enforce this.",
      },
    });
    task = { ...task, agentProgress: { ...(task.agentProgress ?? emptyCrewAgentProgress()), activityState: "needs_attention" } };
    if (mutationGuardMode === "fail") {
      // In "fail" mode the otherwise successful run is turned into a failure,
      // and the last model attempt is rewritten to reflect that.
      error = "Completion mutation guard failed: implementation-style task completed without an observed mutation tool call.";
      exitCode = exitCode === 0 ? 1 : exitCode;
      if (modelAttempts?.length) {
        const lastIndex = modelAttempts.length - 1;
        modelAttempts = modelAttempts.map((attempt, index) => (index === lastIndex ? { ...attempt, success: false, exitCode, error } : attempt));
      }
    }
    tasks = updateTask(tasks, task);
  }

  // ---- 7. Finalize task state and write remaining artifacts ---------------------------------------
  task = {
    ...task,
    status: error ? "failed" : "completed",
    finishedAt: new Date().toISOString(),
    exitCode,
    modelAttempts,
    usage: parsedOutput?.usage,
    jsonEvents: parsedOutput?.jsonEvents,
    // On failure, the tool that was running is recorded as the failed tool.
    agentProgress: error && task.agentProgress?.currentTool
      ? { ...task.agentProgress, failedTool: task.agentProgress.currentTool }
      : task.agentProgress,
    error,
    verification: createVerificationEvidence(
      taskPacket.verification,
      !error,
      error
        ? `Task failed: ${error}`
        : runtimeKind === "scaffold"
          ? "Safe scaffold mode; verification commands were not executed."
          : `${runtimeKind} worker finished without reporting a verification failure.`,
    ),
    promptArtifact,
    resultArtifact,
    claim: undefined,
    heartbeat: touchWorkerHeartbeat(task.heartbeat ?? createWorkerHeartbeat(task.id), { alive: false }),
    workerExitStatus: terminalEvidence.at(-1)?.exitStatus,
    terminalEvidence: terminalEvidence.length ? [...(task.terminalEvidence ?? []), ...terminalEvidence] : task.terminalEvidence,
    ...(logArtifact ? { logArtifact } : {}),
    ...(transcriptArtifact ? { transcriptArtifact } : {}),
  };
  tasks = updateTask(tasks, task);

  const skillsDisabled = input.skillOverride === false || input.teamRoleSkills === false;
  const packetArtifact = writeMetadataArtifact("task-packet.json", `${JSON.stringify(task.taskPacket, null, 2)}\n`);
  const verificationArtifact = writeMetadataArtifact("verification.json", `${JSON.stringify(task.verification, null, 2)}\n`);
  const sharedOutputArtifact = writeTaskSharedOutput(manifest, input.step, task);
  const startupArtifact = writeMetadataArtifact("startup-evidence.json", `${JSON.stringify(startupEvidence, null, 2)}\n`);
  const permissionArtifact = writeMetadataArtifact("permission.json", `${JSON.stringify({ role: task.role, permissionMode }, null, 2)}\n`);
  const capabilityInventory = buildWorkerCapabilityInventory({
    taskId: task.id,
    role: task.role,
    agent: input.agent,
    runtime: runtimeKind,
    permissionMode,
    skillNames,
    skillPaths,
    skillsDisabled,
    modelOverride: input.modelOverride,
    teamRoleModel: input.teamRoleModel,
    stepModel: input.step.model,
  });
  const capabilityArtifact = writeMetadataArtifact("capabilities.json", `${JSON.stringify(capabilityInventory, null, 2)}\n`);
  const promptPipeline = buildWorkerPromptPipeline({
    artifactsRoot: manifest.artifactsRoot,
    taskId: task.id,
    promptArtifact,
    inputsArtifact,
    skillArtifact,
    capabilityArtifact,
    coordinationArtifact,
    skillInstructionCount: skillNames?.length ?? 0,
    skillsDisabled,
  });
  const promptPipelineArtifact = writeMetadataArtifact("prompt-pipeline.json", `${JSON.stringify(promptPipeline, null, 2)}\n`);

  // Append every artifact produced by this task to the manifest, then persist everything.
  manifest = {
    ...manifest,
    updatedAt: new Date().toISOString(),
    artifacts: [
      ...manifest.artifacts,
      promptArtifact,
      resultArtifact,
      inputsArtifact,
      coordinationArtifact,
      ...(skillArtifact ? [skillArtifact] : []),
      packetArtifact,
      verificationArtifact,
      startupArtifact,
      permissionArtifact,
      capabilityArtifact,
      promptPipelineArtifact,
      ...(sharedOutputArtifact ? [sharedOutputArtifact] : []),
      ...(logArtifact ? [logArtifact] : []),
      ...(transcriptArtifact ? [transcriptArtifact] : []),
      ...(diffArtifact ? [diffArtifact] : []),
      ...(diffStatArtifact ? [diffStatArtifact] : []),
    ],
  };
  saveRunManifest(manifest);
  tasks = persistSingleTaskUpdate(manifest, tasks, task);
  upsertCrewAgent(manifest, recordFromTask(manifest, task, runtimeKind));
  appendEvent(manifest.eventsPath, {
    type: error ? "task.failed" : "task.completed",
    runId: manifest.runId,
    taskId: task.id,
    message: error,
  });
  return { manifest, tasks };
}
|
||||
78
extensions/pi-crew/src/runtime/task-runner/capabilities.ts
Normal file
78
extensions/pi-crew/src/runtime/task-runner/capabilities.ts
Normal file
@@ -0,0 +1,78 @@
|
||||
import type { AgentConfig } from "../../agents/agent-config.ts";
|
||||
import type { CrewRuntimeKind } from "../crew-agent-runtime.ts";
|
||||
|
||||
/** Snapshot of what a worker was configured with, as produced by `buildWorkerCapabilityInventory`. */
export interface WorkerCapabilityInventory {
  /** Version of this record's layout; currently always 1. */
  schemaVersion: 1;
  /** Task the inventory describes. */
  taskId: string;
  /** Role of the task. */
  role: string;
  /** Name of the agent. */
  agent: string;
  /** Runtime used for the worker. */
  runtime: CrewRuntimeKind;
  /** Permission mode applied to the worker. */
  permissionMode: string;
  /** Agent tools: trimmed, de-duplicated and sorted. */
  tools: string[];
  /** Agent extensions: trimmed, de-duplicated and sorted. */
  extensions: string[];
  /** Skills selected for the worker. */
  skills: {
    /** Skill names: trimmed, de-duplicated and sorted. */
    names: string[];
    /** Skill paths: trimmed, de-duplicated and sorted. */
    paths: string[];
    /** Whether skills were disabled for this task. */
    disabled: boolean;
  };
  /** Model preferences from each configuration layer. */
  model: {
    /** Explicit model override, if any. */
    requested?: string;
    /** Model configured on the agent, if any. */
    agentDefault?: string;
    /** Agent fallback models: trimmed, de-duplicated and sorted. */
    fallbacks: string[];
    /** Model configured for the team role, if any. */
    teamRole?: string;
    /** Model configured on the workflow step, if any. */
    step?: string;
  };
  /** What the worker inherits, according to the agent configuration. */
  inheritance: {
    /** `true` only when the agent sets `inheritProjectContext` to exactly `true`. */
    projectContext: boolean;
    /** `true` only when the agent sets `inheritSkills` to exactly `true`. */
    skills: boolean;
    /** System prompt mode; "replace" when the agent does not specify one. */
    systemPromptMode: "replace" | "append";
  };
}
|
||||
|
||||
/** Arguments accepted by `buildWorkerCapabilityInventory`. */
export interface BuildWorkerCapabilityInventoryInput {
  /** Task the inventory is built for. */
  taskId: string;
  /** Role of the task. */
  role: string;
  /** Agent configuration; its name, tools, extensions, models and inheritance flags are recorded. */
  agent: AgentConfig;
  /** Runtime used for the worker. */
  runtime: CrewRuntimeKind;
  /** Permission mode applied to the worker. */
  permissionMode: string;
  /** Selected skill names; treated as empty when omitted. */
  skillNames?: string[];
  /** Selected skill paths; treated as empty when omitted. */
  skillPaths?: string[];
  /** Whether skills were disabled for this task. */
  skillsDisabled: boolean;
  /** Explicit model override, recorded as `model.requested`. */
  modelOverride?: string;
  /** Team-role model, recorded as `model.teamRole`. */
  teamRoleModel?: string;
  /** Workflow-step model, recorded as `model.step`. */
  stepModel?: string;
}
|
||||
|
||||
/**
 * Normalizes a list of strings: trims each entry, drops blanks and duplicates,
 * and returns the remainder sorted with `localeCompare`.
 *
 * @param values Input strings; `undefined` is treated as an empty list.
 * @returns A new sorted array; the input is not modified.
 */
function uniqueSorted(values: readonly string[] | undefined): string[] {
  const distinct = new Set<string>();
  for (const raw of values ?? []) {
    const trimmed = raw.trim();
    if (trimmed) distinct.add(trimmed);
  }
  return Array.from(distinct).sort((left, right) => left.localeCompare(right));
}
|
||||
|
||||
/**
 * Assembles the capability inventory recorded for a worker.
 *
 * List-valued fields (tools, extensions, skill names/paths, fallback models)
 * are passed through `uniqueSorted`; the inherit flags are only `true` when the
 * agent configuration sets them to exactly `true`, and the system prompt mode
 * defaults to "replace" when the agent does not specify one.
 *
 * @param input Task, agent and model selection details.
 * @returns A schema-version-1 inventory object.
 */
export function buildWorkerCapabilityInventory(input: BuildWorkerCapabilityInventoryInput): WorkerCapabilityInventory {
  const { agent } = input;

  const skills: WorkerCapabilityInventory["skills"] = {
    names: uniqueSorted(input.skillNames),
    paths: uniqueSorted(input.skillPaths),
    disabled: input.skillsDisabled,
  };

  const model: WorkerCapabilityInventory["model"] = {
    requested: input.modelOverride,
    agentDefault: agent.model,
    fallbacks: uniqueSorted(agent.fallbackModels),
    teamRole: input.teamRoleModel,
    step: input.stepModel,
  };

  const inheritance: WorkerCapabilityInventory["inheritance"] = {
    projectContext: agent.inheritProjectContext === true,
    skills: agent.inheritSkills === true,
    systemPromptMode: agent.systemPromptMode ?? "replace",
  };

  return {
    schemaVersion: 1,
    taskId: input.taskId,
    role: input.role,
    agent: agent.name,
    runtime: input.runtime,
    permissionMode: input.permissionMode,
    tools: uniqueSorted(agent.tools),
    extensions: uniqueSorted(agent.extensions),
    skills,
    model,
    inheritance,
  };
}
|
||||
105
extensions/pi-crew/src/runtime/task-runner/live-executor.ts
Normal file
105
extensions/pi-crew/src/runtime/task-runner/live-executor.ts
Normal file
@@ -0,0 +1,105 @@
|
||||
import * as fs from "node:fs";
|
||||
import type { AgentConfig } from "../../agents/agent-config.ts";
|
||||
import type { CrewRuntimeConfig } from "../../config/config.ts";
|
||||
import { writeArtifact } from "../../state/artifact-store.ts";
|
||||
import { appendEvent } from "../../state/event-log.ts";
|
||||
import type { ArtifactDescriptor, TeamRunManifest, TeamTaskState } from "../../state/types.ts";
|
||||
import type { WorkflowStep } from "../../workflows/workflow-config.ts";
|
||||
import { appendCrewAgentEvent, appendCrewAgentOutput, emptyCrewAgentProgress, recordFromTask, upsertCrewAgent } from "../crew-agent-records.ts";
|
||||
import { createStartupEvidence, type WorkerStartupEvidence } from "../worker-startup.ts";
|
||||
import { runLiveSessionTask } from "../live-session-runtime.ts";
|
||||
import { shouldAppendProgressEventUpdate, type ProgressEventSummary } from "../progress-event-coalescer.ts";
|
||||
import { applyAgentProgressEvent, applyUsageToProgress, progressEventSummary, shouldFlushProgressEvent } from "./progress.ts";
|
||||
import type { ParsedPiJsonOutput } from "../pi-json-output.ts";
|
||||
|
||||
/** Everything `runLiveTask` needs to execute one task in a live session. */
export interface RunLiveTaskInput {
  /** Run manifest; supplies the artifacts root, events path and run id. */
  manifest: TeamRunManifest;
  /** Current snapshot of all tasks in the run. */
  tasks: Array<TeamTaskState>;
  /** The task to execute. */
  task: TeamTaskState;
  /** Workflow step the task belongs to (forwarded to the live session). */
  step: WorkflowStep;
  /** Agent configuration (forwarded to the live session). */
  agent: AgentConfig;
  /** Fully rendered prompt text. */
  prompt: string;
  /** Optional abort signal; also drives the default `isCurrent` check. */
  signal?: AbortSignal;
  /** Forwarded unchanged to the live session runtime. */
  runtimeConfig?: CrewRuntimeConfig;
  /** Forwarded unchanged to the live session runtime. */
  parentContext?: string;
  /** Forwarded unchanged to the live session runtime. */
  parentModel?: unknown;
  /** Forwarded unchanged to the live session runtime. */
  modelRegistry?: unknown;
  /** Forwarded unchanged to the live session runtime. */
  modelOverride?: string;
  /** Forwarded unchanged to the live session runtime. */
  teamRoleModel?: string;
  /** Optional predicate; when omitted, one that returns false once `signal` is aborted is used. */
  isCurrent?: () => boolean;
}
|
||||
|
||||
/** Result bundle returned by `runLiveTask`. */
export interface RunLiveTaskOutput {
  /** The task with progress (and usage, when reported) applied. */
  task: TeamTaskState;
  /** Task list snapshot maintained while events were processed. */
  tasks: Array<TeamTaskState>;
  /** Startup evidence built from the live session outcome. */
  startupEvidence: WorkerStartupEvidence;
  /** Exit code reported by the live session. */
  exitCode: number | null;
  /** Error text, when the session reported an error or a non-zero exit code. */
  error?: string;
  /** Output wrapped in the parsed-JSON-output shape. */
  parsedOutput?: ParsedPiJsonOutput;
  /** Artifact holding stdout, or stderr, or a "(no output)" placeholder. */
  resultArtifact: ArtifactDescriptor;
  /** Artifact holding the combined runtime log. */
  logArtifact?: ArtifactDescriptor;
  /** Artifact holding the transcript, when a transcript file exists. */
  transcriptArtifact?: ArtifactDescriptor;
}
|
||||
|
||||
/**
 * Returns a copy of `tasks` in which the entry sharing `updated.id` is swapped
 * for `updated`. Entries with other ids are kept by reference; if no entry
 * matches, the copy has the same contents as the input.
 */
function updateTask(tasks: TeamTaskState[], updated: TeamTaskState): TeamTaskState[] {
  return tasks.map((existing) => {
    if (existing.id !== updated.id) return existing;
    return updated;
  });
}
|
||||
|
||||
/**
 * Runs one task through the live-session runtime and collects its artifacts.
 *
 * While the session runs, every event is appended to the agent event log and
 * folded into the task's `agentProgress`; agent records and `task.progress`
 * run events are persisted with throttling (see `persistLiveProgress`).
 * After the session ends, usage is merged into the task, a final forced
 * progress flush is written, and result/log/transcript artifacts are created.
 *
 * The returned `tasks` snapshot always contains the same task object that is
 * returned as `task`, including usage applied after the last event.
 *
 * @param input Manifest, task, prompt and runtime options for the attempt.
 * @returns The updated task/tasks, startup evidence, exit status and artifacts.
 */
export async function runLiveTask(input: RunLiveTaskInput): Promise<RunLiveTaskOutput> {
  const { manifest, step, agent, prompt } = input;
  let task = input.task;
  let tasks = input.tasks;
  const transcriptPath = `${manifest.artifactsRoot}/transcripts/${task.id}.jsonl`;
  let lastAgentRecordPersistedAt = 0;
  let lastRunProgressPersistedAt = 0;
  let lastRunProgressSummary: ProgressEventSummary | undefined;

  // Persists progress for the current `task`. The agent record is written when
  // forced, on flush-worthy events, or at most every 500 ms; the run-level
  // "task.progress" event is written only when the coalescer approves it.
  const persistLiveProgress = (event: unknown, force = false): void => {
    const now = Date.now();
    if (force || shouldFlushProgressEvent(event) || now - lastAgentRecordPersistedAt >= 500) {
      upsertCrewAgent(manifest, recordFromTask(manifest, task, "live-session"));
      lastAgentRecordPersistedAt = now;
    }
    const summary = progressEventSummary(task, event);
    const decision = shouldAppendProgressEventUpdate({ previous: lastRunProgressSummary, next: summary, nowMs: now, lastAppendMs: lastRunProgressPersistedAt || undefined, minIntervalMs: 1000, force });
    if (decision.shouldAppend) {
      appendEvent(manifest.eventsPath, { type: "task.progress", runId: manifest.runId, taskId: task.id, data: { ...summary, coalesceReason: decision.reason } });
      lastRunProgressSummary = summary;
      lastRunProgressPersistedAt = now;
    }
  };

  const attemptStartedAt = new Date();
  // Without an explicit predicate the attempt counts as current until the signal aborts.
  const isCurrent = input.isCurrent ?? (() => input.signal?.aborted !== true);

  const liveResult = await runLiveSessionTask({
    manifest,
    task,
    step,
    agent,
    prompt,
    signal: input.signal,
    transcriptPath,
    runtimeConfig: input.runtimeConfig,
    parentContext: input.parentContext,
    parentModel: input.parentModel,
    modelRegistry: input.modelRegistry,
    modelOverride: input.modelOverride,
    teamRoleModel: input.teamRoleModel,
    isCurrent,
    onOutput: (text) => appendCrewAgentOutput(manifest, task.id, text),
    onEvent: (event) => {
      appendCrewAgentEvent(manifest, task.id, event);
      task = { ...task, agentProgress: applyAgentProgressEvent(task.agentProgress ?? emptyCrewAgentProgress(), event, task.startedAt) };
      tasks = updateTask(tasks, task);
      persistLiveProgress(event);
    },
  });

  const startupEvidence = createStartupEvidence({ command: "live-session", startedAt: attemptStartedAt, finishedAt: new Date(), promptSentAt: attemptStartedAt, promptAccepted: liveResult.exitCode === 0 && !liveResult.error, stderr: liveResult.stderr, error: liveResult.error, exitCode: liveResult.exitCode });
  const exitCode = liveResult.exitCode;
  // Prefer the explicit error; otherwise derive one from a non-zero exit code.
  const error = liveResult.error || (liveResult.exitCode && liveResult.exitCode !== 0 ? liveResult.stderr || `Live session exited with ${liveResult.exitCode}` : undefined);
  const parsedOutput = { finalText: liveResult.stdout, textEvents: liveResult.stdout ? [liveResult.stdout] : [], jsonEvents: liveResult.jsonEvents, usage: liveResult.usage };

  if (liveResult.usage) {
    task = { ...task, usage: liveResult.usage, agentProgress: applyUsageToProgress(task.agentProgress, liveResult.usage) };
    // Keep the returned snapshot in step with the returned task.
    tasks = updateTask(tasks, task);
  }
  persistLiveProgress({ type: "attempt_finished" }, true);

  const resultArtifact = writeArtifact(manifest.artifactsRoot, { kind: "result", relativePath: `results/${task.id}.txt`, content: liveResult.stdout || liveResult.stderr || "(no output)", producer: task.id });
  const logArtifact = writeArtifact(manifest.artifactsRoot, { kind: "log", relativePath: `logs/${task.id}.log`, content: [`runtime=live-session`, `finalExitCode=${exitCode ?? "null"}`, `jsonEvents=${liveResult.jsonEvents}`, liveResult.usage ? `usage=${JSON.stringify(liveResult.usage)}` : "", "", "STDOUT:", liveResult.stdout, "", "STDERR:", liveResult.stderr].join("\n"), producer: task.id });
  // The transcript is only registered as an artifact when the session produced one.
  const transcriptArtifact = fs.existsSync(transcriptPath) ? writeArtifact(manifest.artifactsRoot, { kind: "log", relativePath: `transcripts/${task.id}.jsonl`, content: fs.readFileSync(transcriptPath, "utf-8"), producer: task.id }) : undefined;

  return { task, tasks, startupEvidence, exitCode, error: error || undefined, parsedOutput, resultArtifact, logArtifact, transcriptArtifact };
}
|
||||
119
extensions/pi-crew/src/runtime/task-runner/progress.ts
Normal file
119
extensions/pi-crew/src/runtime/task-runner/progress.ts
Normal file
@@ -0,0 +1,119 @@
|
||||
import type { UsageState } from "../../state/types.ts";
|
||||
import type { CrewAgentProgress } from "../crew-agent-runtime.ts";
|
||||
import { emptyCrewAgentProgress } from "../crew-agent-records.ts";
|
||||
import type { ProgressEventSummary } from "../progress-event-coalescer.ts";
|
||||
import type { TeamTaskState } from "../../state/types.ts";
|
||||
|
||||
/**
 * Narrows an unknown value to a string-keyed record.
 *
 * @returns The same value when it is a non-null, non-array object; otherwise `undefined`.
 */
function asRecord(value: unknown): Record<string, unknown> | undefined {
  if (!value) return undefined;
  if (typeof value !== "object") return undefined;
  if (Array.isArray(value)) return undefined;
  return value as Record<string, unknown>;
}
|
||||
|
||||
/** Returns `v` when it is a finite number; `undefined`, `NaN` and infinities become 0. */
function safeNum(v: number | undefined): number {
  if (typeof v === "number" && Number.isFinite(v)) return v;
  return 0;
}
|
||||
|
||||
/**
 * Collects text strings from a message `content` value.
 *
 * A plain string yields a single-element list. For an array, each object part
 * contributes its `text` when it is a `{ type: "text", text }` part, otherwise
 * its `content` when that is a string. Anything else yields nothing.
 */
function textFromContent(content: unknown): string[] {
  if (typeof content === "string") return [content];
  if (!Array.isArray(content)) return [];
  return content.flatMap((part): string[] => {
    const record = asRecord(part);
    if (!record) return [];
    if (record.type === "text" && typeof record.text === "string") return [record.text];
    return typeof record.content === "string" ? [record.content] : [];
  });
}
|
||||
|
||||
/**
 * Gathers the human-readable text carried by an event, in this order:
 * `text`, `output`, the event's own `content`, then `message.content`.
 * Entries that are empty after trimming are dropped (the kept entries are
 * returned untrimmed). Non-object events yield an empty list.
 */
function eventText(event: unknown): string[] {
  const record = asRecord(event);
  if (!record) return [];
  const nestedMessage = asRecord(record.message);
  const collected: string[] = [
    ...(typeof record.text === "string" ? [record.text] : []),
    ...(typeof record.output === "string" ? [record.output] : []),
    ...textFromContent(record.content),
    ...(nestedMessage ? textFromContent(nestedMessage.content) : []),
  ];
  return collected.filter((entry) => entry.trim());
}
|
||||
|
||||
/**
 * Looks up the first key in `keys` whose value on `obj` is a finite number.
 *
 * @returns That number, or `undefined` when no key holds one.
 */
function numberField(obj: Record<string, unknown>, keys: string[]): number | undefined {
  for (const key of keys) {
    const candidate = obj[key];
    if (typeof candidate !== "number") continue;
    if (!Number.isFinite(candidate)) continue;
    return candidate;
  }
  return undefined;
}
|
||||
|
||||
/**
 * Extracts token/turn counters from an event.
 *
 * Counters found directly on the event win (several key spellings are
 * accepted). Otherwise the `usage`, `tokenUsage`, `tokens` and `stats`
 * properties are searched recursively in that order, and finally
 * `message.usage`.
 *
 * @returns The first counter set found, or `undefined` when there is none.
 */
function eventUsage(event: unknown): { input?: number; output?: number; turns?: number } | undefined {
  const record = asRecord(event);
  if (!record) return undefined;

  const input = numberField(record, ["input", "inputTokens", "input_tokens"]);
  const output = numberField(record, ["output", "outputTokens", "output_tokens"]);
  const turns = numberField(record, ["turns", "turnCount", "turn_count"]);
  if (input !== undefined || output !== undefined || turns !== undefined) return { input, output, turns };

  for (const containerKey of ["usage", "tokenUsage", "tokens", "stats"]) {
    const found = eventUsage(record[containerKey]);
    if (found) return found;
  }

  const message = asRecord(record.message);
  if (!message) return undefined;
  return eventUsage(message.usage);
}
|
||||
|
||||
/**
 * Builds a short, printable preview of tool arguments.
 *
 * Strings are used as-is; other values are JSON-serialized. Previews longer
 * than 240 characters are cut to 240 and suffixed with "…".
 *
 * @returns The preview, or `undefined` for falsy input or values that cannot be serialized.
 */
function previewArgs(args: unknown): string | undefined {
  if (!args) return undefined;
  const limit = 240;
  try {
    // JSON.stringify may throw (cycles) or yield undefined (functions); both end up in the catch.
    const rendered = typeof args === "string" ? args : JSON.stringify(args);
    if (rendered.length <= limit) return rendered;
    return `${rendered.slice(0, limit)}…`;
  } catch {
    return undefined;
  }
}
|
||||
|
||||
/**
 * Folds a usage record into agent progress.
 *
 * `tokens` becomes the sum of input, output, cache-read and cache-write
 * counts (missing or non-finite parts count as 0); `turns` is taken from the
 * usage when present, otherwise kept.
 *
 * @returns A new progress object, or `progress` unchanged when `usage` is absent.
 */
export function applyUsageToProgress(progress: CrewAgentProgress | undefined, usage: UsageState | undefined): CrewAgentProgress | undefined {
  if (!usage) return progress;
  const current = progress ?? emptyCrewAgentProgress();
  const promptAndCompletion = safeNum(usage.input) + safeNum(usage.output);
  const tokens = promptAndCompletion + safeNum(usage.cacheRead) + safeNum(usage.cacheWrite);
  const turns = usage.turns ?? current.turns;
  return { ...current, tokens, turns };
}
|
||||
|
||||
/**
 * Tells whether an event's `type` is one of the boundary types
 * (tool start/end, message end, tool result end) that callers flush on.
 */
export function shouldFlushProgressEvent(event: unknown): boolean {
  switch (asRecord(event)?.type) {
    case "tool_execution_start":
    case "tool_execution_end":
    case "message_end":
    case "tool_result_end":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Builds the compact summary stored with a run-level progress event.
 * The event contributes only its `type` ("event" when it has no string type);
 * every other field is copied from the task's current `agentProgress`.
 */
export function progressEventSummary(task: TeamTaskState, event: unknown): ProgressEventSummary {
  const rawType = asRecord(event)?.type;
  const progress = task.agentProgress;
  return {
    eventType: typeof rawType === "string" ? rawType : "event",
    currentTool: progress?.currentTool,
    toolCount: progress?.toolCount,
    tokens: progress?.tokens,
    turns: progress?.turns,
    activityState: progress?.activityState,
    lastActivityAt: progress?.lastActivityAt,
  };
}
|
||||
|
||||
/**
 * Returns a new progress object reflecting one agent event.
 *
 * Every event marks the agent active and refreshes `lastActivityAt`; when
 * `startedAt` parses as a date, `durationMs` is recomputed. Tool start/end
 * events maintain `currentTool*`, `toolCount` and `recentTools`; tool
 * error/failed events record `failedTool`. Usage counters and text found in
 * the event update `tokens`/`turns` and `recentOutput`. `recentTools` is
 * capped at 25 entries and `recentOutput` at 50 lines.
 *
 * @param progress Previous progress; it is not mutated.
 * @param event Raw event from the worker (any shape).
 * @param startedAt ISO timestamp of the task start, if known.
 * @returns The updated progress.
 */
export function applyAgentProgressEvent(progress: CrewAgentProgress, event: unknown, startedAt: string | undefined): CrewAgentProgress {
  const obj = asRecord(event);
  const now = new Date().toISOString();
  // Copy the arrays so pushes below never touch the caller's progress object.
  const next: CrewAgentProgress = { ...progress, recentTools: [...progress.recentTools], recentOutput: [...progress.recentOutput], lastActivityAt: now, activityState: "active" };

  if (startedAt) {
    const startMs = new Date(startedAt).getTime();
    next.durationMs = Number.isFinite(startMs) ? Date.now() - startMs : undefined;
  }

  if (obj?.type === "tool_execution_start") {
    next.toolCount += 1;
    next.currentTool = typeof obj.toolName === "string" ? obj.toolName : typeof obj.name === "string" ? obj.name : "tool";
    next.currentToolArgs = previewArgs(obj.args);
    next.currentToolStartedAt = now;
  }

  if (obj?.type === "tool_execution_end") {
    if (next.currentTool) next.recentTools.push({ tool: next.currentTool, args: next.currentToolArgs, endedAt: now });
    next.currentTool = undefined;
    next.currentToolArgs = undefined;
    next.currentToolStartedAt = undefined;
  }

  if ((obj?.type === "tool_execution_error" || obj?.type === "tool_execution_failed") && next.currentTool) next.failedTool = next.currentTool;

  const usage = eventUsage(event);
  if (usage) {
    // A usage payload may carry only `turns`; in that case keep the token
    // count gathered so far instead of resetting it to 0.
    if (usage.input !== undefined || usage.output !== undefined) {
      next.tokens = safeNum(usage.input) + safeNum(usage.output);
    }
    next.turns = usage.turns ?? next.turns;
  }

  const text = eventText(event);
  // Keep at most the last 10 non-empty lines contributed by this event.
  if (text.length > 0) next.recentOutput.push(...text.flatMap((entry) => entry.split(/\r?\n/)).filter(Boolean).slice(-10));

  if (next.recentTools.length > 25) next.recentTools.splice(0, next.recentTools.length - 25);
  if (next.recentOutput.length > 50) next.recentOutput.splice(0, next.recentOutput.length - 50);
  return next;
}
|
||||
77
extensions/pi-crew/src/runtime/task-runner/prompt-builder.ts
Normal file
77
extensions/pi-crew/src/runtime/task-runner/prompt-builder.ts
Normal file
@@ -0,0 +1,77 @@
|
||||
import type { AgentConfig } from "../../agents/agent-config.ts";
|
||||
import type { TeamRunManifest, TeamTaskState } from "../../state/types.ts";
|
||||
import type { WorkflowStep } from "../../workflows/workflow-config.ts";
|
||||
import { buildMemoryBlock } from "../agent-memory.ts";
|
||||
import { permissionForRole } from "../role-permission.ts";
|
||||
import { renderTaskPacket } from "../task-packet.ts";
|
||||
|
||||
/**
 * Returns the read-only contract text for roles whose permission resolves to
 * "read_only"; every other role gets an empty string.
 */
function readOnlyRoleInstructions(role: string): string {
  const isReadOnly = permissionForRole(role) === "read_only";
  if (!isReadOnly) return "";

  const contract: string[] = [];
  contract.push("# READ-ONLY ROLE CONTRACT");
  contract.push("You are running in READ-ONLY mode for this task.");
  contract.push("- Do not create, modify, delete, move, or copy files.");
  contract.push("- Do not use shell redirects, heredocs, in-place edits, package installs, git commit/merge/rebase/reset/checkout, or other state-mutating commands.");
  contract.push("- If implementation changes are needed, report exact recommendations instead of applying them.");
  contract.push("- Prefer read/grep/find/listing tools and read-only git inspection commands.");
  return contract.join("\n");
}
|
||||
|
||||
/**
 * Renders the coordination section of a worker prompt: a heading, the mailbox
 * target (the task id) and a fixed list of escalation/handoff rules, joined
 * with newlines.
 */
export function coordinationBridgeInstructions(task: TeamTaskState): string {
  const heading = "# Crew Coordination Channel";
  const mailboxLine = `Mailbox target for this task: ${task.id}`;
  const intro = "Use the run mailbox contract for coordination with the leader/orchestrator:";
  const rules = [
    "- If blocked or uncertain, report the blocker in your final result and, when mailbox tools/API are available, send an inbox/outbox message addressed to the leader.",
    "- Ask the leader before editing when scope is ambiguous, requirements conflict, destructive action is needed, or you discover likely overlap with another task.",
    "- Before making non-trivial edits, state intended changed files in your notes/result; if another worker may touch the same file/symbol, pause and request sequencing/ownership guidance.",
    "- Do not resolve cross-worker conflicts silently. Escalate via mailbox/result with: file/symbol, conflicting task if known, proposed owner, and safest next step.",
    "- If nudged, answer with current status, blocker, or smallest next step.",
    "- Treat inherited/dependency context as reference-only; do not continue the parent conversation directly.",
    "- Completion handoff should include: DONE/FAILED, summary, changed/read files, verification evidence, and remaining risks.",
  ];
  return [heading, mailboxLine, intro, ...rules].join("\n");
}
|
||||
|
||||
/**
 * Reads the optional `dependencyContextText` carried on a task (a field that
 * is not part of the declared task type) and falls back to an empty string.
 */
function inputDependencyContext(task: TeamTaskState): string {
  const { dependencyContextText } = task as TeamTaskState & { dependencyContextText?: string };
  return dependencyContextText ?? "";
}
|
||||
|
||||
/**
 * Renders the full prompt handed to a worker for one task.
 *
 * The prompt consists of a runtime-context header, the run goal, the step and
 * role, a fixed protocol list, the read-only contract (when the task role is
 * read-only), the coordination section, the optional skill block, the task
 * packet, dependency context, the agent memory block and finally the step's
 * task text with every `{goal}` placeholder replaced by the run goal.
 *
 * @param manifest Run manifest supplying ids, paths and the goal.
 * @param step Workflow step whose `task` template is rendered.
 * @param task Task being prompted.
 * @param agent Optional agent configuration; used only for the memory block.
 * @param skillBlock Pre-rendered skill instructions; defaults to empty.
 * @returns The prompt text, sections joined by newlines.
 */
export function renderTaskPrompt(manifest: TeamRunManifest, step: WorkflowStep, task: TeamTaskState, agent?: AgentConfig, skillBlock = ""): string {
  const memoryBlock = agent?.memory ? buildMemoryBlock(agent.name, agent.memory, task.cwd, Boolean(agent.tools?.some((tool) => tool === "write" || tool === "edit"))) : "";
  return [
    "# pi-crew Worker Runtime Context",
    `Run ID: ${manifest.runId}`,
    `Team: ${manifest.team}`,
    `Workflow: ${manifest.workflow ?? "(none)"}`,
    `State root: ${manifest.stateRoot}`,
    `Artifacts root: ${manifest.artifactsRoot}`,
    `Events path: ${manifest.eventsPath}`,
    `Task ID: ${task.id}`,
    `Task cwd: ${task.cwd}`,
    `Workspace mode: ${manifest.workspaceMode}`,
    "",
    `Goal:\n${manifest.goal}`,
    "",
    `Step: ${step.id}`,
    `Role: ${step.role}`,
    "",
    "Protocol:",
    "- Stay within the task scope unless the prompt explicitly says otherwise.",
    "- Report blockers and verification evidence in the final result.",
    "- Do not claim completion without evidence.",
    "- Follow the Task Packet contract below; escalate if any contract field is impossible to satisfy.",
    "",
    readOnlyRoleInstructions(task.role),
    "",
    coordinationBridgeInstructions(task),
    "",
    skillBlock,
    "",
    task.taskPacket ? renderTaskPacket(task.taskPacket) : "",
    "",
    (inputDependencyContext(task) || ""),
    memoryBlock,
    "Task:",
    // Substitute the goal literally: a string replacement passed to replaceAll
    // would interpret "$&", "$$", "$`" etc. inside the goal as patterns.
    step.task.split("{goal}").join(manifest.goal),
  ].join("\n");
}
|
||||
@@ -0,0 +1,64 @@
|
||||
import * as path from "node:path";
|
||||
import type { ArtifactDescriptor } from "../../state/types.ts";
|
||||
|
||||
/**
 * Names of the stages recorded in a worker prompt pipeline artifact, listed
 * in the order `buildWorkerPromptPipeline` emits them.
 */
export type WorkerPromptPipelineStageName =
  "task-packet-built" |
  "dependency-context-collected" |
  "skills-rendered-or-disabled" |
  "capability-inventory-recorded" |
  "coordination-bridge-attached" |
  "prompt-rendered" |
  "prompt-artifact-written";
|
||||
|
||||
/** One recorded stage of the worker prompt pipeline. */
export interface WorkerPromptPipelineStage {
  /** Which stage this entry describes. */
  name: WorkerPromptPipelineStageName;
  /** Artifact paths (relative, forward-slash separated) related to the stage; may be empty. */
  references: Array<string>;
  /** Optional flat key/value details about the stage. */
  details?: Record<string, string | number | boolean>;
}
|
||||
|
||||
/** Serializable record of how a worker prompt was assembled for one task. */
export interface WorkerPromptPipelineArtifact {
  /** Layout version of this record. */
  schemaVersion: 1;
  /** Task the prompt was built for. */
  taskId: string;
  /** Recorded stages, in pipeline order. */
  stages: Array<WorkerPromptPipelineStage>;
}
|
||||
|
||||
/**
 * Converts an artifact's path into a reference relative to the artifacts root.
 *
 * @param artifactsRoot Directory that artifact references are relative to.
 * @param artifact Artifact whose `path` should be referenced; may be omitted.
 * @returns The relative path with forward slashes, or `undefined` when there
 *   is no artifact, the path is the root itself, or it lies outside the root.
 */
function artifactReference(artifactsRoot: string, artifact?: ArtifactDescriptor): string | undefined {
  if (!artifact) return undefined;
  const root = path.resolve(artifactsRoot);
  const target = path.resolve(artifact.path);
  const relative = path.relative(root, target);
  // Empty means "the root itself"; absolute means a different drive/root.
  if (!relative || path.isAbsolute(relative)) return undefined;
  // Only a leading ".." path segment escapes the root. A name that merely
  // begins with two dots (for example "..meta/file.json") is still inside it.
  const escapesRoot = relative === ".." || relative.startsWith(`..${path.sep}`);
  if (escapesRoot) return undefined;
  return relative.split("\\").join("/");
}
|
||||
|
||||
/** Arguments accepted by `buildWorkerPromptPipeline`. */
export interface BuildWorkerPromptPipelineInput {
  /** Root directory that stage references are made relative to. */
  artifactsRoot: string;
  /** Task the pipeline record belongs to. */
  taskId: string;
  /** Artifact containing the rendered prompt. */
  promptArtifact: ArtifactDescriptor;
  /** Artifact referenced by the dependency-context stage. */
  inputsArtifact: ArtifactDescriptor;
  /** Artifact containing rendered skills, when one was written. */
  skillArtifact?: ArtifactDescriptor;
  /** Artifact containing the capability inventory. */
  capabilityArtifact: ArtifactDescriptor;
  /** Artifact containing the coordination bridge text. */
  coordinationArtifact: ArtifactDescriptor;
  /** Number of skill instructions, recorded in the skills stage details. */
  skillInstructionCount: number;
  /** Whether skills were disabled, recorded in the skills stage details. */
  skillsDisabled: boolean;
}
|
||||
|
||||
/**
 * Builds the record describing how a worker prompt was assembled.
 *
 * Each stage references its artifact relative to the artifacts root; when an
 * artifact cannot be expressed that way, a conventional default path derived
 * from the task id is recorded instead. The skills stage has no references
 * when no skill artifact was written, and "prompt-rendered" never has any.
 */
export function buildWorkerPromptPipeline(input: BuildWorkerPromptPipelineInput): WorkerPromptPipelineArtifact {
  const { artifactsRoot, taskId } = input;
  // Resolve an artifact reference, falling back to the default location.
  const referenceOr = (artifact: ArtifactDescriptor, fallback: string): string =>
    artifactReference(artifactsRoot, artifact) ?? fallback;

  const stages: WorkerPromptPipelineStage[] = [];
  stages.push({ name: "task-packet-built", references: [`metadata/${taskId}.task-packet.json`] });
  stages.push({ name: "dependency-context-collected", references: [referenceOr(input.inputsArtifact, `metadata/${taskId}.inputs.json`)] });
  stages.push({
    name: "skills-rendered-or-disabled",
    references: input.skillArtifact ? [referenceOr(input.skillArtifact, `metadata/${taskId}.skills.md`)] : [],
    details: { disabled: input.skillsDisabled, skillInstructionCount: input.skillInstructionCount },
  });
  stages.push({ name: "capability-inventory-recorded", references: [referenceOr(input.capabilityArtifact, `metadata/${taskId}.capabilities.json`)] });
  stages.push({ name: "coordination-bridge-attached", references: [referenceOr(input.coordinationArtifact, `metadata/${taskId}.coordination-bridge.md`)] });
  stages.push({ name: "prompt-rendered", references: [] });
  stages.push({ name: "prompt-artifact-written", references: [referenceOr(input.promptArtifact, `prompts/${taskId}.md`)] });

  return { schemaVersion: 1, taskId, stages };
}
|
||||
14
extensions/pi-crew/src/runtime/task-runner/result-utils.ts
Normal file
14
extensions/pi-crew/src/runtime/task-runner/result-utils.ts
Normal file
@@ -0,0 +1,14 @@
|
||||
/**
 * Strips leading noise from a worker's final text.
 *
 * - Blank or missing text yields `undefined`.
 * - If a line consisting of `DONE` is followed by more lines, everything from
 *   the last such line onward is returned.
 * - Text that already starts with `DONE` is returned as-is (trimmed).
 * - Otherwise, if a `</file>` tag occurs before the end, the non-empty text
 *   after its last occurrence is returned.
 * - In every other case the trimmed text is returned.
 */
export function cleanResultText(text: string | undefined): string | undefined {
  const body = text?.trim();
  if (!body) return undefined;

  const markerAt = body.lastIndexOf("\nDONE\n");
  // Skip the leading newline of the marker so the result starts at "DONE".
  if (markerAt !== -1) return body.slice(markerAt + 1).trim();
  if (body === "DONE" || body.startsWith("DONE\n")) return body;

  const closingTag = "</file>";
  const tagAt = body.lastIndexOf(closingTag);
  const tagEnd = tagAt + closingTag.length;
  if (tagAt >= 0 && tagEnd < body.length) {
    const tail = body.slice(tagEnd).trim();
    return tail || body;
  }
  return body;
}
|
||||
|
||||
/** True only for non-array objects whose `type` property is exactly "message_end". */
export function isFinalChildEvent(event: unknown): boolean {
  if (!event || typeof event !== "object" || Array.isArray(event)) return false;
  return (event as Record<string, unknown>).type === "message_end";
}
|
||||
22
extensions/pi-crew/src/runtime/task-runner/state-helpers.ts
Normal file
22
extensions/pi-crew/src/runtime/task-runner/state-helpers.ts
Normal file
@@ -0,0 +1,22 @@
|
||||
import type { TaskCheckpointState, TeamRunManifest, TeamTaskState } from "../../state/types.ts";
|
||||
import { loadRunManifestById, saveRunTasks } from "../../state/state-store.ts";
|
||||
import { recordFromTask, upsertCrewAgent } from "../crew-agent-records.ts";
|
||||
|
||||
/**
 * Produces a new task list where the entry with `updated.id` is replaced by
 * `updated`; all other entries are carried over by reference. Nothing is
 * appended when the id is not present.
 */
export function updateTask(tasks: TeamTaskState[], updated: TeamTaskState): TeamTaskState[] {
  const targetId = updated.id;
  return tasks.map((existing) => {
    if (existing.id === targetId) return updated;
    return existing;
  });
}
|
||||
|
||||
/**
 * Persists a change to a single task.
 *
 * The task list is re-read from the stored run (falling back to
 * `fallbackTasks` when the run or its tasks cannot be loaded), the entry for
 * `updated` is swapped in, and the merged list is saved.
 *
 * @returns The merged task list that was saved.
 */
export function persistSingleTaskUpdate(manifest: TeamRunManifest, fallbackTasks: TeamTaskState[], updated: TeamTaskState): TeamTaskState[] {
  const stored = loadRunManifestById(manifest.cwd, manifest.runId);
  const baseline = stored?.tasks ?? fallbackTasks;
  const merged = updateTask(baseline, updated);
  saveRunTasks(manifest, merged);
  return merged;
}
|
||||
|
||||
/**
 * Records a checkpoint phase on a task and persists it.
 *
 * The checkpoint carries the phase, the current time and a child PID: the
 * given `childPid` when truthy, otherwise the PID from the task's previous
 * checkpoint, otherwise none. The task list is persisted and the agent record
 * is upserted with the "child-process" runtime kind.
 *
 * @returns The checkpointed task and the persisted task list.
 */
export function checkpointTask(manifest: TeamRunManifest, tasks: TeamTaskState[], task: TeamTaskState, phase: TaskCheckpointState["phase"], childPid?: number): { task: TeamTaskState; tasks: TeamTaskState[] } {
  const effectivePid = childPid || task.checkpoint?.childPid;
  const checkpoint: TaskCheckpointState = {
    phase,
    updatedAt: new Date().toISOString(),
    ...(effectivePid ? { childPid: effectivePid } : {}),
  };
  const nextTask = { ...task, checkpoint };
  const inMemory = updateTask(tasks, nextTask);
  const nextTasks = persistSingleTaskUpdate(manifest, inMemory, nextTask);
  upsertCrewAgent(manifest, recordFromTask(manifest, nextTask, "child-process"));
  return { task: nextTask, tasks: nextTasks };
}
|
||||
774
extensions/pi-crew/src/runtime/team-runner.ts
Normal file
774
extensions/pi-crew/src/runtime/team-runner.ts
Normal file
@@ -0,0 +1,774 @@
|
||||
import * as fs from "node:fs";
|
||||
import type { AgentConfig } from "../agents/agent-config.ts";
|
||||
import type { CrewLimitsConfig, CrewRuntimeConfig, CrewReliabilityConfig } from "../config/config.ts";
|
||||
import type { CrewRuntimeCapabilities } from "./runtime-resolver.ts";
|
||||
import { writeArtifact } from "../state/artifact-store.ts";
|
||||
import { appendEvent } from "../state/event-log.ts";
|
||||
import type { TeamConfig } from "../teams/team-config.ts";
|
||||
import type { ArtifactDescriptor, PolicyDecision, TeamRunManifest, TaskAttemptState, TeamTaskState } from "../state/types.ts";
|
||||
import { loadRunManifestById, saveRunManifest, saveRunManifestAsync, saveRunTasksAsync, updateRunStatus } from "../state/state-store.ts";
|
||||
import { aggregateUsage, formatUsage } from "../state/usage.ts";
|
||||
import type { WorkflowConfig, WorkflowStep } from "../workflows/workflow-config.ts";
|
||||
import { evaluateCrewPolicy, summarizePolicyDecisions } from "./policy-engine.ts";
|
||||
import { buildRecoveryLedger } from "./recovery-recipes.ts";
|
||||
import { buildTaskGraphIndex, refreshTaskGraphQueues, taskGraphSnapshot } from "./task-graph-scheduler.ts";
|
||||
import { checkBranchFreshness } from "../worktree/branch-freshness.ts";
|
||||
import { aggregateTaskOutputs } from "./task-output-context.ts";
|
||||
import { saveCrewAgents } from "./crew-agent-records.ts";
|
||||
import { recordsForMaterializedTasks } from "./task-display.ts";
|
||||
import { deliverGroupJoin, resolveGroupJoinMode } from "./group-join.ts";
|
||||
import { runTeamTask } from "./task-runner.ts";
|
||||
import { executeWithRetry, DEFAULT_RETRY_POLICY, type RetryPolicy } from "./retry-executor.ts";
|
||||
import { appendDeadletter } from "./deadletter.ts";
|
||||
import type { MetricRegistry } from "../observability/metric-registry.ts";
|
||||
import { childCorrelation, withCorrelation } from "../observability/correlation.ts";
|
||||
import { resolveBatchConcurrency } from "./concurrency.ts";
|
||||
import { mapConcurrent } from "./parallel-utils.ts";
|
||||
import { permissionForRole } from "./role-permission.ts";
|
||||
import { CrewCancellationError, cancellationReasonFromSignal } from "./cancellation.ts";
|
||||
import { effectivenessPolicyDecision, evaluateRunEffectiveness, formatRunEffectivenessLines } from "./effectiveness.ts";
|
||||
|
||||
/**
 * Options describing one team run to execute.
 * NOTE(review): the consumer of this interface is outside this excerpt, so
 * only the fields documented in the original source carry descriptions.
 */
export interface ExecuteTeamRunInput {
  manifest: TeamRunManifest;
  tasks: Array<TeamTaskState>;
  team: TeamConfig;
  workflow: WorkflowConfig;
  agents: Array<AgentConfig>;
  executeWorkers: boolean;
  limits?: CrewLimitsConfig;
  runtime?: CrewRuntimeCapabilities;
  runtimeConfig?: CrewRuntimeConfig;
  parentContext?: string;
  parentModel?: unknown;
  modelRegistry?: unknown;
  modelOverride?: string;
  signal?: AbortSignal;
  reliability?: CrewReliabilityConfig;
  metricRegistry?: MetricRegistry;
  /** Skill override from the team tool; `false` disables skill injection for this run. */
  skillOverride?: Array<string> | false;
  /** Optional callback for JSON events from child Pi; used for overflow recovery tracking. */
  onJsonEvent?: (taskId: string, runId: string, event: unknown) => void;
}
|
||||
|
||||
/**
 * Looks up the workflow step a task refers to via `task.stepId`.
 *
 * @throws Error when the workflow has no step with that id.
 */
function findStep(workflow: WorkflowConfig, task: TeamTaskState): WorkflowStep {
  for (const candidate of workflow.steps) {
    if (candidate.id === task.stepId) return candidate;
  }
  throw new Error(`Workflow step '${task.stepId}' not found for task '${task.id}'.`);
}
|
||||
|
||||
/**
 * Looks up the agent configuration a task refers to via `task.agent`.
 *
 * @throws Error when no agent with that name is configured.
 */
function findAgent(agents: AgentConfig[], task: TeamTaskState): AgentConfig {
  for (const candidate of agents) {
    if (candidate.name === task.agent) return candidate;
  }
  throw new Error(`Agent '${task.agent}' not found for task '${task.id}'.`);
}
|
||||
|
||||
/**
 * Marks every still-queued task as skipped with the given reason.
 *
 * Queued tasks get status "skipped", the reason as `error`, a `finishedAt`
 * timestamp and, when they have graph data, `graph.queue` set to "blocked".
 * Tasks in any other status are returned untouched.
 */
function markBlocked(tasks: TeamTaskState[], reason: string): TeamTaskState[] {
  return tasks.map((task): TeamTaskState => {
    if (task.status !== "queued") return task;
    return {
      ...task,
      status: "skipped",
      error: reason,
      finishedAt: new Date().toISOString(),
      graph: task.graph ? { ...task.graph, queue: "blocked" } : undefined,
    };
  });
}
|
||||
|
||||
/**
 * De-duplicates artifacts by `path`. When several share a path the last one
 * wins, but it keeps the position of the first occurrence of that path.
 */
function mergeArtifacts(items: ArtifactDescriptor[]): ArtifactDescriptor[] {
  const latestByPath = new Map<string, ArtifactDescriptor>(
    items.map((item) => [item.path, item] as [string, ArtifactDescriptor]),
  );
  return Array.from(latestByPath.values());
}
|
||||
|
||||
/** True while a task can still make progress: queued, running or waiting. */
function isNonTerminalTaskStatus(status: TeamTaskState["status"]): boolean {
  switch (status) {
    case "queued":
    case "running":
    case "waiting":
      return true;
    default:
      return false;
  }
}
|
||||
|
||||
/**
 * Decides whether `updated` should replace `current` when merging results
 * from parallel workers.
 *
 * All workers start from the same input snapshot, so a later result can still
 * hold outdated queued/running copies of tasks that another worker has
 * already finished. Such copies must never overwrite a finished task. Beyond
 * that guard, an update is merged when its status or timestamps differ, or
 * when it carries a result artifact, error, model attempts, usage or attempts.
 */
function shouldMergeTaskUpdate(current: TeamTaskState, updated: TeamTaskState): boolean {
  const wouldRegress = !isNonTerminalTaskStatus(current.status) && isNonTerminalTaskStatus(updated.status);
  if (wouldRegress) return false;

  if (updated.status !== current.status) return true;
  if (updated.finishedAt !== current.finishedAt) return true;
  if (updated.startedAt !== current.startedAt) return true;
  if (updated.resultArtifact) return true;
  if (updated.error) return true;
  if (updated.modelAttempts?.length) return true;
  if (updated.usage) return true;
  return Boolean(updated.attempts?.length);
}
|
||||
|
||||
/**
 * Merges the task lists returned by several workers into `base`.
 *
 * Results are applied in order; each task update replaces the current entry
 * with the same id only when `shouldMergeTaskUpdate` accepts it. Updates for
 * unknown ids are ignored. The merged list is passed through
 * `refreshTaskGraphQueues` before being returned.
 */
export function __test__mergeTaskUpdates(base: TeamTaskState[], results: Array<{ tasks: TeamTaskState[] }>): TeamTaskState[] {
  let merged = base;
  for (const { tasks: updates } of results) {
    for (const updated of updates) {
      const current = merged.find((task) => task.id === updated.id);
      const accept = current !== undefined && shouldMergeTaskUpdate(current, updated);
      if (accept) {
        merged = merged.map((task) => (task.id === updated.id ? updated : task));
      }
    }
  }
  return refreshTaskGraphQueues(merged);
}
|
||||
|
||||
/** One task entry of an adaptive plan. */
interface AdaptivePlanTask {
  /** Role that should perform the task. */
  role: string;
  /** Optional title. */
  title?: string;
  /** Task description. */
  task: string;
}
|
||||
|
||||
/** A named group of tasks within an adaptive plan. */
interface AdaptivePlanPhase {
  /** Phase name. */
  name: string;
  /** Tasks belonging to the phase. */
  tasks: Array<AdaptivePlanTask>;
}
|
||||
|
||||
/** An adaptive plan: an ordered list of phases. */
interface AdaptivePlan {
  phases: Array<AdaptivePlanPhase>;
}
|
||||
|
||||
/** Maximum total number of tasks, summed over all phases, that an adaptive plan may contain; larger plans are rejected by the parser. */
const MAX_ADAPTIVE_TASKS = 12;
|
||||
|
||||
/**
 * Turns arbitrary text into a short identifier fragment: lower-cased, runs of
 * characters outside `a-z0-9` collapsed to "-", leading/trailing dashes
 * removed, then cut to 32 characters. Falls back to "task" when nothing is left.
 */
function slug(value: string): string {
  const dashed = value.toLowerCase().replace(/[^a-z0-9]+/g, "-");
  const trimmed = dashed.replace(/^-+|-+$/g, "");
  const shortened = trimmed.slice(0, 32);
  return shortened || "task";
}
|
||||
|
||||
/**
 * Pulls the raw JSON text of an adaptive plan out of a worker's output.
 *
 * Tried in order: the non-empty text between the START and END markers;
 * everything after a START marker when that yields nothing; the body of the
 * first Markdown code fence (optionally tagged `json`).
 *
 * @returns The candidate JSON text, or `undefined` when none is found.
 */
function extractAdaptivePlanJson(text: string): string | undefined {
  const startMarker = "ADAPTIVE_PLAN_JSON_START";

  const delimited = /ADAPTIVE_PLAN_JSON_START\s*([\s\S]*?)\s*ADAPTIVE_PLAN_JSON_END/.exec(text);
  if (delimited?.[1]) return delimited[1];

  const startAt = text.indexOf(startMarker);
  if (startAt >= 0) return text.slice(startAt + startMarker.length).trim();

  const fenced = /```(?:json)?\s*([\s\S]*?)```/i.exec(text);
  return fenced?.[1];
}
|
||||
|
||||
/**
 * Parses and validates an adaptive plan embedded in worker output.
 *
 * The JSON must be an object with either a `phases` array or a top-level
 * `tasks` array (treated as a single phase named "adaptive"). Validation is
 * all-or-nothing: the whole plan is rejected when any phase is not an object,
 * has no tasks, contains a task whose role is not in `allowedRoles` or whose
 * task text is blank, or when the total task count exceeds the maximum.
 * Unnamed phases are called `phase-<n>` (1-based).
 *
 * @param text Worker output that may contain the plan.
 * @param allowedRoles Roles a plan task may be assigned to.
 * @returns The validated plan, or `undefined` when absent or invalid.
 */
export function __test__parseAdaptivePlan(text: string, allowedRoles: string[]): AdaptivePlan | undefined {
  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
    Boolean(value) && typeof value === "object" && !Array.isArray(value);

  const raw = extractAdaptivePlanJson(text);
  if (!raw) return undefined;

  let parsed: unknown;
  try {
    parsed = JSON.parse(raw);
  } catch {
    return undefined;
  }
  if (!isPlainObject(parsed)) return undefined;

  let phasesRaw: unknown[] | undefined;
  if (Array.isArray(parsed.phases)) phasesRaw = parsed.phases;
  else if (Array.isArray(parsed.tasks)) phasesRaw = [{ name: "adaptive", tasks: parsed.tasks }];
  if (!phasesRaw) return undefined;

  const allowed = new Set(allowedRoles);
  const phases: AdaptivePlanPhase[] = [];
  let total = 0;

  for (let phaseIndex = 0; phaseIndex < phasesRaw.length; phaseIndex += 1) {
    const phaseRaw = phasesRaw[phaseIndex];
    if (!isPlainObject(phaseRaw)) return undefined;
    const rawTasks = phaseRaw.tasks;
    if (!Array.isArray(rawTasks) || rawTasks.length === 0) return undefined;

    const tasks: AdaptivePlanTask[] = [];
    for (const taskRaw of rawTasks) {
      if (!isPlainObject(taskRaw)) return undefined;
      const { role, title, task } = taskRaw;
      if (typeof role !== "string" || !allowed.has(role)) return undefined;
      if (typeof task !== "string") return undefined;
      const taskText = task.trim();
      if (!taskText) return undefined;
      // The cap applies to the whole plan, not to a single phase.
      if (total >= MAX_ADAPTIVE_TASKS) return undefined;
      tasks.push({ role, title: typeof title === "string" ? title : undefined, task: taskText });
      total += 1;
    }

    const rawName = phaseRaw.name;
    const name = typeof rawName === "string" && rawName.trim() ? rawName.trim() : `phase-${phaseIndex + 1}`;
    phases.push({ name, tasks });
  }

  return phases.length ? { phases } : undefined;
}
|
||||
|
||||
/**
 * Appends the closing brackets that a truncated JSON text is missing.
 *
 * The trimmed input is scanned once while tracking string literals (including backslash
 * escapes), so brackets inside strings are ignored. Every `{` / `[` seen outside a string
 * expects a matching closer; closers that do not match the innermost open bracket are
 * ignored. Whatever is still open at the end is closed innermost-first.
 *
 * A string literal left open at the end is not closed, and no other repair (dangling
 * commas, missing values) is attempted.
 *
 * @param raw Possibly truncated JSON text.
 * @returns The trimmed text followed by any missing closing brackets.
 */
function closeUnbalancedJson(raw: string): string {
	const trimmed = raw.trim();
	const pendingClosers: string[] = [];
	let insideString = false;
	let skipNext = false;

	for (const symbol of trimmed) {
		if (skipNext) {
			skipNext = false;
			continue;
		}
		if (insideString) {
			if (symbol === "\\") skipNext = true;
			else if (symbol === '"') insideString = false;
			continue;
		}
		switch (symbol) {
			case '"':
				insideString = true;
				break;
			case "{":
				pendingClosers.push("}");
				break;
			case "[":
				pendingClosers.push("]");
				break;
			case "}":
			case "]":
				if (pendingClosers[pendingClosers.length - 1] === symbol) pendingClosers.pop();
				break;
			default:
				break;
		}
	}

	return trimmed + pendingClosers.reverse().join("");
}
|
||||
|
||||
/**
 * Recovers every complete phase object from a damaged plan payload.
 *
 * Starting at the first `[` after the first `"phases"` key, the text is scanned while
 * tracking string literals and brace depth. Each top-level `{ ... }` span is parsed on its
 * own; spans that fail to parse are skipped, so a truncated or malformed trailing phase
 * does not discard the phases before it. Scanning stops at the `]` that closes the phases
 * array, so objects belonging to later sibling keys are never mistaken for phases.
 *
 * @param raw Plan payload that failed normal JSON parsing.
 * @returns `{ phases }` holding the recovered objects, or `undefined` when none were found.
 */
function salvageCompletePhaseObjects(raw: string): unknown | undefined {
	const phasesIndex = raw.indexOf('"phases"');
	if (phasesIndex < 0) return undefined;
	const arrayStart = raw.indexOf("[", phasesIndex);
	if (arrayStart < 0) return undefined;

	const phases: unknown[] = [];
	let objectStart = -1;
	let depth = 0; // brace depth relative to the phases array
	let nestedArrays = 0; // arrays opened directly inside the phases array (outside any object)
	let inString = false;
	let escaped = false;

	for (let index = arrayStart + 1; index < raw.length; index++) {
		const char = raw[index];
		if (escaped) {
			escaped = false;
			continue;
		}
		if (char === "\\" && inString) {
			escaped = true;
			continue;
		}
		if (char === '"') {
			inString = !inString;
			continue;
		}
		if (inString) continue;

		if (depth === 0 && char === "[") {
			nestedArrays++;
			continue;
		}
		if (depth === 0 && char === "]") {
			// The phases array itself has ended: nothing after it is a phase.
			if (nestedArrays === 0) break;
			nestedArrays--;
			continue;
		}
		if (char === "{") {
			if (depth === 0) objectStart = index;
			depth++;
			continue;
		}
		if (char === "}") {
			if (depth <= 0) continue;
			depth--;
			if (depth === 0 && objectStart >= 0) {
				try {
					phases.push(JSON.parse(raw.slice(objectStart, index + 1)));
				} catch {
					// Ignore malformed trailing phase objects and keep earlier complete phases.
				}
				objectStart = -1;
			}
		}
	}

	return phases.length ? { phases } : undefined;
}
|
||||
|
||||
/** Canonical role name mapped to the slugified spellings accepted as aliases for it. */
const ADAPTIVE_ROLE_ALIASES: Readonly<Record<string, readonly string[]>> = {
	reviewer: ["code-reviewer", "review", "code-review", "critic"],
	"security-reviewer": ["security", "security-review", "sec-review"],
	"test-engineer": ["tester", "qa", "test"],
	executor: ["developer", "implementer", "coder", "engineer"],
	explorer: ["researcher", "scout"],
	analyst: ["analysis", "analyzer"],
};

/**
 * Maps a role name produced by the planner onto one of the allowed team roles.
 *
 * Resolution order:
 * 1. Exact match against `allowed`.
 * 2. Match after slug normalisation, so differences in case, whitespace or punctuation
 *    (e.g. "Executor", "Test Engineer") still resolve to the allowed role.
 * 3. Known aliases, applied only when the canonical role is allowed.
 *
 * @param role Role name as written in the plan.
 * @param allowed Role names defined by the team.
 * @returns The matching allowed role name, or `undefined` when the role cannot be mapped.
 */
function adaptiveRoleAlias(role: string, allowed: Set<string>): string | undefined {
	if (allowed.has(role)) return role;
	// Without any alphanumeric character `slug` would return its "task" fallback, which must
	// not be matched against real role names.
	if (!/[a-z0-9]/i.test(role)) return undefined;
	const normalized = slug(role);
	for (const candidate of allowed) {
		if (slug(candidate) === normalized) return candidate;
	}
	for (const [target, names] of Object.entries(ADAPTIVE_ROLE_ALIASES)) {
		if (allowed.has(target) && names.includes(normalized)) return target;
	}
	return undefined;
}
|
||||
|
||||
/**
 * Best-effort recovery of an adaptive plan from planner output that failed strict parsing.
 *
 * The payload is parsed as-is, then with missing closing brackets appended, and finally by
 * salvaging the complete phase objects. Unlike the strict parser, invalid phases and tasks
 * are dropped instead of rejecting the plan, role names are mapped through
 * `adaptiveRoleAlias`, and tasks beyond `MAX_ADAPTIVE_TASKS` are cut off.
 *
 * @param text Raw planner output.
 * @param allowedRoles Role names defined by the team.
 * @returns On success `{ plan, repaired: true, reason }`, where `reason` is "repaired" when
 *   content was salvaged, rebalanced, dropped or truncated and "normalized" otherwise. On
 *   failure `{ repaired: false, reason }` with reason "missing-json", "invalid-json",
 *   "missing-phases" or "empty-plan".
 */
export function __test__repairAdaptivePlan(text: string, allowedRoles: string[]): { plan?: AdaptivePlan; repaired: boolean; reason?: string } {
	const isPlainObject = (value: unknown): value is Record<string, unknown> =>
		Boolean(value) && typeof value === "object" && !Array.isArray(value);

	// Returns the first candidate that parses, or undefined when none does.
	const parseFirstValid = (candidates: string[]): unknown => {
		for (const candidate of candidates) {
			try {
				return JSON.parse(candidate);
			} catch {
				// Try the next repair candidate.
			}
		}
		return undefined;
	};

	const raw = extractAdaptivePlanJson(text);
	if (!raw) return { repaired: false, reason: "missing-json" };

	const balanced = closeUnbalancedJson(raw);
	let decoded = parseFirstValid([raw, balanced]);
	let salvaged = false;
	if (!decoded) {
		decoded = salvageCompletePhaseObjects(raw);
		salvaged = decoded !== undefined;
	}
	if (!isPlainObject(decoded)) return { repaired: false, reason: "invalid-json" };

	const { phases: declaredPhases, tasks: declaredTasks } = decoded;
	let rawPhases: unknown[];
	if (Array.isArray(declaredPhases)) rawPhases = declaredPhases;
	else if (Array.isArray(declaredTasks)) rawPhases = [{ name: "adaptive", tasks: declaredTasks }];
	else return { repaired: false, reason: "missing-phases" };

	const permittedRoles = new Set(allowedRoles);
	const phases: AdaptivePlanPhase[] = [];
	let acceptedCount = 0;
	let contentChanged = salvaged || raw !== balanced;

	for (let position = 0; position < rawPhases.length; position += 1) {
		const rawPhase = rawPhases[position];
		if (!isPlainObject(rawPhase)) continue;
		const { name: rawName, tasks: rawTasks } = rawPhase;
		if (!Array.isArray(rawTasks)) continue;

		const kept: AdaptivePlanTask[] = [];
		for (const rawTask of rawTasks) {
			if (acceptedCount >= MAX_ADAPTIVE_TASKS) {
				contentChanged = true;
				break;
			}
			if (!isPlainObject(rawTask)) {
				contentChanged = true;
				continue;
			}
			const { role: rawRole, title: rawTitle, task: rawInstructions } = rawTask;
			const role = typeof rawRole === "string" ? adaptiveRoleAlias(rawRole, permittedRoles) : undefined;
			const instructions = typeof rawInstructions === "string" ? rawInstructions.trim() : "";
			if (!role || !instructions) {
				contentChanged = true;
				continue;
			}
			kept.push({ role, title: typeof rawTitle === "string" ? rawTitle : undefined, task: instructions });
			acceptedCount += 1;
		}

		if (kept.length > 0) {
			const label = typeof rawName === "string" ? rawName.trim() : "";
			phases.push({ name: label || `phase-${position + 1}`, tasks: kept });
		}
		if (acceptedCount >= MAX_ADAPTIVE_TASKS) break;
	}

	if (phases.length === 0) return { repaired: false, reason: "empty-plan" };
	return { plan: { phases }, repaired: true, reason: contentChanged ? "repaired" : "normalized" };
}
|
||||
|
||||
/**
 * Rebuilds the workflow steps for adaptive tasks that exist in the task list but not in
 * the workflow.
 *
 * A step is synthesised for every task whose `stepId` starts with "adaptive-", that carries
 * adaptive task text, and whose step id is not already part of `workflow.steps`.
 *
 * @param workflow Workflow as currently configured.
 * @param tasks Task list that may contain adaptive tasks.
 * @returns A copy of the workflow with the synthesised steps appended, or the original
 *   workflow object when nothing had to be added.
 */
function reconstructAdaptiveWorkflow(workflow: WorkflowConfig, tasks: TeamTaskState[]): WorkflowConfig {
	const knownStepIds = new Set(workflow.steps.map((step) => step.id));
	const rebuilt = tasks.flatMap<WorkflowStep>((task) => {
		if (!task.stepId?.startsWith("adaptive-") || !task.adaptive?.task || knownStepIds.has(task.stepId)) return [];
		return [{
			id: task.stepId,
			role: task.role,
			dependsOn: task.graph?.dependencies ?? task.dependsOn,
			parallelGroup: `adaptive-${slug(task.adaptive.phase)}`,
			task: task.adaptive.task,
		}];
	});
	if (rebuilt.length === 0) return workflow;
	return { ...workflow, steps: [...workflow.steps, ...rebuilt] };
}
|
||||
|
||||
/**
 * Expands the planner's adaptive plan into queued tasks and workflow steps once the
 * "assess" task of an "implementation" workflow has completed.
 *
 * Outcomes:
 * - Workflow is not "implementation", or "assess" has not completed: input returned unchanged.
 * - Adaptive tasks already exist: tasks unchanged, workflow rebuilt from them.
 * - Plan artifact missing or unreadable, or the plan is invalid and cannot be repaired:
 *   an `adaptive.plan_missing` event is appended and `missingPlan` is true.
 * - Otherwise one task and one step are created per planned task. Every task of a phase
 *   depends on all tasks of the previous phase (the first phase depends on "assess"), and
 *   an `adaptive.plan_injected` event is appended.
 *
 * Repair is skipped when `PI_CREW_ADAPTIVE_REPAIR` or `PI_TEAMS_ADAPTIVE_REPAIR` is "0".
 *
 * NOTE(review): on a successful repair the manifest is saved with an extra artifact, but
 * the updated manifest is not returned, so the caller keeps working with the old object —
 * confirm the artifact entry is not lost by a later manifest save.
 *
 * @param input Current manifest, tasks, workflow and team definition.
 * @returns The (possibly extended) tasks and workflow plus `injected` / `missingPlan` flags.
 */
function injectAdaptivePlanIfReady(input: { manifest: TeamRunManifest; tasks: TeamTaskState[]; workflow: WorkflowConfig; team: TeamConfig }): { tasks: TeamTaskState[]; workflow: WorkflowConfig; injected: boolean; missingPlan: boolean } {
	const { manifest, workflow, team } = input;
	const existingTasks = input.tasks;
	const notInjected = (missingPlan: boolean) => ({ tasks: existingTasks, workflow, injected: false, missingPlan });
	const summarizePhases = (source: AdaptivePlan) => source.phases.map((phase) => ({ name: phase.name, count: phase.tasks.length, roles: phase.tasks.map((task) => task.role) }));

	if (workflow.name !== "implementation") return notInjected(false);
	if (existingTasks.some((task) => task.stepId?.startsWith("adaptive-"))) {
		return { tasks: existingTasks, workflow: reconstructAdaptiveWorkflow(workflow, existingTasks), injected: false, missingPlan: false };
	}

	const assessTask = existingTasks.find((task) => task.stepId === "assess" && task.status === "completed");
	if (!assessTask) return notInjected(false);

	const resultPath = assessTask.resultArtifact?.path;
	if (!resultPath) {
		appendEvent(manifest.eventsPath, { type: "adaptive.plan_missing", runId: manifest.runId, taskId: assessTask.id, message: "Adaptive planner result artifact is missing." });
		return notInjected(true);
	}

	let text = "";
	try {
		text = fs.readFileSync(resultPath, "utf-8");
	} catch {
		appendEvent(manifest.eventsPath, { type: "adaptive.plan_missing", runId: manifest.runId, taskId: assessTask.id, message: "Adaptive planner result artifact could not be read." });
		return notInjected(true);
	}

	const roleNames = team.roles.map((role) => role.name);
	let plan = __test__parseAdaptivePlan(text, roleNames);
	if (!plan) {
		const repairDisabled = process.env.PI_CREW_ADAPTIVE_REPAIR === "0" || process.env.PI_TEAMS_ADAPTIVE_REPAIR === "0";
		const repair: { plan?: AdaptivePlan; repaired: boolean; reason?: string } = repairDisabled
			? { repaired: false, reason: "disabled" }
			: __test__repairAdaptivePlan(text, roleNames);
		if (!repair.plan) {
			appendEvent(manifest.eventsPath, { type: "adaptive.plan_repair_failed", runId: manifest.runId, taskId: assessTask.id, message: "Adaptive planner output could not be repaired.", data: { reason: repair.reason } });
			appendEvent(manifest.eventsPath, { type: "adaptive.plan_missing", runId: manifest.runId, taskId: assessTask.id, message: "Adaptive planner did not produce a valid plan; no dynamic subagents were spawned." });
			return notInjected(true);
		}
		plan = repair.plan;
		// Record what the repair kept so the deviation from the planner output is auditable.
		const repairArtifact = writeArtifact(manifest.artifactsRoot, {
			kind: "metadata",
			relativePath: "metadata/adaptive-repair.json",
			producer: assessTask.id,
			content: `${JSON.stringify({ reason: repair.reason, phases: summarizePhases(plan) }, null, 2)}\n`,
		});
		saveRunManifest({ ...manifest, updatedAt: new Date().toISOString(), artifacts: [...manifest.artifacts, repairArtifact] });
		appendEvent(manifest.eventsPath, { type: "adaptive.plan_repaired", runId: manifest.runId, taskId: assessTask.id, message: "Adaptive planner output was repaired before dynamic subagents were spawned.", data: { reason: repair.reason } });
	}
	const acceptedPlan: AdaptivePlan = plan;

	const adaptiveSteps: WorkflowStep[] = [];
	const adaptiveTasks: TeamTaskState[] = [];
	let upstreamStepIds = ["assess"];
	let sequence = 0;
	acceptedPlan.phases.forEach((phase, phaseIndex) => {
		const phaseStepIds: string[] = [];
		phase.tasks.forEach((planned, taskIndex) => {
			sequence += 1;
			const roleSlug = slug(planned.role);
			const stepId = `adaptive-${phaseIndex + 1}-${taskIndex + 1}-${roleSlug}`;
			const taskId = `adaptive-${String(sequence).padStart(2, "0")}-${roleSlug}`;
			adaptiveSteps.push({ id: stepId, role: planned.role, dependsOn: upstreamStepIds, parallelGroup: `adaptive-${slug(phase.name)}`, task: planned.task });
			adaptiveTasks.push({
				id: taskId,
				runId: manifest.runId,
				stepId,
				role: planned.role,
				agent: team.roles.find((role) => role.name === planned.role)?.agent ?? planned.role,
				title: planned.title ?? stepId,
				status: "queued",
				dependsOn: upstreamStepIds,
				cwd: manifest.cwd,
				adaptive: { phase: phase.name, task: planned.task },
				graph: { taskId, dependencies: upstreamStepIds, children: [], queue: "blocked" },
			});
			phaseStepIds.push(stepId);
		});
		// The next phase waits for every task of this phase.
		upstreamStepIds = phaseStepIds;
	});

	// Dependencies were recorded as step ids above; tasks reference each other by task id.
	const taskIdByStepId = new Map<string, string>();
	taskIdByStepId.set("assess", assessTask.id);
	for (const task of adaptiveTasks) taskIdByStepId.set(task.stepId ?? task.id, task.id);
	const toTaskIds = (stepIds: string[]) => stepIds.map((dep) => taskIdByStepId.get(dep) ?? dep);

	const withGraph = adaptiveTasks.map((task) => ({
		...task,
		dependsOn: toTaskIds(task.dependsOn),
		graph: task.graph ? { ...task.graph, dependencies: toTaskIds(task.dependsOn), queue: "blocked" as const } : task.graph,
	}));
	const allTasks = refreshTaskGraphQueues([...existingTasks, ...withGraph]);

	appendEvent(manifest.eventsPath, { type: "adaptive.plan_injected", runId: manifest.runId, taskId: assessTask.id, message: `Injected ${withGraph.length} adaptive subagent task(s) across ${acceptedPlan.phases.length} phase(s).`, data: { phases: summarizePhases(acceptedPlan) } });
	return { tasks: allTasks, workflow: { ...workflow, steps: [...workflow.steps, ...adaptiveSteps] }, injected: true, missingPlan: false };
}
|
||||
|
||||
/**
 * Renders one task as a Markdown bullet for the progress report.
 *
 * The line always contains id, status, role and agent; scope, verification levels and the
 * error text are appended only when present on the task.
 *
 * @param task Task to describe.
 * @returns A single-line Markdown list item.
 */
function formatTaskProgress(task: TeamTaskState): string {
	const segments = [`- ${task.id}: ${task.status} (${task.role} -> ${task.agent})`];
	if (task.taskPacket) segments.push(` scope=${task.taskPacket.scope}`);
	if (task.verification) segments.push(` green=${task.verification.observedGreenLevel}/${task.verification.requiredGreenLevel}`);
	if (task.error) segments.push(` - ${task.error}`);
	return segments.join("");
}
|
||||
|
||||
/**
 * Evaluates run effectiveness and formats the result as report lines.
 *
 * @param manifest Run manifest being reported on.
 * @param tasks Current task list.
 * @param executeWorkers Whether workers are actually executed for this run.
 * @param runtimeConfig Optional runtime configuration forwarded to the evaluation.
 * @returns The lines produced by `formatRunEffectivenessLines`.
 */
function runEffectivenessLines(manifest: TeamRunManifest, tasks: TeamTaskState[], executeWorkers: boolean, runtimeConfig?: CrewRuntimeConfig): string[] {
	const evaluation = evaluateRunEffectiveness({ manifest, tasks, executeWorkers, runtimeConfig });
	return formatRunEffectivenessLines(evaluation);
}
|
||||
|
||||
/**
 * Writes the `progress.md` artifact for a run and returns a manifest that references it.
 *
 * The report lists run status, team, workflow, per-status task counts, queue sizes, one
 * line per task and the effectiveness section. Any earlier progress artifact with the same
 * path is replaced in the returned manifest. The manifest is not saved here.
 *
 * @param manifest Manifest of the run being reported.
 * @param tasks Current task list.
 * @param producer Name recorded as the artifact producer.
 * @param executeWorkers Whether workers are actually executed (defaults to true).
 * @param runtimeConfig Optional runtime configuration used by the effectiveness section.
 * @returns A copy of the manifest with a refreshed `updatedAt` and the new progress artifact.
 */
function writeProgress(manifest: TeamRunManifest, tasks: TeamTaskState[], producer: string, executeWorkers = true, runtimeConfig?: CrewRuntimeConfig): TeamRunManifest {
	const statusTally = tasks.reduce(
		(tally, task) => tally.set(task.status, (tally.get(task.status) ?? 0) + 1),
		new Map<string, number>(),
	);
	const queue = taskGraphSnapshot(tasks);
	const queueSummary = (["ready", "blocked", "running", "done", "failed", "cancelled"] as const)
		.map((bucket) => `${bucket}=${queue[bucket].length}`)
		.join(", ");

	const reportLines = [
		`# pi-crew progress ${manifest.runId}`,
		"",
		`Status: ${manifest.status}`,
		`Team: ${manifest.team}`,
		`Workflow: ${manifest.workflow ?? "(none)"}`,
		`Updated: ${new Date().toISOString()}`,
		`Task counts: ${Array.from(statusTally, ([status, count]) => `${status}=${count}`).join(", ") || "none"}`,
		`Queue: ${queueSummary}`,
		"",
		"## Tasks",
		...tasks.map(formatTaskProgress),
		"",
		"## Effectiveness",
		...runEffectivenessLines(manifest, tasks, executeWorkers, runtimeConfig),
		"",
	];
	const progress = writeArtifact(manifest.artifactsRoot, {
		kind: "progress",
		relativePath: "progress.md",
		producer,
		content: reportLines.join("\n"),
	});

	// Drop the previous entry for this path so the manifest lists the artifact only once.
	const otherArtifacts = manifest.artifacts.filter((artifact) => artifact.kind !== "progress" || artifact.path !== progress.path);
	return { ...manifest, updatedAt: new Date().toISOString(), artifacts: [...otherArtifacts, progress] };
}
|
||||
|
||||
/**
 * Evaluates crew policy for a run, records the outcome as artifacts and events, and
 * returns a manifest carrying the decisions.
 *
 * Steps, in order: check branch freshness and write `metadata/branch-freshness.json`;
 * evaluate policy decisions (adding a "branch_stale" notification and a `branch.stale`
 * event when the branch is stale or diverged); write `policy-decisions.json`; build the
 * recovery ledger and write `recovery-ledger.json`; append one event per decision and per
 * ledger entry. The manifest is not saved here.
 *
 * @param manifest Manifest of the run.
 * @param tasks Current task list.
 * @param limits Optional limits forwarded to the policy evaluation.
 * @returns A copy of the manifest with `policyDecisions` set and the three metadata
 *   artifacts replaced by their fresh versions.
 */
function applyPolicy(manifest: TeamRunManifest, tasks: TeamTaskState[], limits?: CrewLimitsConfig): TeamRunManifest {
	const writeMetadata = (relativePath: string, producer: string, payload: unknown) =>
		writeArtifact(manifest.artifactsRoot, { kind: "metadata", relativePath, producer, content: `${JSON.stringify(payload, null, 2)}\n` });

	const branchFreshness = checkBranchFreshness(manifest.cwd);
	const branchArtifact = writeMetadata("metadata/branch-freshness.json", "branch-freshness", branchFreshness);

	const evaluated: PolicyDecision[] = evaluateCrewPolicy({ manifest, tasks, limits });
	const branchNeedsAttention = branchFreshness.status === "stale" || branchFreshness.status === "diverged";
	let decisions: PolicyDecision[] = evaluated;
	if (branchNeedsAttention) {
		const branchDecision: PolicyDecision = {
			action: "notify",
			reason: "branch_stale",
			message: branchFreshness.message,
			createdAt: new Date().toISOString(),
		};
		decisions = [...evaluated, branchDecision];
		appendEvent(manifest.eventsPath, { type: "branch.stale", runId: manifest.runId, message: branchFreshness.message, data: { branchFreshness } });
	}

	const policyArtifact = writeMetadata("policy-decisions.json", "policy-engine", decisions);
	const recoveryLedger = buildRecoveryLedger(decisions);
	const recoveryArtifact = writeMetadata("recovery-ledger.json", "recovery-engine", recoveryLedger);

	for (const decision of decisions) {
		appendEvent(manifest.eventsPath, { type: decision.action === "escalate" ? "policy.escalated" : "policy.action", runId: manifest.runId, taskId: decision.taskId, message: decision.message, data: { action: decision.action, reason: decision.reason } });
	}
	for (const entry of recoveryLedger.entries) {
		appendEvent(manifest.eventsPath, { type: entry.state === "escalation_required" ? "recovery.escalated" : "recovery.attempted", runId: manifest.runId, taskId: entry.taskId, message: entry.message, data: { scenario: entry.scenario, steps: entry.steps, attempt: entry.attempt, state: entry.state } });
	}

	// Earlier versions of the three metadata files are superseded by the ones just written.
	const supersededSuffixes = ["policy-decisions.json", "recovery-ledger.json", "branch-freshness.json"];
	const retainedArtifacts = manifest.artifacts.filter(
		(artifact) => artifact.kind !== "metadata" || !supersededSuffixes.some((suffix) => artifact.path.endsWith(suffix)),
	);
	return {
		...manifest,
		updatedAt: new Date().toISOString(),
		policyDecisions: decisions,
		artifacts: [...retainedArtifacts, branchArtifact, policyArtifact, recoveryArtifact],
	};
}
|
||||
|
||||
/**
 * Resolves the retry policy for task execution.
 *
 * @param config Reliability configuration, if any.
 * @returns `DEFAULT_RETRY_POLICY` with the fields of `config.retryPolicy` laid over it.
 */
function retryPolicyFromConfig(config: CrewReliabilityConfig | undefined): RetryPolicy {
	const overrides = config?.retryPolicy ?? {};
	return { ...DEFAULT_RETRY_POLICY, ...overrides };
}
|
||||
|
||||
/**
 * Looks up a task in a run result, but only if it ended in the "failed" state.
 *
 * @param result Result whose task list is searched.
 * @param taskId Id of the task of interest.
 * @returns The first task with that id and status "failed", otherwise `undefined`.
 */
function failedTaskFrom(result: { tasks: TeamTaskState[] }, taskId: string): TeamTaskState | undefined {
	for (const candidate of result.tasks) {
		if (candidate.id === taskId && candidate.status === "failed") return candidate;
	}
	return undefined;
}
|
||||
|
||||
/**
 * Tells whether the run must wait for explicit plan approval.
 *
 * @param workflow Workflow being executed.
 * @param runtimeConfig Runtime configuration, if any.
 * @returns True only for the "implementation" workflow when `requirePlanApproval` is
 *   strictly `true`.
 */
function requiresPlanApproval(workflow: WorkflowConfig, runtimeConfig: CrewRuntimeConfig | undefined): boolean {
	if (workflow.name !== "implementation") return false;
	return runtimeConfig?.requirePlanApproval === true;
}
|
||||
|
||||
/**
 * Tells whether the run has a required plan approval that is still pending.
 *
 * @param manifest Manifest of the run.
 * @returns True when `planApproval` exists, is required and has status "pending".
 */
function isPlanApprovalPending(manifest: TeamRunManifest): boolean {
	const approval = manifest.planApproval;
	if (approval == null) return false;
	return approval.required === true && approval.status === "pending";
}
|
||||
|
||||
/**
 * Tells whether a task's role is allowed to do more than read.
 *
 * @param task Task to classify.
 * @returns True unless `permissionForRole` reports "read_only" for the task's role.
 */
function isMutatingTask(task: TeamTaskState): boolean {
	const permission = permissionForRole(task.role);
	return permission !== "read_only";
}
|
||||
|
||||
/**
 * Records a pending plan-approval request on the manifest if none exists yet.
 *
 * When the manifest already has a `planApproval` entry (in any state) it is returned
 * untouched. Otherwise a pending request referencing the completed "assess" task and its
 * result artifact (when available) is stored, the manifest is saved and a
 * `plan.approval_required` event is appended.
 *
 * @param manifest Manifest of the run.
 * @param tasks Current task list, searched for the completed "assess" task.
 * @returns The unchanged manifest, or the saved copy carrying the pending request.
 */
function ensurePlanApprovalRequested(manifest: TeamRunManifest, tasks: TeamTaskState[]): TeamRunManifest {
	if (manifest.planApproval) return manifest;

	const assessTask = tasks.find((task) => task.stepId === "assess" && task.status === "completed");
	const planTaskId = assessTask?.id;
	const planArtifactPath = assessTask?.resultArtifact?.path;
	const timestamp = new Date().toISOString();

	const updated: TeamRunManifest = {
		...manifest,
		updatedAt: timestamp,
		planApproval: {
			required: true,
			status: "pending",
			requestedAt: timestamp,
			updatedAt: timestamp,
			planTaskId,
			planArtifactPath,
		},
	};
	saveRunManifest(updated);
	appendEvent(updated.eventsPath, { type: "plan.approval_required", runId: updated.runId, taskId: planTaskId, message: "Adaptive implementation plan requires explicit approval before mutating tasks run.", data: { planArtifactPath } });
	return updated;
}
|
||||
|
||||
/**
 * Cancels every task that has not finished yet.
 *
 * Tasks in status "queued", "running" or "waiting" are copied with status "cancelled", a
 * finish timestamp, the given reason as error text and (when a graph entry exists) queue
 * "done". All other tasks are returned as-is.
 *
 * @param tasks Current task list.
 * @param reason Text stored in the `error` field of each cancelled task.
 * @returns A new task list; the input array is not modified.
 */
function cancelPlanTasks(tasks: TeamTaskState[], reason: string): TeamTaskState[] {
	return tasks.map((task): TeamTaskState => {
		const unfinished = task.status === "queued" || task.status === "running" || task.status === "waiting";
		if (!unfinished) return task;
		return {
			...task,
			status: "cancelled",
			finishedAt: new Date().toISOString(),
			error: reason,
			graph: task.graph ? { ...task.graph, queue: "done" } : undefined,
		};
	});
}
|
||||
|
||||
/**
 * Tells whether any adaptive task that may modify the workspace is still queued.
 *
 * @param tasks Current task list.
 * @returns True when at least one task is "queued", carries adaptive metadata and is
 *   classified as mutating by `isMutatingTask`.
 */
function hasPendingMutatingAdaptiveTask(tasks: TeamTaskState[]): boolean {
	for (const task of tasks) {
		if (task.status !== "queued" || !task.adaptive) continue;
		if (isMutatingTask(task)) return true;
	}
	return false;
}
|
||||
|
||||
export async function executeTeamRun(input: ExecuteTeamRunInput): Promise<{ manifest: TeamRunManifest; tasks: TeamTaskState[] }> {
|
||||
let workflow = input.workflow;
|
||||
let manifest = updateRunStatus(input.manifest, "running", input.executeWorkers ? "Executing team workflow." : "Creating workflow prompts and placeholder results.");
|
||||
let tasks = refreshTaskGraphQueues(input.tasks);
|
||||
let queueIndex = buildTaskGraphIndex(tasks);
|
||||
const canInjectAdaptivePlan = workflow.name === "implementation";
|
||||
let adaptivePlanInjected = false;
|
||||
let adaptivePlanMissing = false;
|
||||
const attemptAdaptivePlan = () => {
|
||||
if (!canInjectAdaptivePlan || adaptivePlanInjected || adaptivePlanMissing) return { injected: false, missing: false };
|
||||
const adaptivePlan = injectAdaptivePlanIfReady({ manifest, tasks, workflow, team: input.team });
|
||||
adaptivePlanInjected = adaptivePlanInjected || adaptivePlan.injected;
|
||||
adaptivePlanMissing = adaptivePlan.missingPlan;
|
||||
workflow = adaptivePlan.workflow;
|
||||
if (adaptivePlan.injected) tasks = adaptivePlan.tasks;
|
||||
return { injected: adaptivePlan.injected, missing: adaptivePlan.missingPlan };
|
||||
};
|
||||
const initialAdaptive = attemptAdaptivePlan();
|
||||
if (initialAdaptive.missing) {
|
||||
tasks = markBlocked(tasks, "Adaptive planner did not produce a valid subagent plan.");
|
||||
await saveRunTasksAsync(manifest, tasks);
|
||||
manifest = updateRunStatus(manifest, "blocked", "Adaptive planner did not produce a valid subagent plan.");
|
||||
return { manifest, tasks };
|
||||
}
|
||||
if (initialAdaptive.injected) {
|
||||
manifest = requiresPlanApproval(workflow, input.runtimeConfig) ? ensurePlanApprovalRequested(manifest, tasks) : manifest;
|
||||
queueIndex = buildTaskGraphIndex(tasks);
|
||||
} else if (requiresPlanApproval(workflow, input.runtimeConfig) && hasPendingMutatingAdaptiveTask(tasks)) {
|
||||
manifest = ensurePlanApprovalRequested(manifest, tasks);
|
||||
}
|
||||
if (manifest.planApproval?.status === "cancelled") {
|
||||
tasks = cancelPlanTasks(tasks, "Plan approval was cancelled.");
|
||||
await saveRunTasksAsync(manifest, tasks);
|
||||
manifest = updateRunStatus(manifest, "cancelled", "Plan approval was cancelled.");
|
||||
return { manifest, tasks };
|
||||
}
|
||||
manifest = writeProgress(manifest, tasks, "team-runner", input.executeWorkers, input.runtimeConfig);
|
||||
await saveRunManifestAsync(manifest);
|
||||
const runtimeKind = input.runtime?.kind ?? (input.executeWorkers ? "child-process" : "scaffold");
|
||||
saveCrewAgents(manifest, recordsForMaterializedTasks(manifest, tasks, runtimeKind));
|
||||
|
||||
while (tasks.some((task) => task.status === "queued")) {
|
||||
if (input.signal?.aborted) {
|
||||
const cancelReason = cancellationReasonFromSignal(input.signal);
|
||||
const message = `${cancelReason.message} (${cancelReason.code})`;
|
||||
const cancelledTaskIds: string[] = [];
|
||||
tasks = tasks.map((task) => {
|
||||
if (task.status !== "queued" && task.status !== "running" && task.status !== "waiting") return task;
|
||||
cancelledTaskIds.push(task.id);
|
||||
return { ...task, status: "cancelled", finishedAt: new Date().toISOString(), error: message };
|
||||
});
|
||||
await saveRunTasksAsync(manifest, tasks);
|
||||
for (const taskId of cancelledTaskIds) appendEvent(manifest.eventsPath, { type: "task.cancelled", runId: manifest.runId, taskId, message, data: { reason: cancelReason.code } });
|
||||
manifest = updateRunStatus(manifest, "cancelled", message, { data: { reason: cancelReason.code, cancelledTaskIds } });
|
||||
return { manifest, tasks };
|
||||
}
|
||||
|
||||
const failed = tasks.find((task) => task.status === "failed");
|
||||
if (failed) {
|
||||
tasks = markBlocked(tasks, `Blocked by failed task '${failed.id}'.`);
|
||||
await saveRunTasksAsync(manifest, tasks);
|
||||
saveCrewAgents(manifest, recordsForMaterializedTasks(manifest, tasks, runtimeKind));
|
||||
manifest = updateRunStatus(manifest, "failed", `Failed at task '${failed.id}'.`);
|
||||
return { manifest, tasks };
|
||||
}
|
||||
|
||||
const snapshot = taskGraphSnapshot(tasks, queueIndex);
|
||||
const readyRoles = snapshot.ready.map((taskId) => tasks.find((task) => task.id === taskId)?.role).filter((role): role is string => Boolean(role));
|
||||
const concurrency = resolveBatchConcurrency({ workflowName: workflow.name, workflowMaxConcurrency: workflow.maxConcurrency, teamMaxConcurrency: input.team.maxConcurrency, limitMaxConcurrentWorkers: input.limits?.maxConcurrentWorkers, allowUnboundedConcurrency: input.limits?.allowUnboundedConcurrency, readyCount: snapshot.ready.length, workspaceMode: manifest.workspaceMode, readyRoles });
|
||||
if (concurrency.reason.includes(";unbounded:")) {
|
||||
appendEvent(manifest.eventsPath, { type: "limits.unbounded", runId: manifest.runId, message: "Unbounded worker concurrency was explicitly enabled for this run.", data: { concurrencyReason: concurrency.reason, maxConcurrent: concurrency.maxConcurrent } });
|
||||
}
|
||||
const approvalPending = isPlanApprovalPending(manifest);
|
||||
const readyIds = approvalPending ? snapshot.ready : snapshot.ready.slice(0, concurrency.selectedCount);
|
||||
const candidateBatch = readyIds.map((id) => tasks.find((task) => task.id === id)).filter((task): task is TeamTaskState => Boolean(task));
|
||||
const readyBatch = approvalPending ? candidateBatch.filter((task) => !isMutatingTask(task)).slice(0, concurrency.selectedCount) : candidateBatch;
|
||||
if (readyBatch.length === 0) {
|
||||
if (approvalPending && candidateBatch.some(isMutatingTask)) {
|
||||
await saveRunTasksAsync(manifest, tasks);
|
||||
saveCrewAgents(manifest, recordsForMaterializedTasks(manifest, tasks, runtimeKind));
|
||||
manifest = updateRunStatus(manifest, "blocked", "Plan approval required before mutating implementation tasks run.");
|
||||
return { manifest, tasks };
|
||||
}
|
||||
tasks = markBlocked(tasks, "No ready queued task; dependency graph may be invalid.");
|
||||
await saveRunTasksAsync(manifest, tasks);
|
||||
saveCrewAgents(manifest, recordsForMaterializedTasks(manifest, tasks, runtimeKind));
|
||||
manifest = updateRunStatus(manifest, "blocked", "No ready queued task.");
|
||||
return { manifest, tasks };
|
||||
}
|
||||
|
||||
appendEvent(manifest.eventsPath, { type: "task.progress", runId: manifest.runId, message: `Starting ready batch with ${readyBatch.length} task(s).`, data: { taskIds: readyBatch.map((task) => task.id), readyCount: snapshot.ready.length, blockedCount: snapshot.blocked.length, runningCount: snapshot.running.length, doneCount: snapshot.done.length, selectedCount: readyBatch.length, maxConcurrent: concurrency.maxConcurrent, defaultConcurrency: concurrency.defaultConcurrency, concurrencyReason: approvalPending ? `${concurrency.reason};plan-approval-read-only` : concurrency.reason } });
|
||||
const results = await mapConcurrent(
|
||||
readyBatch,
|
||||
concurrency.selectedCount,
|
||||
async (task) => {
|
||||
const step = findStep(workflow, task);
|
||||
const agent = findAgent(input.agents, task);
|
||||
const teamRole = input.team.roles.find((role) => role.name === task.role);
|
||||
const baseInput = { manifest, tasks, task, step, agent, signal: input.signal, executeWorkers: input.executeWorkers, runtimeKind: input.runtime?.kind, runtimeConfig: input.runtimeConfig, parentContext: input.parentContext, parentModel: input.parentModel, modelRegistry: input.modelRegistry, modelOverride: input.modelOverride, teamRoleModel: teamRole?.model, teamRoleSkills: teamRole?.skills, skillOverride: input.skillOverride, limits: input.limits, onJsonEvent: input.onJsonEvent };
|
||||
if (input.reliability?.autoRetry !== true) return withCorrelation(childCorrelation(manifest.runId, task.id), () => runTeamTask(baseInput));
|
||||
let lastFailed: { manifest: TeamRunManifest; tasks: TeamTaskState[] } | undefined;
|
||||
const attemptsSoFar: TaskAttemptState[] = [...(task.attempts ?? [])];
|
||||
const policy = retryPolicyFromConfig(input.reliability);
|
||||
try {
|
||||
return await executeWithRetry(async (attempt, info) => {
|
||||
const startedAt = new Date().toISOString();
|
||||
const inFlightAttempts: TaskAttemptState[] = [...attemptsSoFar, { attemptId: info.attemptId, startedAt }];
|
||||
input.metricRegistry?.counter("crew.task.retry_attempt_total", "Retry attempts by run and task").inc({ runId: manifest.runId, taskId: task.id });
|
||||
const fresh = loadRunManifestById(manifest.cwd, manifest.runId);
|
||||
const freshManifest = fresh?.manifest ?? manifest;
|
||||
const freshTasks = fresh?.tasks ?? tasks;
|
||||
const freshTask = freshTasks.find((item) => item.id === task.id) ?? task;
|
||||
if (freshTask.status !== "queued" && freshTask.status !== "running") return { manifest: freshManifest, tasks: freshTasks };
|
||||
const taskWithAttempt: TeamTaskState = { ...freshTask, attempts: inFlightAttempts };
|
||||
const result = await withCorrelation(childCorrelation(freshManifest.runId, task.id), () => runTeamTask({ ...baseInput, manifest: freshManifest, tasks: freshTasks, task: taskWithAttempt }));
|
||||
const failed = failedTaskFrom(result, task.id);
|
||||
const endedAt = new Date().toISOString();
|
||||
const finishedAttempt: TaskAttemptState = { attemptId: info.attemptId, startedAt, endedAt, ...(failed?.error ? { error: failed.error } : {}) };
|
||||
attemptsSoFar.push(finishedAttempt);
|
||||
const withAttempt = result.tasks.map((item) => item.id === task.id ? { ...item, attempts: [...attemptsSoFar] } : item);
|
||||
const enriched = { manifest: result.manifest, tasks: withAttempt };
|
||||
if (failed) {
|
||||
lastFailed = enriched;
|
||||
throw new Error(failed.error ?? `Task ${task.id} failed.`);
|
||||
}
|
||||
input.metricRegistry?.histogram("crew.task.retry_count", "Retries per task", [0, 1, 2, 3, 5, 10]).observe({ runId: manifest.runId, team: input.team.name }, Math.max(0, attempt - 1));
|
||||
return enriched;
|
||||
}, policy, {
|
||||
signal: input.signal,
|
||||
attemptId: (attempt) => `${manifest.runId}:${task.id}:attempt-${attempt}`,
|
||||
onAttemptFailed: (attempt, error, delayMs, info) => {
|
||||
appendEvent(manifest.eventsPath, { type: "crew.task.retry_attempt", runId: manifest.runId, taskId: task.id, message: error.message, data: { attempt, attemptId: info.attemptId, delayMs } });
|
||||
input.metricRegistry?.histogram("crew.task.retry_delay_ms", "Retry backoff delay, milliseconds").observe({ runId: manifest.runId, taskId: task.id }, delayMs);
|
||||
},
|
||||
onRetryGivenUp: (attempts, error, info) => {
|
||||
appendDeadletter(manifest, { runId: manifest.runId, taskId: task.id, reason: "max-retries", attempts, attemptId: info.attemptId, lastError: error.message, timestamp: new Date().toISOString() });
|
||||
input.metricRegistry?.counter("crew.task.deadletter_total", "Deadletter triggers by reason").inc({ reason: "max-retries" });
|
||||
input.metricRegistry?.histogram("crew.task.retry_count", "Retries per task", [0, 1, 2, 3, 5, 10]).observe({ runId: manifest.runId, team: input.team.name }, Math.max(0, attempts - 1));
|
||||
},
|
||||
});
|
||||
} catch (retryError) {
|
||||
if (retryError instanceof CrewCancellationError || input.signal?.aborted) {
|
||||
const reason = retryError instanceof CrewCancellationError ? retryError.reason : cancellationReasonFromSignal(input.signal);
|
||||
const fresh = loadRunManifestById(manifest.cwd, manifest.runId);
|
||||
const freshManifest = fresh?.manifest ?? manifest;
|
||||
const freshTasks = fresh?.tasks ?? tasks;
|
||||
const cancelledTasks = freshTasks.map((item) => item.id === task.id && (item.status === "queued" || item.status === "running") ? { ...item, status: "cancelled" as const, finishedAt: new Date().toISOString(), error: `${reason.message} (${reason.code})` } : item);
|
||||
appendEvent(freshManifest.eventsPath, { type: "task.cancelled", runId: freshManifest.runId, taskId: task.id, message: reason.message, data: { reason, phase: "retry" } });
|
||||
return { manifest: updateRunStatus(freshManifest, "cancelled", reason.message), tasks: cancelledTasks };
|
||||
}
|
||||
if (lastFailed) return lastFailed;
|
||||
const fresh = loadRunManifestById(manifest.cwd, manifest.runId);
|
||||
const freshManifest = fresh?.manifest ?? manifest;
|
||||
const freshTasks = fresh?.tasks ?? tasks;
|
||||
const freshTask = freshTasks.find((item) => item.id === task.id) ?? task;
|
||||
if (freshTask.status !== "queued" && freshTask.status !== "running") return { manifest: freshManifest, tasks: freshTasks };
|
||||
return withCorrelation(childCorrelation(freshManifest.runId, task.id), () => runTeamTask({ ...baseInput, manifest: freshManifest, tasks: freshTasks, task: freshTask }));
|
||||
}
|
||||
},
|
||||
);
|
||||
manifest = { ...results.at(-1)!.manifest, artifacts: mergeArtifacts([manifest.artifacts, ...results.map((item) => item.manifest.artifacts)].flat()) };
|
||||
tasks = __test__mergeTaskUpdates(tasks, results);
|
||||
const cancelledResult = results.find((item) => item.manifest.status === "cancelled");
|
||||
if (cancelledResult || input.signal?.aborted) {
|
||||
const reason = input.signal?.aborted ? cancellationReasonFromSignal(input.signal) : undefined;
|
||||
const message = reason?.message ?? cancelledResult?.manifest.summary ?? "Run cancelled during task execution.";
|
||||
manifest = { ...manifest, status: "running" };
|
||||
manifest = updateRunStatus(manifest, "cancelled", message);
|
||||
await saveRunTasksAsync(manifest, tasks);
|
||||
saveCrewAgents(manifest, recordsForMaterializedTasks(manifest, tasks, runtimeKind));
|
||||
await saveRunManifestAsync(manifest);
|
||||
appendEvent(manifest.eventsPath, { type: "run.cancelled", runId: manifest.runId, message, data: { reason, phase: "task-batch", cancelledResultRunId: cancelledResult?.manifest.runId } });
|
||||
return { manifest, tasks };
|
||||
}
|
||||
queueIndex = buildTaskGraphIndex(tasks);
|
||||
const injectedAfterBatch = attemptAdaptivePlan();
|
||||
if (injectedAfterBatch.missing) {
|
||||
tasks = markBlocked(tasks, "Adaptive planner did not produce a valid subagent plan.");
|
||||
await saveRunTasksAsync(manifest, tasks);
|
||||
saveCrewAgents(manifest, recordsForMaterializedTasks(manifest, tasks, runtimeKind));
|
||||
manifest = updateRunStatus(manifest, "blocked", "Adaptive planner did not produce a valid subagent plan.");
|
||||
return { manifest, tasks };
|
||||
}
|
||||
if (injectedAfterBatch.injected) {
|
||||
manifest = requiresPlanApproval(workflow, input.runtimeConfig) ? ensurePlanApprovalRequested(manifest, tasks) : manifest;
|
||||
queueIndex = buildTaskGraphIndex(tasks);
|
||||
} else if (requiresPlanApproval(workflow, input.runtimeConfig) && hasPendingMutatingAdaptiveTask(tasks)) {
|
||||
manifest = ensurePlanApprovalRequested(manifest, tasks);
|
||||
}
|
||||
if (manifest.planApproval?.status === "cancelled") {
|
||||
tasks = cancelPlanTasks(tasks, "Plan approval was cancelled.");
|
||||
await saveRunTasksAsync(manifest, tasks);
|
||||
saveCrewAgents(manifest, recordsForMaterializedTasks(manifest, tasks, runtimeKind));
|
||||
manifest = updateRunStatus(manifest, "cancelled", "Plan approval was cancelled.");
|
||||
return { manifest, tasks };
|
||||
}
|
||||
await saveRunTasksAsync(manifest, tasks);
|
||||
saveCrewAgents(manifest, recordsForMaterializedTasks(manifest, tasks, runtimeKind));
|
||||
const completedBatch = readyBatch.map((task) => tasks.find((item) => item.id === task.id) ?? task);
|
||||
const batchArtifact = writeArtifact(manifest.artifactsRoot, {
|
||||
kind: "summary",
|
||||
relativePath: `batches/${readyBatch.map((task) => task.id).join("+")}.md`,
|
||||
producer: "team-runner",
|
||||
content: aggregateTaskOutputs(completedBatch, manifest),
|
||||
});
|
||||
const groupDelivery = deliverGroupJoin({ manifest, mode: resolveGroupJoinMode(input.runtimeConfig), batch: readyBatch, allTasks: tasks });
|
||||
manifest = { ...manifest, artifacts: mergeArtifacts([...manifest.artifacts, batchArtifact, ...(groupDelivery?.artifact ? [groupDelivery.artifact] : [])]) };
|
||||
manifest = writeProgress(manifest, tasks, "team-runner", input.executeWorkers, input.runtimeConfig);
|
||||
await saveRunManifestAsync(manifest);
|
||||
}
|
||||
|
||||
const failed = tasks.find((task) => task.status === "failed");
|
||||
const waiting = tasks.find((task) => task.status === "waiting");
|
||||
const running = tasks.find((task) => task.status === "running");
|
||||
manifest = applyPolicy(manifest, tasks, input.limits);
|
||||
const effectiveness = evaluateRunEffectiveness({ manifest, tasks, executeWorkers: input.executeWorkers, runtimeConfig: input.runtimeConfig });
|
||||
const effectivenessDecision = effectivenessPolicyDecision(effectiveness);
|
||||
if (effectivenessDecision) {
|
||||
manifest = { ...manifest, policyDecisions: [...(manifest.policyDecisions ?? []), effectivenessDecision], updatedAt: new Date().toISOString() };
|
||||
appendEvent(manifest.eventsPath, { type: "run.effectiveness", runId: manifest.runId, message: effectivenessDecision.message, data: { effectiveness, policyDecision: effectivenessDecision } });
|
||||
}
|
||||
const blockingDecision = manifest.policyDecisions?.find((item) => item.action === "block" || item.action === "escalate");
|
||||
if (failed) {
|
||||
manifest = updateRunStatus(manifest, "failed", `Failed at task '${failed.id}'.`);
|
||||
} else if (waiting) {
|
||||
manifest = updateRunStatus(manifest, "blocked", `Waiting for response to task '${waiting.id}'.`);
|
||||
} else if (running) {
|
||||
manifest = updateRunStatus(manifest, "blocked", `Task '${running.id}' is still running.`);
|
||||
} else if (effectiveness.severity === "failed") {
|
||||
manifest = updateRunStatus(manifest, "failed", effectivenessDecision?.message ?? "Run effectiveness guard failed.");
|
||||
} else if (effectiveness.severity === "blocked") {
|
||||
manifest = updateRunStatus(manifest, "blocked", effectivenessDecision?.message ?? "Run effectiveness guard blocked completion.");
|
||||
} else if (blockingDecision) {
|
||||
manifest = updateRunStatus(manifest, "blocked", blockingDecision.message);
|
||||
} else {
|
||||
manifest = updateRunStatus(manifest, "completed", input.executeWorkers ? "Team workflow completed." : "Team workflow scaffold completed without launching child workers.");
|
||||
}
|
||||
manifest = writeProgress(manifest, tasks, "team-runner", input.executeWorkers, input.runtimeConfig);
|
||||
await saveRunManifestAsync(manifest);
|
||||
const usage = aggregateUsage(tasks);
|
||||
const summaryArtifact = writeArtifact(manifest.artifactsRoot, {
|
||||
kind: "summary",
|
||||
relativePath: "summary.md",
|
||||
producer: "team-runner",
|
||||
content: [
|
||||
`# pi-crew run ${manifest.runId}`,
|
||||
"",
|
||||
`Status: ${manifest.status}`,
|
||||
`Team: ${manifest.team}`,
|
||||
`Workflow: ${manifest.workflow ?? "(none)"}`,
|
||||
`Goal: ${manifest.goal}`,
|
||||
`Usage: ${formatUsage(usage)}`,
|
||||
"",
|
||||
"## Tasks",
|
||||
...tasks.map(formatTaskProgress),
|
||||
"",
|
||||
"## Effectiveness",
|
||||
...runEffectivenessLines(manifest, tasks, input.executeWorkers, input.runtimeConfig),
|
||||
"",
|
||||
"## Policy decisions",
|
||||
...(manifest.policyDecisions?.length ? summarizePolicyDecisions(manifest.policyDecisions) : ["- (none)"]),
|
||||
"",
|
||||
].join("\n"),
|
||||
});
|
||||
manifest = { ...manifest, updatedAt: new Date().toISOString(), artifacts: [...manifest.artifacts, summaryArtifact] };
|
||||
await saveRunManifestAsync(manifest);
|
||||
await saveRunTasksAsync(manifest, tasks);
|
||||
return { manifest, tasks };
|
||||
}
|
||||
21
extensions/pi-crew/src/runtime/worker-heartbeat.ts
Normal file
21
extensions/pi-crew/src/runtime/worker-heartbeat.ts
Normal file
@@ -0,0 +1,21 @@
|
||||
/**
 * Liveness snapshot for a single worker.
 *
 * The heartbeat helpers in this module write `lastSeenAt` via
 * `Date.prototype.toISOString()` and read it back with `Date.parse`.
 */
export interface WorkerHeartbeatState {
  /** Identifier of the worker this heartbeat belongs to. */
  workerId: string;

  /** Process id, when known. NOTE(review): presumably the worker's OS pid - confirm against callers. */
  pid?: number;

  /** Timestamp string of the most recent heartbeat; compared against "now" for staleness. */
  lastSeenAt: string;

  /** NOTE(review): presumably the time of the worker's last stdout output - not written in this module. */
  lastStdoutAt?: string;

  /** NOTE(review): presumably the time of the worker's last event - not written in this module. */
  lastEventAt?: string;

  /** NOTE(review): presumably the number of turns the worker has taken - not written in this module. */
  turnCount?: number;

  /** Liveness flag; `createWorkerHeartbeat` initialises it to `true`. */
  alive?: boolean;
}
|
||||
|
||||
/**
 * Builds the initial heartbeat record for a worker.
 *
 * @param workerId Identifier stored on the record.
 * @param pid Optional process id; stored as given (the `pid` key is present even when undefined).
 * @param now Clock override, mainly useful for tests; defaults to the current time.
 * @returns A heartbeat marked `alive: true` whose `lastSeenAt` is `now` as an ISO string.
 */
export function createWorkerHeartbeat(workerId: string, pid?: number, now = new Date()): WorkerHeartbeatState {
  const lastSeenAt = now.toISOString();
  const heartbeat: WorkerHeartbeatState = { workerId, pid, lastSeenAt, alive: true };
  return heartbeat;
}
|
||||
|
||||
/**
 * Returns a refreshed copy of a heartbeat; the input object is not mutated.
 *
 * @param heartbeat Existing heartbeat to copy.
 * @param updates Fields to overlay on the copy. Any `lastSeenAt` supplied here is
 *   overwritten by `now`.
 * @param now Clock override; defaults to the current time.
 * @returns A new heartbeat with `updates` applied and `lastSeenAt` set to `now` as an ISO string.
 */
export function touchWorkerHeartbeat(heartbeat: WorkerHeartbeatState, updates: Partial<Omit<WorkerHeartbeatState, "workerId">> = {}, now = new Date()): WorkerHeartbeatState {
  const refreshed: WorkerHeartbeatState = { ...heartbeat, ...updates };
  // Stamp last so the timestamp always wins over anything carried in `updates`.
  refreshed.lastSeenAt = now.toISOString();
  return refreshed;
}
|
||||
|
||||
/**
 * Reports whether a heartbeat is older than the allowed staleness window.
 *
 * @param heartbeat Heartbeat whose `lastSeenAt` is parsed with `Date.parse`.
 * @param staleMs Maximum tolerated age in milliseconds; an age exactly equal to it is still fresh.
 * @param now Clock override; defaults to the current time.
 * @returns `true` when the heartbeat age strictly exceeds `staleMs`, or when
 *   `lastSeenAt` cannot be parsed (no usable evidence of liveness).
 */
export function isWorkerHeartbeatStale(heartbeat: WorkerHeartbeatState, staleMs: number, now = new Date()): boolean {
  const lastSeenMs = Date.parse(heartbeat.lastSeenAt);
  // Date.parse yields NaN for unparseable input and NaN compares false against
  // everything, so without this guard a corrupt timestamp would look fresh forever.
  if (Number.isNaN(lastSeenMs)) return true;
  return now.getTime() - lastSeenMs > staleMs;
}
|
||||
57
extensions/pi-crew/src/runtime/worker-startup.ts
Normal file
57
extensions/pi-crew/src/runtime/worker-startup.ts
Normal file
@@ -0,0 +1,57 @@
|
||||
/** Lifecycle phases a worker can be recorded in. */
export type WorkerLifecycleState =
  | "spawning"
  | "trust_required"
  | "ready_for_prompt"
  | "running"
  | "finished"
  | "failed";

/** Labels that `classifyStartupFailure` can assign to a startup problem; `"unknown"` is its fallback. */
export type StartupFailureClassification =
  | "trust_required"
  | "prompt_misdelivery"
  | "prompt_acceptance_timeout"
  | "transport_dead"
  | "worker_crashed"
  | "unknown";
|
||||
|
||||
/**
 * Facts gathered about a worker start, plus the failure classification derived
 * from them. Field notes describe how `createStartupEvidence` fills them in.
 */
export interface WorkerStartupEvidence {
  /** Last lifecycle phase recorded for the worker. */
  lastLifecycleState: WorkerLifecycleState;

  /** Command associated with the worker start, stored as given. */
  command: string;

  /** ISO timestamp of when the prompt was sent; absent when no send time was supplied. */
  promptSentAt?: string;

  /** Whether the prompt was accepted; when not supplied it defaults to "no error was reported". */
  promptAccepted: boolean;

  /** Whether trust-prompt wording was found in the stderr/error preview. */
  trustPromptDetected: boolean;

  /** False when the reported error text matches `enoent`, `spawn` or `transport` (case-insensitive). */
  transportHealthy: boolean;

  /** False once an exit code is known; otherwise true only while no finish time is recorded. */
  childProcessAlive: boolean;

  /** Milliseconds from start to finish (or to "now" when unfinished), never negative. */
  elapsedMs: number;

  /** Outcome of `classifyStartupFailure` for the fields above. */
  classification: StartupFailureClassification;

  /** First 500 characters of the error text, or of stderr when there is no error; absent when empty. */
  stderrPreview?: string;
}
|
||||
|
||||
/**
 * Checks whether a piece of output contains wording typical of a trust prompt.
 *
 * Matching is a case-insensitive substring search against a fixed phrase list.
 *
 * @param text Output to inspect.
 * @returns `true` if any known trust-prompt phrase occurs in `text`.
 */
export function detectTrustPrompt(text: string): boolean {
  const haystack = text.toLowerCase();
  const markers = ["do you trust", "trust this", "untrusted", "workspace trust", "allow this folder"];
  return markers.some((marker) => haystack.includes(marker));
}
|
||||
|
||||
/**
 * Maps startup evidence to a failure classification. Checks run in priority
 * order and the first match wins:
 *
 * 1. unhealthy transport -> `"transport_dead"`
 * 2. trust prompt seen, or lifecycle state `"trust_required"` -> `"trust_required"`
 * 3. prompt sent but not accepted -> `"prompt_acceptance_timeout"` while the child
 *    is alive, `"worker_crashed"` once it is not
 * 4. stderr preview mentions "command not found" / "not recognized" -> `"prompt_misdelivery"`
 * 5. child not alive and lifecycle state is not `"finished"` -> `"worker_crashed"`
 * 6. anything else -> `"unknown"`
 *
 * @param evidence Startup evidence without its `classification` field.
 * @returns The classification for the evidence.
 */
export function classifyStartupFailure(evidence: Omit<WorkerStartupEvidence, "classification">): StartupFailureClassification {
  const { transportHealthy, trustPromptDetected, lastLifecycleState, promptSentAt, promptAccepted, childProcessAlive } = evidence;

  if (!transportHealthy) return "transport_dead";
  if (trustPromptDetected || lastLifecycleState === "trust_required") return "trust_required";

  // A prompt that went out but was never accepted: the outcome depends on whether the child survived.
  const promptUnanswered = Boolean(promptSentAt) && !promptAccepted;
  if (promptUnanswered) return childProcessAlive ? "prompt_acceptance_timeout" : "worker_crashed";

  const stderr = evidence.stderrPreview?.toLowerCase();
  const shellRejectedInput = stderr !== undefined && (stderr.includes("command not found") || stderr.includes("not recognized"));
  if (shellRejectedInput) return "prompt_misdelivery";

  if (!childProcessAlive && lastLifecycleState !== "finished") return "worker_crashed";
  return "unknown";
}
|
||||
|
||||
/**
 * Assembles startup evidence from raw process observations and classifies it.
 *
 * Derivations performed here:
 * - `stderrPreview`: first 500 characters of `error`, falling back to `stderr`; omitted when empty.
 * - `trustPromptDetected`: `detectTrustPrompt` applied to that preview.
 * - `childProcessAlive`: false once an exit code is known, otherwise true until `finishedAt` is set.
 * - `lastLifecycleState`: `"failed"` on an error or non-zero exit code, else `"finished"` when
 *   `finishedAt` is set, else `"running"`.
 * - `promptAccepted`: the supplied value, defaulting to "no error reported".
 * - `transportHealthy`: false when `error` matches `enoent`, `spawn` or `transport` (case-insensitive).
 * - `elapsedMs`: `finishedAt` (or the current time) minus `startedAt`, clamped at zero.
 *
 * NOTE(review): trust detection only sees the 500-character preview, and stderr is
 * ignored whenever `error` is set - confirm that a trust prompt cannot be missed this way.
 * NOTE(review): an exit code of 0 without `finishedAt` yields state `"running"` with
 * `childProcessAlive: false` - confirm callers always pass `finishedAt` with an exit code.
 *
 * @param input Raw observations about the worker start.
 * @returns The evidence record including its `classification`.
 */
export function createStartupEvidence(input: {
  command: string;
  startedAt: Date;
  finishedAt?: Date;
  promptSentAt?: Date;
  promptAccepted?: boolean;
  stderr?: string;
  error?: string;
  exitCode?: number | null;
}): WorkerStartupEvidence {
  const { error, exitCode, finishedAt } = input;

  const preview = (error || input.stderr || "").slice(0, 500);
  const stderrPreview = preview || undefined;
  const trustPromptDetected = detectTrustPrompt(preview);

  const hasExitCode = exitCode !== undefined && exitCode !== null;
  const childProcessAlive = hasExitCode ? false : !finishedAt;

  let lastLifecycleState: WorkerLifecycleState;
  if (error || (hasExitCode && exitCode !== 0)) {
    lastLifecycleState = "failed";
  } else if (finishedAt) {
    lastLifecycleState = "finished";
  } else {
    lastLifecycleState = "running";
  }

  const transportFailure = Boolean(error) && /enoent|spawn|transport/i.test(error ?? "");
  const endedAt = finishedAt ?? new Date();

  // Field order is kept stable so the serialized form of the record does not change.
  const evidence: Omit<WorkerStartupEvidence, "classification"> = {
    lastLifecycleState,
    command: input.command,
    promptSentAt: input.promptSentAt?.toISOString(),
    promptAccepted: input.promptAccepted ?? !error,
    trustPromptDetected,
    transportHealthy: !transportFailure,
    childProcessAlive,
    elapsedMs: Math.max(0, endedAt.getTime() - input.startedAt.getTime()),
    stderrPreview,
  };

  return { ...evidence, classification: classifyStartupFailure(evidence) };
}
|
||||
Reference in New Issue
Block a user