mirror of
https://github.com/instructkr/claw-code.git
synced 2026-06-12 03:25:01 -04:00
Compare commits
142 Commits
9b06c98bd6
...
95fc007f6a
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
95fc007f6a | ||
|
|
313c840974 | ||
|
|
37ce63134a | ||
|
|
643ac8bc76 | ||
|
|
9189bfb816 | ||
|
|
5e5b3bdbc6 | ||
|
|
bd6622b85c | ||
|
|
d145429c96 | ||
|
|
0eabf20389 | ||
|
|
65411000c5 | ||
|
|
0da15c2e07 | ||
|
|
4af2fb6622 | ||
|
|
43ce1f527b | ||
|
|
329d0ffcc8 | ||
|
|
716d17e229 | ||
|
|
3f41341d4a | ||
|
|
702f2fb9ef | ||
|
|
476a1a467e | ||
|
|
f640139b31 | ||
|
|
2f428e249b | ||
|
|
d155a2fd72 | ||
|
|
9999c0fb3a | ||
|
|
65d9b1a362 | ||
|
|
b860f5657b | ||
|
|
71131932de | ||
|
|
4ced37897c | ||
|
|
897055a455 | ||
|
|
84a89f7e07 | ||
|
|
c01b47036e | ||
|
|
ca2085cb95 | ||
|
|
0121f20a09 | ||
|
|
9acd4f14da | ||
|
|
d46c423c1d | ||
|
|
2858aeccff | ||
|
|
116a95a253 | ||
|
|
91e290526a | ||
|
|
ceb092abd7 | ||
|
|
2da12117eb | ||
|
|
959bdf8491 | ||
|
|
347102d83b | ||
|
|
c00981896f | ||
|
|
f004f74ffa | ||
|
|
02252a8585 | ||
|
|
134e945a01 | ||
|
|
c20d0330c1 | ||
|
|
ba3a34d6fe | ||
|
|
0e9cff588d | ||
|
|
dba4f281f0 | ||
|
|
1c59e869e0 | ||
|
|
604bf389b6 | ||
|
|
0730183f35 | ||
|
|
5e0228dce0 | ||
|
|
b780c808d1 | ||
|
|
6948b20d74 | ||
|
|
c48c9134d9 | ||
|
|
215318410a | ||
|
|
59acc60eb5 | ||
|
|
3497851259 | ||
|
|
d93957de35 | ||
|
|
86e88c2fcd | ||
|
|
94bd6f13a7 | ||
|
|
d1fa484afd | ||
|
|
eb0356e92c | ||
|
|
7a1e9854c2 | ||
|
|
70bea57de3 | ||
|
|
3bbaefcf3e | ||
|
|
c0ab7a4d5f | ||
|
|
046bf6cedc | ||
|
|
66eeed82ca | ||
|
|
b139b10499 | ||
|
|
6e6f99e57e | ||
|
|
eb957a512c | ||
|
|
fcb9d18899 | ||
|
|
d03f33b119 | ||
|
|
6bd69d55bc | ||
|
|
e470e614d5 | ||
|
|
1494a94423 | ||
|
|
8efcec32d7 | ||
|
|
1afe145db8 | ||
|
|
7b3abfd49a | ||
|
|
2c004eb884 | ||
|
|
22cc8effbb | ||
|
|
a14977a866 | ||
|
|
e84424a2d3 | ||
|
|
de5384c8f0 | ||
|
|
93cfdbabeb | ||
|
|
efc59ab17e | ||
|
|
635f1145a2 | ||
|
|
a8fc17cdee | ||
|
|
28102af64a | ||
|
|
df148f1a3e | ||
|
|
3a2dddd1ca | ||
|
|
ce352f4750 | ||
|
|
d9b61cc4dc | ||
|
|
fbb0ab4be7 | ||
|
|
5736f364a9 | ||
|
|
6212f17c93 | ||
|
|
0f023665ae | ||
|
|
1a4d0e4676 | ||
|
|
b8984e515b | ||
|
|
834b0a91fe | ||
|
|
80f9914353 | ||
|
|
94f9540333 | ||
|
|
e1b0dbf860 | ||
|
|
90c4fd0b66 | ||
|
|
6870b0f985 | ||
|
|
3311266b59 | ||
|
|
cd6e1cea6f | ||
|
|
f30aa0b239 | ||
|
|
7f63e22f29 | ||
|
|
771d2ffd04 | ||
|
|
562f19bcff | ||
|
|
43bbf43f01 | ||
|
|
8322bb8ec6 | ||
|
|
4c9a0a9992 | ||
|
|
86db2e0b03 | ||
|
|
1a03359bb4 | ||
|
|
b34f370645 | ||
|
|
a9e87de905 | ||
|
|
0929180ba8 | ||
|
|
98c675b33b | ||
|
|
afc792f1a5 | ||
|
|
5b9097a7ac | ||
|
|
69a15bd707 | ||
|
|
41c87309f3 | ||
|
|
a02527826e | ||
|
|
a52a361e16 | ||
|
|
d5373ac5d6 | ||
|
|
a6f4e0d8d1 | ||
|
|
378b9bf533 | ||
|
|
66765ea96d | ||
|
|
c5b6fa5be3 | ||
|
|
48da1904e0 | ||
|
|
92a79b5276 | ||
|
|
553893410b | ||
|
|
0aa0d3f7cf | ||
|
|
9dd7e79eb2 | ||
|
|
0ca034472b | ||
|
|
19638a015e | ||
|
|
83f744adf0 | ||
|
|
d49a75cad5 | ||
|
|
dc274a0f96 |
466
ROADMAP.md
466
ROADMAP.md
@@ -16785,469 +16785,3 @@ Plus introduces the **NEW `same-request-shape-but-different-response-shape` axis
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 10:32 KST. HEAD: `313c840` (post-#251 fast-forward verification onto gaebal-gajae's 10:30 KST cycle ExternalPatchIntake pinpoint at `313c840` — NINTH consecutive concurrent-dogfood rebase verification cycle, three-way parity confirmed local == origin == fork at HEAD `313c840` with no race detected, demonstrating both gaps #239 catalogues at the dogfood-coordination layer and #243 catalogues at the canonical-ordering layer for the NINTH cycle in a row, confirming concurrent-dogfood-rebase as a stable operational pattern that has now held for NINE cycles in a row — Jobdori files the next-monotonic-id directly atop the prior tip rather than racing for a reservation gap, while gaebal-gajae continues to file pinpoints in numeric order based on the live channel's nudge stream). Branch: feat/jobdori-168c-emission-routing. Sibling-shape cluster: 43 pinpoints (grows by +1 with #252). Pre-flight-cost-prediction cluster: 1 member (#252 alone, founder). Token-accounting-without-message-emission cluster: 1 member (#252 alone, founder). Server-side-pre-execution-counting cluster: 1 member (#252 alone, founder). Same-request-shape-but-different-response-shape sub-cluster: 1 member (#252 alone, founder). Two-member-major-provider-only-no-third-party-partner-set sub-cluster: 7 members (#240+#241+#247+#248+#249+#250+#252) — grows from 6 to 7 confirming continuing-pattern-status across SIX distinct axis-classes (TOOL-COMPANION-BUNDLE / COMPOUND-INPUT / COMPOUND-OUTPUT / QUAD-MODALITY-TURN / SERVER-MANAGED-WEB-SEARCH-WITH-TOOL-CHOICE-DISCRIMINATOR / SERVER-SIDE-PRE-EXECUTION-COUNTING). Eight-layer fusion shape (smaller than #241/#247/#248/#249's twelve-layer count and smaller than #250's ten-layer count, reflecting the smaller-scope-but-novel-axis-class trade-off for daily-driver-impact pinpoints). 
**NEW META-pattern introduced**: NEW-SOLO-CLUSTER-FOUNDING-WITH-DAILY-DRIVER-IMPACT discovery-pattern — distinct from META-cluster-growth (continuous or discontinuous) and distinct from complementary-pinpoint-pair-bundle (paired halves of a tool-subsystem). #252 founds the THIRD distinct discovery-pattern in the audit catalog, the audit now spans THREE structurally distinct discovery-patterns rather than two, demonstrating audit-breadth-across-discovery-pattern-classes alongside audit-balance-across-META-clusters. **PIVOT signal**: #252 deliberately PIVOTS AWAY from BOTH Cross-pinpoint-synthesis-fusion-shape META-cluster (intentionally not extending the +1-per-cycle synthesis chain) AND Tool-locality-axis META-cluster (already extended by #250 cycle #393), founding NEW solo clusters with daily-driver-impact instead. Distinct from #251's contributor-friction/external-patch-intake axis (clawability-coordination layer) by being a daily-clawing-cost-gate workflow primitive (clawability-runtime layer). Linked to #221 (batch-dispatch async pattern, prior closest-shape neighbor with synchronous-batch-via-Files-API-prerequisite, distinct dispatch shape), #224 (Voyage-AI partner-asymmetric, prior provider-asymmetric pattern), #225 (audio partner-asymmetric, prior provider-asymmetric pattern), #226 (image partner-asymmetric, prior provider-asymmetric pattern), #227 (video partner-asymmetric, prior provider-asymmetric pattern with async-task-polling-primitive — closest neighbor in the workflow-primitive-axis sense), and #239/#243 (dogfood-coordination/canonical-ordering, the operational-layer pinpoints that #252's NINTH consecutive concurrent-dogfood rebase cycle continues to demonstrate).
|
||||
|
||||
🪨
|
||||
|
||||
## Pinpoint #253 — Dogfood cycle state-vector grows without compaction/budgeting until peer-agent context windows overflow
|
||||
|
||||
Dogfooded 2026-04-26 11:00 KST after cycle #394: the public dogfood loop had accumulated long state vectors, commit histories, cluster deltas, and repeated parity/rebase summaries across cycles #389-#394. A peer agent explicitly paused cycle #395 because the cumulative dogfood-cycle state vector was overflowing another agent's context window mid-cycle. That pause is direct product evidence: claw-code can keep discovering and appending valid pinpoints, but the coordination transcript has no typed compaction boundary, no rolling state-vector budget, and no canonical short form that lets multiple agents continue the same branch without re-sending the full audit history every turn.
|
||||
|
||||
Verified operational surface: the branch already contains #239 for branch leases and #243 for canonical ordering, but those protect write coordination, not cognitive/context coordination. The live dogfood channel currently relies on humans/agents manually deciding when to shorten, pause, or restate. There is no `DogfoodCycleSummary` / `StateVectorCompact` artifact that records `{cycle, head, parent, branch, active_pinpoint, cluster_delta, race_state, next_owner, blockers}` in a bounded token shape; no max-token policy for public nudge payloads; no rolling cluster ledger that can be referenced by id instead of repeated; no continuation token that says “resume from compact #N”; and no warning when a generated update exceeds a peer-agent budget. This is distinct from `/compact` conversation summarization because the missing primitive is branch/project-level coordination state shared across agents, not a single chat-session memory summary.
|
||||
|
||||
Required fix shape: (a) define a compact dogfood state-vector schema with hard field limits and stable ids for cluster ledgers; (b) emit one canonical compact artifact per cycle, committed or otherwise addressable, so future cycles cite `compact:#394` rather than replaying the full narrative; (c) add a context-budget guard to nudge/report generation that warns or truncates before posting huge state vectors; (d) add a `claw dogfood status --compact` / `claw roadmap compact-state` surface that reconstructs the current branch state from git + ROADMAP markers; (e) teach peer agents to request/reply with compact state by default and expand only on demand. Acceptance: after 10+ consecutive dogfood cycles, a new agent can recover active branch/head/pinpoint/cluster trajectory/blocker state from a bounded compact artifact under a fixed token budget, and the loop does not have to pause cycle spawning just to protect peer-agent context windows. **Status:** Open. No source code changed. Filed as ROADMAP-only dogfood pinpoint from the 2026-04-26 02:00 UTC nudge. Cluster delta: context-budgeting +1, dogfood-state-compaction +1, multi-agent-continuation-token cluster founded, bounded-state-vector-for-branch-coordination cluster founded; linked to #239/#243 as the cognitive coordination complement to write/order coordination.
|
||||
|
||||
## Pinpoint #254 — MCP Resources lifecycle (subscribe / list_changed / updated / session-bound registry) is structurally absent from the runtime
|
||||
|
||||
Dogfooded 2026-04-26 11:02 KST on `feat/jobdori-168c-emission-routing` after #253's context-budget-discipline pivot: claw-code wires `resources/list` and `resources/read` over JSON-RPC stdio (`rust/crates/runtime/src/mcp_stdio.rs:874-981, 1330-1346`) but the MCP Resources lifecycle surface beyond one-shot list+read is structurally absent. Resources read at session start become stale during the session with no detection path, and resources that an MCP server creates/destroys during operation never reach the parent claw session as typed events.
|
||||
|
||||
Verified concrete surface (rg across `rust/crates/`): zero `resources/subscribe` / `resources/unsubscribe` request method, zero `notifications/resources/updated` / `notifications/resources/list_changed` notification handlers, zero `ResourceUpdated` / `ResourceListChanged` / `ResourceCreated` / `ResourceDestroyed` typed lifecycle events, zero `version` / `valid_until` / `etag` / `staleness` field on `McpResource` at `rust/crates/runtime/src/mcp_stdio.rs:175-188`, zero session-bound `ResourceRegistry` primitive that tracks per-session resource handles with create/destroy lifecycle, zero `subscribe` capability advertisement in the initialize handshake at line 1400, zero `/resources` / `/resource-list` / `/resource-refresh` slash command in `SlashCommandSpec`, zero `claw mcp resources` CLI subcommand. The hardened lifecycle phase enum at `mcp_lifecycle_hardened.rs:16-28` enumerates `ResourceDiscovery` once at startup but has no `ResourceRefresh` / `ResourceLifecycleChange` mid-session phase. Server-emitted JSON-RPC notifications between requests are dropped silently at the transport layer because the stdio reader only correlates by `id` and has no notification dispatch table.
|
||||
|
||||
Gap. The MCP spec defines resources as a long-lived discovery-and-subscribe primitive: clients can `resources/subscribe` to a `uri`, receive `notifications/resources/updated { uri }` when content changes, and receive `notifications/resources/list_changed` when the available resource set itself changes (resources created/destroyed by the server). claw-code treats resources as a one-shot snapshot: list once, read on demand, never re-validate. Concrete dogfood friction: an MCP server that exposes a live database table, a watched file, or an LLM-generated artifact has no way to tell claw the row/file/artifact has changed; the agent silently reasons over a stale snapshot until the user notices the divergence.
|
||||
|
||||
Cluster shape novelty. Founds **two** new clusters with #254 as solo founder: (1) **Session-bound-resource-tracking-registry cluster** — typed primitives that track resource handles created/destroyed/updated within a session boundary, distinct from the existing one-shot `list_resources_once` snapshot pattern; (2) **Resource-lifecycle-event-opacity axis** — server→client lifecycle notifications dropped at the transport layer because the JSON-RPC reader has no notification dispatch separate from id-correlated responses, distinct from #229/#238/#244 persistent-WebSocket cluster (those are bidirectional client-driven streams; this is a server-pushed-notification dispatch gap on stdio JSON-RPC).
|
||||
|
||||
Introduces the **FOURTH distinct discovery-pattern** in the audit catalog: **PURE-CLAWABILITY-FRICTION-FROM-DOGFOODING** — pinpoints whose primary novelty is dogfood-observed friction in the agent's own runtime rather than missing API/typed-shape coverage of an external provider surface. Distinct from META-cluster-growth (#244/#247/#248/#249/#250), complementary-pinpoint-pair-bundle (#245+#250), and NEW-SOLO-CLUSTER-FOUNDING-WITH-DAILY-DRIVER-IMPACT (#252). Sibling to #239/#243/#251/#253 which are operational/coordination-layer pinpoints, but #254 is at the **protocol-runtime layer** rather than the dogfood-coordination layer — the agent's own MCP transport silently swallows lifecycle signals it should be surfacing.
|
||||
|
||||
Required fix shape: (a) add `resources/subscribe` + `resources/unsubscribe` request methods on `McpStdioProcess` parallel to `list_resources` / `read_resource`; (b) add a notification dispatch path on the stdio reader that routes `notifications/resources/updated` and `notifications/resources/list_changed` to a per-server channel rather than dropping them; (c) add a `pub enum ResourceLifecycleEvent { Created(McpResource), Updated { uri: String }, Destroyed { uri: String }, ListChanged }` typed event surfaced through `LaneEvents`; (d) add a session-bound `ResourceRegistry` in the runtime that tracks active subscriptions, applies updates, and fires staleness warnings; (e) add `version` / `etag` optional fields on `McpResource`; (f) advertise `resources.subscribe = true` in the initialize handshake when the runtime supports it; (g) expose `/resources`, `/resources refresh`, `/resources subscribe <uri>` slash commands and `claw mcp resources [list|read|subscribe]` CLI subcommands. Acceptance: an MCP server that emits `notifications/resources/updated { uri: "db://orders/42" }` mid-session causes claw to update its `ResourceRegistry`, fire a `ResourceUpdated` lane event, and either re-read the resource on next reference or surface a staleness marker — instead of the agent silently reasoning over a stale snapshot.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 11:04 KST. HEAD: `17efd95` (post-#253 fast-forward verification onto gaebal-gajae's 11:00 KST DogfoodCycleSummary/StateVectorCompact pinpoint at `17efd95` — TENTH consecutive concurrent-dogfood rebase cycle, three-way parity local==origin==fork at `17efd95` confirmed before filing). Branch: feat/jobdori-168c-emission-routing. Cluster delta: session-bound-resource-tracking-registry 0→1 (founder), Resource-lifecycle-event-opacity 0→1 (founder), Pure-clawability-friction-from-dogfooding discovery-pattern 0→1 (founder, FOURTH distinct discovery-pattern after META-cluster-growth + complementary-pinpoint-pair-bundle + NEW-SOLO-CLUSTER-FOUNDING-WITH-DAILY-DRIVER-IMPACT). Smaller-scope by design (matches #253's context-budget-discipline). Distinct from #252's API-shape gap (this is runtime-protocol gap), distinct from #229/#238/#244 persistent-WebSocket cluster (this is stdio JSON-RPC notification dispatch), distinct from #239/#243/#253 dogfood-coordination layer (this is protocol-runtime layer). Linked to #253 as the discipline-pivot enabler that allowed this smaller-scope pinpoint to be foregrounded over the larger META-cluster-growth options.
|
||||
|
||||
## Pinpoint #255 — hikaMaeng fork proves local WebSearch needs a provider/spec registry intake lane, but the safe landing shape is design-first rather than blind cherry-pick
|
||||
|
||||
Dogfooded 2026-04-26 02:12 UTC on `feat/jobdori-168c-emission-routing` by fetching and statically reviewing the public fork `https://github.com/hikaMaeng/claw-code` at `/tmp/hikaMaeng-claw-code-read`. Interesting fork commits inspected: `262405e` (pluggable Tavily/Brave/Bing/custom/DDG fallback), `bd11289` (settings.json-only websearch config), `fa93cd3` (startup banner provider line), `5f2540a` (Firecrawl plus Brave gzip handling), `7f34d91` (external `searchProvider.json` specs), and `535be97` (web-search integration guide). Attribution: implementation ideas are from hikaMaeng / Sigrid Jin's fork work and should remain credited in any follow-up implementation commit.
|
||||
|
||||
Safe intake finding: do **not** cherry-pick the fork wholesale. The useful distilled ideas are smaller and align with #245/#250/#251: (1) separate provider selection from provider mechanics (`settings.json` chooses `websearch.provider` + secret; `searchProvider.json` describes endpoint/method/auth/result paths); (2) keep DDG/HTML parsing as the fallback path while JSON API providers use a generic executor; (3) make provider status visible in startup/UI so operators know which search backend is active; (4) preserve domain allow/block filtering, dedupe, and result truncation after provider-specific parsing; (5) handle provider transport quirks centrally (for example Brave gzip / response decoding) rather than in ad hoc call sites; (6) document custom-provider extension without requiring rebuilds.
|
||||
|
||||
Why ROADMAP-only in this branch: current `rust/crates/tools/src/lib.rs` already has a local DDG-backed `WebSearch` tool with tests and `CLAWD_WEB_SEARCH_BASE_URL` mock support, but the fork's later commits add config-schema surface, runtime config validation, root-level provider spec files, and CLI banner wiring as one cross-crate feature. Landing that feature safely needs an implementation lane with tests for config precedence, provider-spec parsing, no-secret logging, mock provider HTTP, and backward-compatible DDG behavior. A minimal cherry-pick would either break the existing test contract or introduce an unreviewed external spec file/runtime path.
|
||||
|
||||
Required fix shape for follow-up: (a) add a `WebSearchConfig { provider, api_key_ref/api_key }` runtime config view using existing settings precedence; (b) add a provider-spec schema with explicit allowlisted auth modes and result-path parsing, searched from project/user/system locations; (c) build a generic JSON provider executor plus a preserved DDG HTML executor; (d) keep post-parse domain filters/dedupe/truncate common; (e) add provider-status display only after config parse is non-fatal and redacts secrets; (f) add docs derived from `535be97` but rewritten in this repo's style and language; (g) add an external-patch-intake packet under #251 recording fork URL, commit range, diffstat, reviewed files, accepted/rejected ideas, and attribution.
|
||||
|
||||
Acceptance: `WebSearch` continues to pass existing DDG/mock tests with no settings file; setting `.claw/settings.json` to a supported provider plus a local mock `searchProvider.json` routes through the generic executor; missing API keys fail with typed/actionable errors before dispatch; provider name is visible without leaking secrets; and the commit body/ROADMAP preserves attribution to hikaMaeng's fork commits above. **Status:** Open. No source code changed in this intake commit. Cluster delta: #245 client-side configurable provider/parser registry gains concrete external implementation evidence; #251 external-fork intake gains its first reviewed fork packet; #250 server-managed search remains the complementary server-side half and is intentionally not mixed into this local-provider implementation lane.
|
||||
|
||||
## Pinpoint #257 — Completed OMX sessions can keep emitting stale alerts because clawhip does not read terminal workflow state
|
||||
|
||||
Dogfooded 2026-04-26 11:30 KST immediately after #256 landed. The OMX session `clawcode-issue-256-tool-use-result-contiguity` had already fixed, tested, committed, pushed, and marked its ralplan state terminal (`active: false`, `current_phase: complete`, blocker none). Despite that, clawhip emitted a 10-minute stale pane alert against the idle Codex prompt because it only saw pane inactivity and did not correlate the tmux session with the `.omx/state/sessions/.../ralplan-state.json` terminal state or the recent git push event.
|
||||
|
||||
Concrete failure mode: completed sessions become false-positive stale work. Operators have to manually capture the pane, verify commit/push/test status, and kill the session. This creates alert fatigue and can mask real stuck sessions. It is distinct from #253 context compaction and #239 branch leases: the missing primitive is stale-monitor lifecycle integration between tmux pane state, OMX workflow state, and git/event completion receipts.
|
||||
|
||||
Required fix shape: clawhip stale detection should classify sessions as `complete-idle` when (a) the associated OMX/Codex workflow state is terminal, (b) the pane contains a final status block with commit/pushed/tests/blocker none, or (c) a matching git event has landed for the session branch after the prompt started. For `complete-idle`, clawhip should auto-suppress stale alerts and optionally auto-retire/kill the tmux session after a grace period, emitting a compact cleanup receipt instead of repeated stale warnings. Acceptance: after a session lands a commit and marks workflow complete, no further stale alerts are emitted for that tmux session; it is either auto-killed or reported once as completed and retired. **Status:** Open. Filed as ROADMAP-only dogfood pinpoint from the 2026-04-26 02:30 UTC nudge; live cleanup performed by killing `clawcode-issue-256-tool-use-result-contiguity` after verifying branch clean at `56f7f2e6`.
|
||||
|
||||
## Pinpoint #258 — `--allowedTools ""` (empty value) silently coerces to zero-tool agent with no warning, joining the silent-fallback / silent-coercion family at the CLI parse boundary
|
||||
|
||||
Dogfooded 2026-04-26 11:32 KST on `feat/jobdori-168c-emission-routing` at HEAD `a3f5a83` (post-#257 fast-forward verification onto gaebal-gajae's clawhip-stale-monitor pinpoint). Reproduction: `claw --allowedTools "" --output-format json -p "test"` accepts the empty value with zero warnings/errors and proceeds to dispatch — there is no diagnostic event distinguishing "operator typed `--allowedTools \"\"` by accident" from "operator wants every tool disabled." The CLI parse arm at `rust/crates/rusty-claude-cli/src/main.rs:979` accepts whatever the next argv slot contains, including `""`, and pushes it into `allowed_tool_values`. The wrapper `normalize_allowed_tools` at `main.rs:1826` checks `values.is_empty()` (returns `None` when zero `--allowedTools` flags were passed entirely) but does not check whether each individual `value` is empty/whitespace-only, so the empty string flows into `current_tool_registry()?.normalize_allowed_tools(values)` at `rust/crates/tools/src/lib.rs:192`. There the `for value in values` loop applies `value.split(|ch: char| ch == ',' || ch.is_whitespace()).filter(|token| !token.is_empty())` which yields zero tokens for `""`, the inner `for token in ...` loop never executes, the unsupported-tool error path is skipped, and the function returns `Ok(Some(BTreeSet::new()))` — a `Some(empty)` distinct from the omit-the-flag-entirely `None` case. Downstream, `tool_registry.definitions(allowed_tools)` at `lib.rs:248` filters every spec/runtime/plugin tool by `allowed_tools.is_none_or(|allowed| allowed.contains(...))`, and because `allowed.contains(...)` returns false for every name against an empty set, the agent receives **zero tools** — no read_file, no write_file, no bash, no grep, no glob, no MCP, no plugins. 
The agent boots fully, the wire request is dispatched normally (no early return), and the model receives the user prompt with an empty tool list and either hallucinates without tools or stalls when it tries to call one — meanwhile the operator sees no signal that they've just asked for a tool-less agent.
|
||||
|
||||
Gap. The CLI parse layer treats `--allowedTools ""` as semantically equivalent to "explicitly enumerate zero tools" rather than as "operator passed a malformed empty value." This is silent-empty-coercion at the CLI parse boundary: the input string was clearly an accident (no shell idiom passes an intentional empty argument that means the same as explicitly disabling everything), but the parser, the registry, and the dispatcher all fail to surface a single diagnostic. The behavior compounds with #201/#202/#203/#206/#207/#208 (silent-fallback at the provider boundary) and #213 (silent-zero-coercion on cached_tokens) at a structurally distinct layer — the CLI parse layer rather than the provider boundary.
|
||||
|
||||
Cluster delta: joins the silent-fallback / silent-drop / silent-strip / silent-coercion sibling-shape cluster at the CLI-parse-boundary axis, extending it across one more structural layer. Distinct from #213's silent-zero-coercion (response-side wire deserialization) by being request-side CLI input parse, distinct from #211's silent-misnomer (parameter rename) by being empty-value-acceptance rather than name-mismatch, and distinct from #29's silent fallback (provider routing) by being a tool-allowlist permissive-vs-restrictive boundary rather than a provider-routing fallback. Does NOT found a new cluster (per #253 context-budget discipline preferring extension over founding).
|
||||
|
||||
Required fix shape: (a) in `normalize_allowed_tools` (`rusty-claude-cli/src/main.rs:1826`), reject empty/whitespace-only values with a typed `AllowedToolsParseError::EmptyValue { flag_position }` returning a non-zero exit and an actionable error message ("--allowedTools requires at least one tool name; pass `--allowedTools none` to explicitly disable all tools, or omit the flag to enable all"); (b) introduce an explicit `none` literal token (or `--allowedTools=none` / `--no-tools`) for the legitimate "every tool disabled" use case so the empty-string accident is structurally distinct from the intentional-disable; (c) emit a `CliFlagWarning` structured event when `--output-format json` is active so downstream consumers can surface the diagnostic; (d) add tests covering `[""]`, `[" "]`, `["", "read"]`, `["read", ""]`, and the new `["none"]` literal. Acceptance: `claw --allowedTools "" -p "x"` exits non-zero with a typed error; `claw --allowedTools none -p "x"` proceeds with explicit zero-tool intent; existing `claw --allowedTools read,glob -p "x"` and `claw -p "x"` (no flag) behaviors are preserved.
|
||||
|
||||
Security relevance: the inverse failure mode (empty `--disallowedTools` or empty deny-list silently permitting all tools) is the exact shape upstream PR claw-code#2806 attempted to address (empty-config permission fallback safety, opened+closed within 3min on 2026-04-26). #258 catalogues the symmetric allow-list side at the CLI flag layer rather than the config layer, complementing the upstream PR's config-layer focus.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 11:32 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `a3f5a83` (post-#257 fast-forward verification). Cluster delta: silent-fallback-family extension (no new cluster founded, per #253 context-budget discipline). Smaller-scope by design (matches #253/#254/#257's discipline). Sibling: #201/#202/#203/#206/#207/#208/#213 (silent-fallback-family) at the provider boundary; #258 extends the family to the CLI parse boundary as the first member where the silent-coercion happens before any wire dispatch. Linked to upstream PR claw-code#2806 (empty-config permission fallback safety) as the symmetric config-layer half of the same anti-pattern.
|
||||
|
||||
## Pinpoint #259 — Dogfood status reports can publish stale branch/phase facts without provenance or freshness checks
|
||||
|
||||
Dogfooded 2026-04-26 12:00 KST after cycle #396: a dogfood status report was posted minutes after commits #254-#258 had landed, yet it claimed that the branch was only four commits ahead of `dev`, that the last commit was `94f9540`, and that there had been no new commits since 2026-04-23, no active session today, and no new pinpoints filed on 2026-04-26. The live branch at the same time already contained `70058a0` #254, `62adbf4` #255, `56f7f2e` #256 (a real code fix), `a3f5a83` #257, and `a07c0b7` #258. The report looked authoritative but was generated from stale memory rather than a fresh git/ROADMAP read.
|
||||
|
||||
Concrete failure mode: multi-agent dogfood coordination can regress to outdated phase summaries even while the branch is actively moving. Operators then have to manually cross-check `git log`, ROADMAP markers, and chat history to decide whether the report is actionable. This is distinct from #253 compact state-vector budgeting: #253 bounds context size; #259 requires freshness/provenance assertions before publishing a compact status.
|
||||
|
||||
Required fix shape: every dogfood status report should include machine-checked provenance fields (`generated_at`, `repo`, `branch`, `head`, `head_timestamp`, `roadmap_last_pinpoint`, `git_fetch_time`, `source=git+ROADMAP`, `staleness_seconds`) and refuse/label reports when the source snapshot is older than a small threshold. `claw dogfood status --compact` should fetch, parse latest ROADMAP pinpoint id, compare against local chat-memory claims, and emit `STALE_STATUS_SOURCE` if they disagree. Acceptance: a report cannot claim “no new commits/new pinpoints” while `origin/feat/jobdori-168c-emission-routing` contains newer commits/pinpoints than its own provenance head. **Status:** Open. Filed as ROADMAP-only dogfood pinpoint from the 2026-04-26 03:00 UTC nudge; live branch was verified before filing and pushed on top of #258.
|
||||
|
||||
## Pinpoint #260 — `--compact --output-format json` envelope silently strips six observability fields (auto_compaction, iterations, tool_uses, tool_results, prompt_cache_events, estimated_cost) that the non-compact JSON envelope emits, with no diagnostic, no marker delta beyond `compact: true`, and no documentation that the strip occurs
|
||||
|
||||
Dogfooded 2026-04-26 12:05 KST on `feat/jobdori-168c-emission-routing` at HEAD `1daf636` (post-#259 fast-forward verification). The dispatch in `LiveCli::run_turn_with_output` (`rust/crates/rusty-claude-cli/src/main.rs:4637-4650`) routes `CliOutputFormat::Json if compact` to `run_prompt_compact_json` (`main.rs:4665-4688`) and `CliOutputFormat::Json` (no compact) to `run_prompt_json` (`main.rs:4690-4729`). Both paths receive the SAME `runtime::TurnSummary` from `runtime.run_turn(...)`, but the two envelopes serialize wildly different field sets. `run_prompt_json` emits nine top-level keys: `message`, `model`, `iterations`, `auto_compaction`, `tool_uses`, `tool_results`, `prompt_cache_events`, `usage`, `estimated_cost`. `run_prompt_compact_json` emits four: `message`, `compact: true`, `model`, `usage`. **Six observability-critical fields are dropped silently** — most notably `auto_compaction` (the structured signal that the runtime auto-compacted the session mid-turn, removing N messages from history) and `iterations` (turn-loop iteration count, the only non-summary signal of how the agent reached the final assistant text). The `compact: true` marker is the ONLY diff a downstream JSON consumer can observe; nothing in the envelope, the help text, or any structured-error stream tells the operator that adding `--compact` discarded the auto-compaction event, the iteration count, the tool-use trace, the tool-result trace, the prompt-cache events, and the cost estimate. Operators who script `claw -p "x" --compact --output-format json | jq` to keep wire size small unknowingly lose the only signal that auto-compaction fired, and the only way to recover it is to remove `--compact` and re-run the prompt.
|
||||
|
||||
Gap. This is **silent-strip-on-response-envelope at the CLI output layer**, distinct from #136 (which only verified that `run_prompt_compact_json` exists and emits valid JSON with `compact: true`, never auditing what the envelope drops vs. its non-compact sibling) and distinct from #98 (which audited `--compact` being silently *ignored* outside the prompt-text path; #136 closed that by adding the dispatch arm — but the new envelope itself is the gap). The compact-JSON path was added to honor the flag, but the envelope was hand-coded with a minimal field set that omits exactly the fields a JSON-mode operator most needs (auto_compaction event, iteration count, cost). Worse, `auto_compaction` is the documented mechanism by which #134/#135's session-identity signals propagate — stripping it silently disables that downstream observability for any consumer that scripted around `--compact --output-format json`.
|
||||
|
||||
Cluster delta: joins the silent-fallback / silent-drop / silent-strip / silent-coercion sibling-shape cluster, extending it from 8 to 9 members. Distinct from #258 (CLI parse boundary, request-side), distinct from #213/#207/#208 (provider boundary, response-side wire deserialization), distinct from #203 (no streaming auto_compaction event at all). #260 is the FIRST member where the silent-strip happens at the **CLI response-envelope serialization layer** — after the runtime has fully populated the summary, the CLI itself drops the fields when assembling the JSON. Founds the **CLI-response-envelope-silent-strip sub-shape** within the silent-fallback family: the runtime computes the signal correctly; the CLI envelope serializer chooses not to surface it; no diagnostic surfaces the choice. Sibling-shape with #258 in that both extend the silent-fallback cluster at the CLI boundary, but #258 is request-side parse and #260 is response-side serialize — together they bracket the full CLI I/O perimeter for the silent-fallback family. Does NOT found a new top-level cluster (per #253 context-budget discipline preferring extension over founding).
|
||||
|
||||
Required fix shape: (a) align `run_prompt_compact_json` envelope so it emits the SAME field set as `run_prompt_json` minus only the fields whose value is genuinely stripped by the compact intent (the documented intent is "strip tool call details; print only the final assistant text" — so dropping `tool_uses`/`tool_results` is intentional, but dropping `auto_compaction`/`iterations`/`prompt_cache_events`/`estimated_cost` is not); concretely, add `iterations`, `auto_compaction`, `prompt_cache_events`, and `estimated_cost` to the compact-JSON envelope; (b) document the field-set delta in `--help` for `--compact` ("in JSON mode, strips `tool_uses` and `tool_results`; preserves `auto_compaction`, `iterations`, `prompt_cache_events`, `usage`, `estimated_cost`"); (c) add a regression test `run_prompt_compact_json_preserves_auto_compaction_signal` that asserts the compact-JSON envelope contains the `auto_compaction` key (null or populated) so future envelope edits cannot silently regress; (d) optionally emit a structured `EnvelopeFieldStrip` event listing dropped fields when `--output-format json` is active so downstream consumers can self-discover what the compact lane drops. Acceptance: `claw -p "x" --compact --output-format json | jq 'keys'` returns at least `["auto_compaction", "compact", "estimated_cost", "iterations", "message", "model", "prompt_cache_events", "usage"]`; the only fields stripped relative to non-compact are the documented `tool_uses`/`tool_results`; a synthetic auto-compaction event surfaces under `--compact` identically to non-compact.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 12:05 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `1daf636` (post-#259 fast-forward verification onto gaebal-gajae's stale-status-report-provenance pinpoint). Cluster delta: silent-fallback-family extension 8→9 (no new top-level cluster founded, per #253 context-budget discipline). CLI-response-envelope-silent-strip sub-shape introduced (response-envelope strip layer, sibling to #258's request-parse layer). Sibling: #98 (silent-flag-no-op class, predecessor at the dispatch layer, closed by #136), #136 (compact-JSON envelope existence, closed without auditing field-set parity), #203 (auto_compaction summary-only, no streaming event — #260 escalates: even the summary signal is dropped under `--compact`), #258 (CLI parse boundary silent-coercion, request-side complement to #260's response-side strip). Does not duplicate #98/#136: those audited dispatch and envelope-existence; #260 audits envelope-content-parity vs. its non-compact sibling — a structurally distinct surface.
|
||||
|
||||
## Pinpoint #261 — Compact dogfood summaries lack internal consistency checks for counted ranges and enumerated items
|
||||
|
||||
Dogfooded 2026-04-26 12:30 KST against the live status summaries after #260. Multiple compact reports correctly switched to fresh-HEAD style after #259, but still published internally inconsistent arithmetic: one report said “8 new pinpoints filed today (#252–#260)” while enumerating #252, #253, #254, #255, #256, #257, #258, #259, and #260 — nine items. Another nearby report said “9 items across cycles #394–#400” while also listing #252–#260, again mixing range count, cycle count, and bullet count without validation. The report can be fresh and provenance-backed yet still self-contradictory.
|
||||
|
||||
Concrete failure mode: downstream agents use compact summaries to decide whether a cycle was handled, but a range/count mismatch forces manual recounting and can cause skipped cycle numbers or duplicate filings. This is distinct from #259: #259 verifies source freshness against git/ROADMAP; #261 verifies the report's own derived fields after freshness is established.
|
||||
|
||||
Required fix shape: `claw dogfood status --compact` should compute and validate `pinpoint_range_start`, `pinpoint_range_end`, `pinpoint_count`, `cycle_range`, and `listed_items_count` from the same parsed ledger, not freeform text. If the rendered text contains a numeric count or range, a pre-send validator should assert `end-start+1 == listed_items_count == pinpoint_count` and emit `STATUS_COUNT_MISMATCH` instead of publishing. Acceptance: a status report cannot say “8 items (#252–#260)” while listing nine bullets; the command either corrects the count or refuses the report with the mismatched fields. **Status:** Open. Filed as ROADMAP-only dogfood pinpoint from the 2026-04-26 03:30 UTC nudge; branch verified and pushed on top of #260.
|
||||
|
||||
## Pinpoint #262 — `--max-turns N` is structurally absent from the CLI surface AND fails in two different shapes depending on argv position relative to `-p`: pre-`-p` it is rejected with a typed `unknown option` error, post-`-p` it is silently absorbed into the prompt body via `args[index+1..].join(" ")` greedy slurp, with no diagnostic and no help-text mention
|
||||
|
||||
Dogfooded 2026-04-26 12:32 KST on `feat/jobdori-168c-emission-routing` at HEAD `2a0e5de` (post-#261 fast-forward verification onto gaebal-gajae's compact-summary self-consistency-check pinpoint). Reproduction matrix against `./rust/target/release/claw`:
|
||||
|
||||
- `claw -p "say hi" --max-turns 0` → exits with `[error-kind: missing_credentials]` (the prompt body becomes `"say hi --max-turns 0"` and dispatch proceeds normally; no flag was rejected, no diagnostic about `--max-turns`, the credential error is downstream of an already-mangled prompt).
|
||||
- `claw --max-turns 0 -p "say hi"` → exits with `[error-kind: cli_parse]` `error: unknown option: --max-turns` (rejected by `format_unknown_option` at `rust/crates/rusty-claude-cli/src/main.rs:1565` because the parse loop sees `--max-turns` BEFORE `-p` and falls into the catch-all `other if rest.is_empty() && other.starts_with('-')` arm at `main.rs:993`).
|
||||
- `claw --max-turns=0 -p "say hi"` → same `unknown option: --max-turns=0` rejection.
|
||||
- `claw "hello world" --max-turns 0` → bare-prompt branch silently accepts (positional rest collects `["hello world", "--max-turns", "0"]` because, once `rest` is non-empty, the `other` arm at `main.rs:996-999` pushes every subsequent token onto `rest`, flag-shaped or not).
|
||||
|
||||
Gap. **`--max-turns` does not exist** in the claw-code CLI surface: zero entries in `CLI_OPTION_SUGGESTIONS` (`main.rs:176-194`), zero match arms in `parse_args` (`main.rs:811-1004`), zero mention in `--help` output, zero typed `MaxTurns` field on `CliAction::Prompt` (`main.rs:696-749`), zero turn-budget plumbing into `LiveCli::run_turn_with_output` or `runtime.run_turn`. The only `turns` accounting in the runtime is the post-hoc `UsageTracker::turns()` counter (`main.rs:3156, 4915, 5762`) — a read-only odometer, not an enforced ceiling. This contrasts with Claude Code (the upstream CLI) which exposes `--max-turns N` as a documented turn-budget enforcement flag and which is the canonical way operators bound runaway tool-use loops in non-interactive `-p` mode.
|
||||
|
||||
Worse, the failure mode is **structurally asymmetric depending on argv position relative to `-p`** — a property no other silent-fallback family member exhibits. The `-p` arm at `main.rs:944` does `let prompt = args[index + 1..].join(" ")`, a greedy-slurp design that consumes EVERYTHING after `-p` as prompt body without re-entering the flag-parse loop. Any unknown flag passed AFTER `-p` is silently absorbed into the user's prompt. A `--max-turns 0` passed after `-p` is not just unsupported; it is invisibly mutated into prompt content, polluting the model input with operator-intended-machine-control-tokens that the model will see as natural language. A `--max-turns 0` passed BEFORE `-p` is at least surfaced as `unknown option`. The two outcomes — silent-prompt-pollution vs. typed-cli_parse-error — for the SAME flag differ ONLY by argv position, with no documentation that the boundary exists. The `-p` greedy-slurp is the actual silent-fallback site; `--max-turns` is just one observable instance of the class.
|
||||
|
||||
Cluster delta: joins the silent-fallback / silent-drop / silent-strip / silent-coercion / silent-prompt-absorption sibling-shape cluster, **extending it from 9 to 10 members** (#258 CLI-parse empty-coercion → #260 response-envelope strip → #262 CLI-parse position-sensitive-prompt-absorption). #262 is the FIRST member where the silent shape is **conditional on argv position relative to a sibling flag**, founding the **position-sensitive-parse-asymmetry sub-shape** within the silent-fallback family: the same input text produces a typed error or silent prompt-pollution depending only on argv ordering. Distinct from #258 (`--allowedTools ""` empty-string-coercion: silent always, regardless of position) by being position-conditional. Distinct from #260 (compact-JSON envelope strip: silent always, response-side) by being request-side argv-parse. Distinct from prior unknown-option behavior (which is not silent) because the silent path is reached only when the unknown flag arrives after `-p`. Audit-completeness for the silent-fallback chain at the **numeric-flag boundary** AND the **position-sensitive-CLI-parse boundary** simultaneously — two structurally distinct surfaces audited in one pinpoint. Does NOT found a new top-level cluster (per #253 context-budget discipline preferring extension over founding); the position-sensitive-parse-asymmetry is registered as a sub-shape inside the existing silent-fallback family.
|
||||
|
||||
Required fix shape: (a) declare `--max-turns N` as a typed CLI flag with `validate_max_turns` accepting `u32` (rejecting negative and non-numeric values with a typed `MaxTurnsParseError`), thread `max_turns: Option<u32>` through `CliAction::Prompt` and `LiveCli::run_turn_with_output`, and pass it as a hard ceiling into the runtime turn loop so `runtime.run_turn` returns a typed `TurnBudgetExhausted` event when the count is reached; (b) add `--max-turns` to `CLI_OPTION_SUGGESTIONS` (`main.rs:176-194`) and to `--help` output; (c) restructure the `-p` arm at `main.rs:944` so it does NOT greedily slurp `args[index+1..].join(" ")` but instead consumes only the next argv slot as the prompt and continues the flag-parse loop, OR explicitly require `-p` to be the LAST flag (rejecting any token starting with `-` after `-p` with `error: -p must be the final flag; saw '--max-turns' after the prompt`); (d) treat `--max-turns 0` semantically as "return immediately after dispatch with `iterations: 0` and no model call" (matching upstream Claude Code's documented zero-turn behavior, useful for cost-zero parse-validation runs); (e) emit a `CliFlagWarning` structured event when `--output-format json` is active and an unknown flag is detected after `-p`, so downstream consumers can surface the would-have-been-silent prompt-pollution diagnostic; (f) add tests covering `["-p", "x", "--max-turns", "0"]`, `["--max-turns", "0", "-p", "x"]`, `["-p", "x", "--unknown-flag"]`, `["--max-turns=5", "-p", "x"]`, `["-p", "x", "--max-turns=-1"]`, and `["hello", "--max-turns", "0"]` (bare-prompt rest-positional case). 
Acceptance: `claw -p "x" --max-turns 0` either rejects with a typed error OR enforces the turn budget without silently mutating the prompt; `claw --max-turns 0 -p "x"` and `claw -p "x" --max-turns 0` produce structurally equivalent outcomes (no position-sensitive divergence); `claw --help` lists `--max-turns N`; downstream JSON consumers can detect the would-have-been-silent absorption case.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 12:32 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `2a0e5de` (post-#261 fast-forward verification onto gaebal-gajae's compact-summary self-consistency-check pinpoint). Cluster delta: silent-fallback-family extension 9→10 (no new top-level cluster founded, per #253 context-budget discipline). Position-sensitive-parse-asymmetry sub-shape introduced (request-side argv-parse layer, sibling to #258's empty-value-coercion at the same layer and #260's response-envelope-strip at the response-side serialize layer). Sibling: #258 (`--allowedTools ""` empty-coercion at CLI parse boundary, position-invariant), #260 (`--compact --output-format json` envelope strip at CLI response-envelope layer, position-invariant), #98/#136 (predecessor `--compact` silent-no-op family at the dispatch layer). Together with #258 and #260, #262 brackets the CLI I/O perimeter across THREE structurally distinct silent-fallback shapes: empty-value-coercion (#258), response-envelope-strip (#260), and position-sensitive-prompt-absorption-of-unknown-flag (#262). Tenth-cycle concurrent-dogfood-rebase parity confirmed local==origin==fork at HEAD `2a0e5de` before filing.
|
||||
|
||||
## Pinpoint #263 — `--compact` help text says "text mode only" even though the CLI has a live compact-JSON dispatch path, creating a stale-contract trap for operators auditing JSON observability
|
||||
|
||||
Dogfooded 2026-04-26 13:02 KST on `feat/jobdori-168c-emission-routing` at HEAD `0e4fd38` (post-#262). Fresh `cargo run --quiet --bin claw -- --help` from `rust/` prints `--compact Strip tool call details; print only the final assistant text (text mode only; useful for piping)`. That help text is now false/stale: #260 already verified that `LiveCli::run_turn_with_output` dispatches `CliOutputFormat::Json if compact` into `run_prompt_compact_json`, so `--compact --output-format json` is a real live mode, not text-only. The product surface therefore gives operators the wrong contract at exactly the place they would look before testing the compact JSON envelope.
|
||||
|
||||
Concrete failure mode: an operator trying to inspect or script compact JSON observability sees help text claiming `--compact` is text-mode-only, while the runtime actually accepts compact JSON and emits a reduced JSON envelope. This can cause two bad outcomes: (1) users avoid testing/using compact JSON because the help says it should not exist; or (2) downstream claws treat compact JSON behavior as accidental/unsupported even though there is a dedicated code path. That stale help text also masked #260's more serious envelope-field strip: the documented contract never states which JSON fields compact mode preserves or drops because it incorrectly says JSON mode is out of scope.
|
||||
|
||||
Gap. This is a **help-contract drift / doc-to-runtime divergence** at the CLI surface, distinct from #260. #260 is about the runtime compact-JSON envelope silently stripping observability fields after dispatch; #263 is about the advertised CLI contract being stale before dispatch. The runtime has a feature the help denies exists. It is also distinct from #262's `--max-turns` absence: #262 is a missing flag plus position-sensitive parse asymmetry; #263 is an existing flag whose mode matrix is documented incorrectly.
|
||||
|
||||
Required fix shape: (a) update `--compact` help text to describe the actual mode matrix: text mode strips tool-call detail into final assistant text; JSON mode emits a compact JSON envelope; (b) document the compact-JSON field contract after #260 is fixed, explicitly naming preserved fields (`iterations`, `auto_compaction`, `prompt_cache_events`, `usage`, `estimated_cost`) and intentionally stripped fields (`tool_uses`, `tool_results`); (c) add a help-output regression that fails if `--compact` still says `text mode only` while `CliOutputFormat::Json if compact` remains supported; (d) optionally add `claw --help --json` / structured flag metadata later so mode compatibility can be generated from the same source as parser dispatch instead of hand-written prose. Acceptance: `claw --help` no longer claims `--compact` is text-only; compact JSON's supported status and field delta are discoverable before running a prompt; help output and dispatch matrix cannot drift silently.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 13:02 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `0e4fd38` before filing. Cluster delta: help-contract-drift +1; CLI-contract-observability gap adjacent to #260 but not a duplicate. Concrete delta this cycle: ROADMAP-only pinpoint appended after fresh help-output verification; no implementation landed.
|
||||
|
||||
## Pinpoint #264 — Turn-budget primitive is structurally absent from the runtime: `max_iterations` exists as an untyped builder knob with `usize::MAX` default, the live CLI never calls it, and exhaustion produces a bare string `RuntimeError` with no typed `TurnBudgetExhausted` event, no session-state turn counter, no warning event before the cap, and no zero-turn semantics — sister pinpoint to #262 covering the runtime-side of the same turn-budget concern
|
||||
|
||||
Dogfooded 2026-04-26 13:05 KST on `feat/jobdori-168c-emission-routing` at HEAD `d0aa18e` (post-#263 fast-forward verification onto gaebal-gajae's `--compact` help-text-vs-dispatch-mismatch pinpoint). #262 audited the CLI **parse-side** of the turn-budget gap (no `--max-turns` flag, plus position-sensitive prompt absorption when the unknown flag arrives after `-p`). #264 audits the **runtime-side**: even if the CLI flag existed and were wired through `CliAction::Prompt`, the runtime layer it would have to plumb into has no typed turn-budget primitive at all. The existing `max_iterations: usize` field at `rust/crates/runtime/src/conversation.rs:132` is a flat untyped builder knob, the `with_max_iterations` builder at line 192 takes only a raw `usize` with no validation, the field defaults to `usize::MAX` at line 181, the cap check at lines 343-352 returns a bare string-bodied `RuntimeError::new("conversation loop exceeded the maximum number of iterations")` with no typed discriminant, and `RuntimeError` itself at lines 87-93 is a single-field struct holding only `message: String` with no `kind` enum / no `RuntimeErrorKind::TurnBudgetExhausted` variant / no machine-readable reason field. The post-completion `TurnSummary` at lines 110-118 carries `iterations: usize` as a passive odometer but has no `budget: Option<u32>` / `budget_remaining: Option<u32>` / `budget_exhausted: bool` companion fields and no in-loop event surface that warns before the cap is reached.
|
||||
|
||||
Verified concrete surface (rg across `rust/crates/`): zero `TurnBudget`, zero `TurnBudgetExhausted`, zero `TurnBudgetWarning`, zero `TurnsExhausted`, zero `TurnLimit`, zero `TurnCap`, zero `turn_budget`, zero `turn_limit`, zero `turn_cap` symbols anywhere in the workspace. Only two `with_max_iterations` callers exist: `conversation.rs:1768` (one unit test setting `1`) and `tools/lib.rs:3589` (subagent runtime with the hardcoded `DEFAULT_AGENT_MAX_ITERATIONS: usize = 32` const at `tools/lib.rs:3475`). The live primary CLI dispatch in `rusty-claude-cli/src/main.rs:7705` constructs `ConversationRuntime::new_with_features(...)` and **never** chains `.with_max_iterations(...)`, so the main interactive and non-interactive CLI always run with `usize::MAX` as the ceiling — the cap check on line 344 is dead code in the primary product surface and only triggers in the subagent dispatch and the one unit test. There is no telemetry/event/log emitted as the iteration count grows; `record_assistant_iteration` at line 593 records each iteration but emits no warning when iteration count crosses (say) 50% / 75% / 90% of the configured budget. There is no zero-turn semantic (a `max_iterations = 0` config would fail-fast on the first iteration with the same generic string error rather than returning a typed `iterations: 0, no_model_call: true` outcome useful for cost-zero parse-validation runs).
|
||||
|
||||
Gap. The runtime treats turn-count as an **untyped odometer plus a single emergency tripwire**, not as a first-class budget primitive. There is no `pub struct TurnBudget { max_turns: u32, warn_at_pct: Option<u8>, on_exhaust: ExhaustionPolicy }` shape, no `pub enum ExhaustionPolicy { Error, ReturnPartial, RequestExtension }`, no `pub enum RuntimeErrorKind { TurnBudgetExhausted { iterations: usize, max: usize }, ApiError, SessionError, HookError, … }`, no in-loop `TurnBudgetEvent::ApproachingLimit { iterations, max }` lane event, no per-session `Session::turn_counter` field that persists across turns (the iteration counter resets to 0 at the top of every `run_turn`, so a long session can run 100 turns with 32 iterations each — 3,200 model calls — without any cumulative budget tripping). The downstream effect: even if #262's CLI parse-side fix lands and a `--max-turns N` flag becomes plumbable, the runtime has no typed surface to plumb it INTO; the only available landing site is the same untyped `with_max_iterations(usize)` builder, which conflates per-turn iteration cap (the existing knob) with cumulative-session turn cap (the upstream Claude Code semantic), gives no typed exhaustion event, and silently disagrees with the subagent-only `DEFAULT_AGENT_MAX_ITERATIONS = 32` ceiling.
|
||||
|
||||
Cluster shape novelty: founds the **NEW Turn-budget-primitive cluster** with #264 as solo founder. The cluster catalogues missing typed primitives at the iteration/turn/session-budget axis: `TurnBudget` config struct, `TurnBudgetExhausted` typed exhaustion event, `TurnBudgetWarning` pre-exhaustion event, `Session::turn_counter` cumulative state, `ExhaustionPolicy` enum, and zero-turn semantics. Distinct from #262 (CLI parse-side; #262 is the request-shape gap, #264 is the type-shape gap underneath it) — together #262+#264 bracket the full turn-budget concern across the CLI parse boundary and the runtime primitive boundary. Distinct from the silent-fallback family (#258/#260/#262): the silent-fallback family is about silent input mutation and silent output stripping at boundaries; #264 is about a **missing typed primitive** layer that prevents typed errors from existing at all, regardless of whether the boundary is silent or loud. Distinct from #239 branch leases / #243 canonical ordering / #253 context-budget compaction (those are coordination/operational primitives at the cycle/branch level; #264 is a runtime-loop primitive at the model-iteration level). Distinct from #229/#238/#244 persistent-WebSocket cluster (those are bidirectional client-driven streams; #264 is a synchronous loop counter). #264 sits in the **runtime-typed-primitive layer**, parallel to #254 (MCP Resources lifecycle, also runtime-protocol layer) — both pinpoints catalogue missing typed primitives the runtime should expose to higher layers.
|
||||
|
||||
Discovery-pattern continuation: #264 is the first member of a **complementary-pinpoint-pair-bundle at the same turn-budget concern across two structural layers**, sister-shaped to #245+#250 (client/server complementary pair at the WebSearch concern). The pair #262 (CLI-parse layer) + #264 (runtime-primitive layer) catalogues both halves of the same operator capability gap and demonstrates that audit-completeness for a single user-facing flag often requires pinpointing TWO distinct internal layers rather than a single dispatch site. Does NOT extend the silent-fallback family (10 members at the close of #262); founds a fresh top-level cluster instead because the missing primitive is the **prerequisite layer** silent-fallback siblings would land typed errors INTO.
|
||||
|
||||
Required fix shape: (a) introduce `pub struct TurnBudget { pub max_turns: u32, pub max_iterations_per_turn: Option<u32>, pub warn_at_pct: Option<u8>, pub on_exhaust: ExhaustionPolicy }` with `Default::default()` returning unbounded; (b) introduce `pub enum ExhaustionPolicy { Error, ReturnPartial }` defaulting to `Error`; (c) replace `RuntimeError { message: String }` with `RuntimeError { kind: RuntimeErrorKind, message: String }` where `RuntimeErrorKind` is a typed enum including `TurnBudgetExhausted { iterations: usize, max: usize }`, `IterationBudgetExhausted { iterations: usize, max: usize }`, `ApiError`, `SessionError`, `HookError`, `HealthProbeFailed`, `Other`; (d) replace `with_max_iterations(usize)` with `with_turn_budget(TurnBudget)` (keep old builder as `#[deprecated]` alias to preserve subagent compatibility), wire the new builder through `ConversationRuntime` and `BuiltRuntime`; (e) add `Session::turn_counter: u64` persisted across turns and increment in `run_turn` before iteration loop; (f) add `pub enum RuntimeEvent { … TurnBudgetWarning { iterations, max, pct }, TurnBudgetExhausted { iterations, max }, IterationBudgetWarning { … } }` lane events emitted from inside the iteration loop at `warn_at_pct` and at exhaustion (so `--output-format json`/`stream-json` consumers can observe the budget approach before the typed error fires); (g) define zero-turn semantics: `TurnBudget { max_turns: 0, … }` returns `Ok(TurnSummary { iterations: 0, assistant_messages: vec![], … })` immediately without an API call, useful for parse-validation/cost-zero runs and matching the upstream Claude Code zero-turn contract that #262's CLI flag would expose; (h) wire `LiveCli::run_turn_with_output` (`main.rs:7705`) to pass a `TurnBudget` derived from the new `--max-turns` flag (#262 fix) plus a default sensible ceiling for the primary CLI surface; (i) add tests for (1) iteration cap typed error, (2) cumulative turn cap typed error across multiple `run_turn` calls on the same 
runtime, (3) warn-at-pct event firing exactly once per turn, (4) zero-turn fast-return, (5) `RuntimeErrorKind::TurnBudgetExhausted` round-trips through `--output-format json` `error.kind` field instead of being string-only.
|
||||
|
||||
Acceptance: a downstream caller can pattern-match on `RuntimeErrorKind::TurnBudgetExhausted { iterations, max }` instead of substring-matching on a generic string; `--output-format json` emits `{ "error": { "kind": "turn_budget_exhausted", "iterations": 33, "max": 32 }, … }` instead of a bare error message; `TurnBudget { max_turns: 0 }` returns immediately with `iterations: 0`; a 90%-of-budget warning event fires before exhaustion; the subagent runtime keeps its `DEFAULT_AGENT_MAX_ITERATIONS = 32` semantic via the new `TurnBudget` builder; the primary CLI runtime gains a default budget instead of `usize::MAX`; #262's CLI flag fix has a typed runtime surface to land on.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 13:05 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `d0aa18e` (post-#263 fast-forward verification onto gaebal-gajae's `--compact` help-text-vs-dispatch-mismatch pinpoint). Cluster delta: Turn-budget-primitive cluster 0→1 (founder, NEW SOLO CLUSTER); complementary-pinpoint-pair-bundle discovery-pattern extended (sister to #245+#250 WebSearch client/server pair, now #262+#264 turn-budget CLI-parse/runtime-primitive pair). Smaller-scope by design (matches #253/#254/#257/#258/#260/#261/#262/#263 context-budget discipline). Sister: #262 (CLI parse-side; #262+#264 bracket the full turn-budget concern across two structural layers). Distinct from silent-fallback family (#258/#260/#262 catalogue silent-mutation at boundaries; #264 catalogues a missing typed primitive layer that those boundaries would land typed errors INTO). Distinct from #254 MCP Resources lifecycle (also runtime-protocol layer but resource-handle axis, not iteration/turn axis). Eleventh-cycle concurrent-dogfood-rebase parity will be confirmed local==origin==fork at HEAD `d0aa18e+#264` after push.
|
||||
|
||||
|
||||
## Pinpoint #265 — Non-interactive output has no `stream-json` event lane even though the provider path already streams internally, forcing automation to choose between human text streaming and one-shot summary JSON
|
||||
|
||||
Dogfooded 2026-04-26 13:30 KST on `feat/jobdori-168c-emission-routing` at HEAD `d5568eb` (post-#264). Fresh CLI verification shows `claw --help` advertises only `--output-format text|json`, and `cargo run --quiet --bin claw -- --output-format stream-json -p "noop"` fails immediately with `[error-kind: cli_parse] error: unsupported value for --output-format: stream-json (expected text or json)`. Source inspection confirms the mode boundary is structural: `CliOutputFormat` has only `Text` and `Json` (`rust/crates/rusty-claude-cli/src/main.rs:793-805`), `run_turn_with_output` only dispatches `Text`, `Json`, and compact variants (`main.rs:4638-4648`), while the actual Anthropic client path always uses streaming internally (`MessageRequest { stream: true }` at `main.rs:7928`) and converts provider stream chunks into `AssistantEvent` values in `consume_stream` (`main.rs:7966-8095`). Those events are accumulated and returned, not exposed as a line-delimited machine stream.
|
||||
|
||||
Concrete failure mode: automation and downstream claws cannot observe turn progress as typed JSON events while a prompt is running. In `text` mode the operator sees live human-rendered Markdown/tool output, but parsers have to scrape terminal prose. In `json` mode the consumer receives one final envelope only after the turn completes, so long-running tool loops, post-tool stalls, prompt-cache events, tool starts/results, auto-compaction, and future #264 budget warnings/exhaustion cannot be routed until the end (or at all, depending on envelope fields). This is exactly the surface that would need to carry #264's `TurnBudgetWarning` before exhaustion and #260's compact-envelope observability fields during execution; without a stream-json lane, those typed runtime events have nowhere deterministic to go.
|
||||
|
||||
Gap. This is an **event/log opacity gap at the CLI output layer**, distinct from #260 and #263. #260 is about fields silently missing from the final compact JSON envelope; #263 is stale help text for an existing compact-JSON path; #265 is the absence of a machine-readable streaming output mode despite the provider stream and internal `AssistantEvent` pipeline already existing. It is also distinct from #264: #264 defines the missing turn-budget primitive/events; #265 identifies the CLI event lane those events need to surface through in non-interactive automation.
|
||||
|
||||
Required fix shape: (a) add `CliOutputFormat::StreamJson` parsed from `--output-format stream-json` and documented in help; (b) add a `run_prompt_stream_json` dispatch path that emits JSON Lines with stable event names (`message_start`, `text_delta`, `tool_use`, `tool_result`, `usage_delta`, `prompt_cache`, `auto_compaction`, `turn_budget_warning`, `turn_budget_exhausted`, `message_stop`, `error`, `final_summary`); (c) ensure human Markdown rendering is disabled or explicitly separated when `stream-json` is active so stdout remains valid JSONL; (d) include stable sequence numbers and timestamps so consumers can reconstruct order without scraping; (e) add tests that `--output-format stream-json` is accepted, stdout is JSONL-only, tool-use and final-summary events both appear, and runtime errors are emitted as typed `error` events before non-zero exit. Acceptance: a downstream claw can run `claw --output-format stream-json -p "..."` and react to tool/budget/compaction/error events before the final assistant message, with no terminal-prose scraping.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 13:30 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `d5568eb` before filing. Cluster delta: CLI-event-stream-observability +1; prerequisite output lane for #260/#264 follow-up implementation. Concrete delta this cycle: ROADMAP-only pinpoint appended after live help/parse/source verification.
|
||||
|
||||
|
||||
## Pinpoint #266 — `RuntimeErrorKind` typed-error-kind enum is structurally absent: `RuntimeError` is a single-field `{ message: String }` newtype with zero typed discriminants, and the CLI compensates by reverse-engineering the discriminant downstream via a 22-branch substring-matching `classify_error_kind(message: &str) -> &'static str` function — the runtime throws away typed information at construction, then the CLI scrapes it back via prose-pattern-matching
|
||||
|
||||
Dogfooded 2026-04-26 13:35 KST on `feat/jobdori-168c-emission-routing` at HEAD `8975354` (post-rebase fast-forward onto gaebal-gajae's #265 `--output-format stream-json` lane-absent pinpoint). #264 audited the **Turn-budget primitive** runtime layer and noted in passing that `RuntimeError` lacks a `kind: RuntimeErrorKind` field and named `TurnBudgetExhausted` as a future variant alongside `ApiError`/`SessionError`/`HookError`. #266 is the **dedicated structural audit of that typed-error-taxonomy gap itself** — sister pinpoint to #264, founding the **Typed-error-kind-enumeration cluster** with #266 as solo founder.
|
||||
|
||||
Verified concrete surface (all paths relative to `rust/crates/`): `RuntimeError` is defined at `runtime/src/conversation.rs:87-93` as `pub struct RuntimeError { message: String }` with a single `RuntimeError::new(impl Into<String>)` constructor at lines 91-97 and `Display`/`std::error::Error` impls at lines 100-106. There is **zero** `RuntimeErrorKind` enum, **zero** `RuntimeError::kind()` accessor, **zero** `kind: RuntimeErrorKind` field, **zero** typed discriminant, **zero** machine-readable reason, and **zero** structured payload (`iterations`, `max`, `path`, `operation`, `retryable`, etc.). `rg "pub enum RuntimeError\|RuntimeErrorKind" rust/crates/` returns no matches anywhere in the workspace. The sibling type `ToolError` at `conversation.rs:64-83` shares the same single-field `{ message: String }` shape — the typed-error gap is symmetric across both runtime error types.
|
||||
|
||||
Construction-site count: `rg 'RuntimeError::new' rust/crates/` returns **20 call sites** total (12 inside `runtime/src/conversation.rs`, 8 inside `rusty-claude-cli/src/main.rs`). Every single one passes a free-form `String` or `format!(...)` expression — no construction site emits a typed discriminant. Representative examples: `conversation.rs:324` (`format!("conversation loop exceeded the maximum number of iterations")`), `conversation.rs:740` (`"assistant stream produced no content"`), `main.rs:7976`/`7997`/`8007`/`8125` (`format_user_visible_api_error(...)` — API failures collapsed into prose), `main.rs:8000` (`"post-tool continuation nudge exhausted"`), `main.rs:8951`/`8968` (filesystem operations: `error.to_string()`).
|
||||
|
||||
Downstream counter-evidence — `classify_error_kind(message: &str) -> &'static str` at `rusty-claude-cli/src/main.rs:270-348` (78 lines, **22 substring-match branches**, called from `main.rs:215` (panic-handler tagging), `:243`/`:245`/`:249` (top-level error printer for both `text` and `json` modes), and `:2982` (mid-run error envelope)). Branch enumeration: `missing_credentials`, `filesystem_io_error`, `missing_manifests`, `missing_worker_state`, `session_not_found`, `session_load_failed`, `no_managed_sessions`, `cli_parse` (×10 distinct substring patterns: `unrecognized argument`, `unknown option`, `prompt subcommand requires`, starts-with `empty prompt:`, `unsupported value for --`, `missing value for --`, `unsupported permission mode`, `invalid value for --`, `model string cannot be empty`, `unexpected extra arguments after \\`claw`), `slash_command_requires_repl`, `invalid_model_syntax`, `unsupported_command`, `unsupported_resumed_command`, `confirmation_required`, `api_http_error` (substring-OR over `api failed` / `api returned`), and a fall-through to `unknown`. Each branch carries inline comments referencing the historical pinpoint that motivated it (`// #169`, `// #170`, `// #171`, `// #247`, `// #130b`) — proving the function has accreted patterns one-pinpoint-at-a-time as new error prose shapes leaked in.
|
||||
|
||||
Downstream consumption: `--output-format json` envelope at `main.rs:215`/`:245`/`:249` emits `{ "type": "error", "error": "<bare prose>", "kind": "<classify_error_kind result>" }` where the `kind` field is recovered AT THE CLI BOUNDARY via the substring scrape, **not propagated from a typed runtime field**. The runtime never had the discriminant; the CLI invents it back. ROADMAP §4.44 (lines 758-785) and ROADMAP #130 (lines 4978-5122) both explicitly note this gap as a typed-error contract debt — #130's New evidence section at line 5120 calls out exactly this: "the typed-error contract is thus twice-broken on this path: (a) the io::ErrorKind information is discarded at the `?` in `run_export()`, AND (b) the flat `io::Error::Display` string is then fed to a classifier that has no patterns for filesystem errno strings." Neither §4.44 nor #130 audited the **`RuntimeErrorKind` enum itself** as the structurally-absent primitive; both treated the gap as classifier-pattern-missing rather than as **typed-discriminant-missing-at-the-source**.
|
||||
|
||||
Gap. The runtime treats the error class as a **stringly-typed value** rather than a typed enum. The classifier function is a **lossy reverse-decompiler** of information that should have been carried as a typed field from `RuntimeError` construction through the CLI emit. Two structural failures: (1) **forward-direction loss** — every `RuntimeError::new(...)` call site already knows the kind at the source (e.g., `conversation.rs:324` knows it's iteration-exhaustion, `main.rs:8000` knows it's post-tool-nudge-exhaustion, `main.rs:7976` knows it's API failure) but throws that knowledge away by collapsing into a `String`; (2) **reverse-direction fragility** — the CLI substring-scrape can mis-classify any error whose prose accidentally matches a pattern from a different error class (e.g., a legitimate API error containing the literal text `"unknown option"` would be misclassified as `cli_parse`), and silently degrades to `"unknown"` for any error class that has not yet been patched into the classifier. The 22-branch accretion is itself counter-evidence: every new error class needs both a `RuntimeError::new("<unique prose>")` site AND a corresponding `classify_error_kind` substring branch, with no compiler enforcement that the two stay in sync. New error classes that ship without a classifier branch silently fall through to `"unknown"`.
|
||||
|
||||
Cluster shape novelty: founds the **NEW Typed-error-kind-enumeration cluster** with #266 as solo founder. The cluster catalogues missing typed discriminants at the runtime-error-taxonomy axis: `RuntimeErrorKind` enum, `ToolErrorKind` enum, per-variant typed payloads (`TurnBudgetExhausted { iterations, max }`, `IterationBudgetExhausted { iterations, max }`, `ApiError { status, retryable }`, `SessionError { kind, path }`, `HookError { hook_name, exit_code }`, `FilesystemError { path, operation, errno }`, `ParseError { argv_index, raw }`), and the structural removal of `classify_error_kind` as a CLI-side reverse-decompiler in favor of typed field propagation. Sister to #264 (Turn-budget primitive cluster); the two pinpoints form a **third complementary-pinpoint-pair-bundle** following the #245+#250 (WebSearch client/server) and #262+#264 (turn-budget CLI-parse/runtime-primitive) pattern. #264 catalogues a single missing typed-event primitive; #266 catalogues the missing typed-discriminant-axis that ALL runtime errors (turn-budget, API, session, hook, filesystem, etc.) need in order to express themselves typedly. #266 is the **prerequisite layer** #264's `TurnBudgetExhausted` variant would land into.
|
||||
|
||||
Distinct from #260/#263/#265 (output-mode/help-text-contract gaps at the CLI output layer). Distinct from §4.44 (which proposed a typed envelope at the JSON boundary but did not audit the absent runtime enum at the source). Distinct from #130/#130b (which catalogued context-loss at filesystem `?` propagation and classifier-pattern-missing at the CLI, but did not catalogue the absent enum). Distinct from the silent-fallback family (#207/#208/#222/#231/#236/#246/#249/#258/#260/#262 — silent input/output mutation at boundaries; #266 is about the **type system itself missing a discriminant axis**).
|
||||
|
||||
Discovery-pattern continuation: this is the **third complementary-pinpoint-pair-bundle** in the dogfood corpus (#245+#250, #262+#264, now #264+#266), and the **second consecutive cycle pair-bundle** (#264 filed 13:05 KST, #266 filed 13:35 KST same cycle-day) — confirming complementary-pair-bundles as a stable discovery-pattern that systematically expands when a single-layer pinpoint is filed. Pair-bundle ratio: 3 of 67 pinpoints in the #200-range (≈4.5%) are bundled — small but consistent. Founds NEW cluster (Typed-error-kind-enumeration) rather than extending silent-fallback (which closes at 11 with #265).
|
||||
|
||||
Required fix shape: (a) introduce `pub enum RuntimeErrorKind { TurnBudgetExhausted { iterations: u32, max: u32 }, IterationBudgetExhausted { iterations: u32, max: u32 }, ApiError { status: Option<u16>, retryable: bool }, SessionError { kind: SessionErrorKind, path: Option<PathBuf> }, HookError { hook_name: String, exit_code: Option<i32> }, FilesystemError { path: PathBuf, operation: FilesystemOp, errno: Option<i32> }, ParseError { argv_index: Option<usize>, raw: Option<String> }, ToolStreamExhausted, EmptyAssistantStream, PostToolNudgeExhausted, ConfirmationRequired, Other }` at `runtime/src/conversation.rs`; (b) replace `RuntimeError { message: String }` with `RuntimeError { kind: RuntimeErrorKind, message: String }` adding `RuntimeError::kind(&self) -> &RuntimeErrorKind` accessor; (c) add typed constructors `RuntimeError::turn_budget_exhausted(iterations, max)`, `::api(status, retryable, message)`, `::session(kind, path, message)`, etc., and keep `RuntimeError::new(message)` as a `#[deprecated]` alias that constructs `kind: RuntimeErrorKind::Other` so existing call sites compile while the migration proceeds; (d) audit all 20 `RuntimeError::new` call sites and migrate each to a typed constructor — `conversation.rs:324` → `turn_budget_exhausted(...)`, `conversation.rs:740` → `tool_stream_exhausted()`, `conversation.rs:745` → `empty_assistant_stream()`, `main.rs:7976`/`:7997`/`:8007`/`:8125` → `api(...)`, `main.rs:8000` → `post_tool_nudge_exhausted()`, etc.; (e) extend `--output-format json` envelope to emit `error.kind` from `RuntimeError::kind()` with stable serde-renamed snake_case discriminant strings (`turn_budget_exhausted`, `api_error`, `session_error`, etc.), and emit per-variant typed fields (`error.iterations`, `error.max`, `error.path`, `error.retryable`, etc.); (f) replace `classify_error_kind(message: &str)` with `classify_error_kind(error: &RuntimeError) -> &'static str { error.kind().as_str() }` — the function survives as a serde-rename helper but no 
longer substring-scrapes prose; (g) add `RuntimeErrorKind::as_str()` and `FromStr` round-trip and golden-fixture tests for every variant proving the JSON envelope round-trips through `error.kind`; (h) deprecate the substring branches in `classify_error_kind` over a one-cycle window to give external consumers time to migrate from prose-scraping to typed-field-reading.
|
||||
|
||||
Acceptance: a downstream caller can pattern-match on `error.kind()` returning `RuntimeErrorKind::TurnBudgetExhausted { iterations, max }` instead of substring-matching `message.contains("conversation loop exceeded")`; `--output-format json` emits `{ "error": { "kind": "turn_budget_exhausted", "iterations": 33, "max": 32, "message": "…" }, … }` with typed payload fields per variant; the 22-branch substring classifier shrinks to a single `error.kind().as_str()` call; new error classes added to the runtime are compile-time-visible at every emit point because the enum requires exhaustive matching; a new error variant added without a classifier branch becomes a compiler error rather than silently degrading to `"unknown"`; #264's `TurnBudgetExhausted` variant has a typed home; #130/#130b's filesystem context-loss has a `RuntimeErrorKind::FilesystemError { path, operation, errno }` typed home rather than a classifier substring branch.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 13:35 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `8975354` before filing (post-rebase fast-forward onto gaebal-gajae's #265 `--output-format stream-json` lane-absent pinpoint). Cluster delta: Typed-error-kind-enumeration cluster 0→1 (founder, NEW SOLO CLUSTER); complementary-pinpoint-pair-bundle discovery-pattern extended to 3 bundles total (#245+#250 WebSearch, #262+#264 turn-budget, #264+#266 turn-budget-runtime/typed-error-axis). Smaller-scope by design (matches #253/#254/#257/#258/#260/#261/#262/#263/#264/#265 context-budget discipline). Sister: #264 (Turn-budget primitive runtime-side; #264 names `TurnBudgetExhausted` as a future `RuntimeErrorKind` variant; #266 is the dedicated structural audit of that absent enum itself). Distinct from §4.44 typed-error contract proposal (which targets the JSON envelope boundary; #266 targets the runtime enum that the envelope would serialize FROM). Distinct from #130/#130b classifier-pattern-missing (which is downstream of the absent enum; #266 catalogues the upstream root cause). Concurrent-dogfood-rebase parity will be confirmed local==origin==fork at HEAD `8975354+#266` after push.
|
||||
|
||||
## Pinpoint #267 — `prompt TEXT` subcommand has the same post-prompt greedy-slurp control-token absorption shape as `-p`, so flags after `prompt` become model input instead of parse errors
|
||||
|
||||
Dogfooded 2026-04-26 14:02 KST on `feat/jobdori-168c-emission-routing` at HEAD `fae9fd9` (post-#266). While probing the help-advertised `claw [--model MODEL] [--output-format text|json] prompt TEXT` path, source inspection showed the `prompt` subcommand arm does `let prompt = rest[1..].join(" ")` at `rust/crates/rusty-claude-cli/src/main.rs:1239`, then immediately returns `CliAction::Prompt` without validating whether any later token is a flag-looking control token. This mirrors #262's `-p` greedy slurp (`args[index+1..].join(" ")`) but on the documented `prompt TEXT` subcommand surface rather than the short `-p` compat alias.
|
||||
|
||||
Concrete failure mode: `claw prompt "say hi" --max-turns 0`, `claw prompt "say hi" --output-format json`, or `claw prompt "say hi" --definitely-unknown` are structurally parsed as prompt text (`"say hi --max-turns 0"`, etc.) rather than as either (a) recognized flags that continue parsing, or (b) typed `cli_parse` errors for unsupported trailing flags. In contrast, the same unknown flag before the first positional token is rejected by the global `other if rest.is_empty() && other.starts_with('-')` arm. The help text advertises `prompt TEXT` as the safe explicit non-interactive form, but the explicit form still makes the boundary after `TEXT` invisible: machine-control tokens after the prompt become model input.
|
||||
|
||||
Gap. This is distinct from #262 but sibling-shaped. #262 filed the missing `--max-turns` flag plus position-sensitive absorption after `-p` and bare prompt forms. #267 covers the long-form documented `prompt TEXT` subcommand. The `prompt` arm is a separate parser site (`rest[1..].join(" ")`) with separate acceptance criteria and deserves its own regression because a fix that only rewrites the `-p` arm would leave the documented subcommand silently absorbing flags. This extends the position-sensitive-parse-asymmetry sub-shape from short-option prompt mode to the explicit `prompt` subcommand surface.
|
||||
|
||||
Required fix shape: (a) define a delimiter contract for `prompt TEXT`: either require all flags before `prompt` and reject any `rest[1..]` token that starts with `-` unless escaped via `--`, or parse `prompt` as consuming exactly one TEXT argv and then resume global flag parsing; (b) support `--` as an explicit literal-prompt delimiter so users can intentionally include flag-looking text (`claw prompt -- "explain --max-turns"`); (c) emit a typed `CliFlagWarning`/`cli_parse` JSON error when a flag-looking token appears after the prompt without `--`; (d) add parser tests for `prompt x --max-turns 0`, `prompt x --output-format json`, `prompt x --definitely-unknown`, `prompt -- "x --max-turns 0"`, and `--output-format json prompt x`. Acceptance: the documented `prompt TEXT` path no longer silently mutates trailing control tokens into model input; fixes for #262 cannot pass while leaving this long-form parser site greedy.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 14:02 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `fae9fd9` before filing. Cluster delta: position-sensitive-parse-asymmetry sub-shape +1 documented-subcommand member; sibling to #262, not duplicate. Concrete delta this cycle: ROADMAP-only pinpoint appended after source verification of the `prompt` arm.
|
||||
|
||||
## Pinpoint #268 — MCP `tools/list` is never re-fetched on session resume: the runtime trusts the static `.claw.json` server list at `/mcp` time and the cached/qualified-name tool catalog at runtime build time, with zero staleness detection or live refresh path between server-restart events that change the tool catalog and the next `claw --resume` invocation
|
||||
|
||||
Dogfooded 2026-04-26 14:08 KST on `feat/jobdori-168c-emission-routing` at HEAD `d90b5f0` (post-rebase fast-forward onto gaebal-gajae's #267 `prompt TEXT` greedy-slurp pinpoint). #254 audited the **MCP Resources lifecycle** absence (`subscribe`/`list_changed`/`updated` for resources). #268 is the **sister pinpoint on the tool axis**: even the existing one-shot `tools/list` discovery is structurally bound to runtime startup and is never re-fetched on resume, so a session that restarts an MCP server (adding/removing/renaming tools) and then runs `claw --resume <session>` proceeds against the previous boot's stale tool catalog with no staleness signal. Founds the **NEW Session-resume-tool-catalog-staleness cluster** with #268 as solo founder, complementary to #254's resource-axis lifecycle gap.
|
||||
|
||||
Verified concrete surface (all paths relative to `rust/crates/`): the resume entrypoint at `rusty-claude-cli/src/main.rs:2974` (`fn resume_session(session_path, commands, output_format)`) loads the persisted `Session` via `current_session_store().load_session(reference)` at `:5620-5634` (`fn load_session_reference`) and dispatches each `/<cmd>` through `run_resume_command(session_path, &session, &command)` at `:3467`. **The resume path never calls `build_runtime_mcp_state` and never instantiates `RuntimeMcpState::new` and never calls `manager.discover_tools_best_effort()`** — `rg "build_runtime_mcp_state|RuntimeMcpState::new" rust/crates/rusty-claude-cli/src/main.rs` returns the only two construction sites at `:7267` (`build_runtime_plugin_state_with_loader`) and `:4311` (the impl), and neither is reachable from `resume_session`. The resume-mode `/mcp` slash arm at `main.rs:3596-3613` calls `commands::handle_mcp_slash_command(args, &cwd)` which at `commands/src/lib.rs:2341-2347` calls `loader.load()` and then `render_mcp_summary_report(cwd, runtime_config.mcp().servers())` — i.e., it dumps the **configured-server list from `.claw.json`** without spawning any MCP process or issuing any `tools/list` request. The function never touches `McpServerManager`, never spawns stdio, never sends an `initialize` handshake, never sends `tools/list`. There is **zero** `tool_catalog`/`tool_snapshot`/`cached_tools`/`tool_list_at`/`tool_revision`/`tool_etag` field on `Session` at `runtime/src/session.rs:91-105` (the persisted struct fields are: `version`, `session_id`, `created_at_ms`, `updated_at_ms`, `messages`, `compaction`, `fork`, `workspace_root`, `prompt_history`, `last_health_check_ms`, `model`, `persistence`). The startup-time discovery report at `main.rs:4119-4205` (`impl RuntimeMcpState { fn new(...) 
}`) calls `runtime.block_on(manager.discover_tools_best_effort())` ONCE and stores the result in `RuntimeMcpState { runtime, manager, pending_servers, degraded_report }` — the in-memory snapshot is held for the lifetime of the process and never re-issued. There is **zero** `refresh_tools` / `reload_tools` / `refetch_catalog` / `recheck_servers` method on `RuntimeMcpState` or `McpServerManager`; `rg "refresh_tool|reload_tool|refetch|recheck_server" rust/crates/runtime/src/` returns no matches.
|
||||
|
||||
Downstream symptom matrix: (1) **Server tools changed between sessions** — user adds/removes a tool on an MCP server (e.g., `git`/`gh`/local-tooling MCP servers commonly add tools across versions). On `claw --resume <session>` the resumed `/mcp` view shows the old configured-server list with no tool count and no live `tools/list` cross-check; on continued prompts the tool registry built at runtime startup contains either the now-stale tool set OR the freshly-discovered set with no audit trail of which prompts ran against which catalog. (2) **MCP server replaced with a different binary at the same `command:` path** — the new binary advertises a different tool set; the resumed session has zero detection path. (3) **MCP server now-unavailable** — a server that was reachable at session-start but is offline at resume: there is no liveness probe in resume mode, only a configured-list dump, so `/mcp` reports the server as configured without flagging it as unreachable until a `tools/call` fails downstream. (4) **Tool descriptor drift** (description, JSON-schema input shape, qualified name): a tool that kept its name but changed its `input_schema` between server versions: the runtime tool registry built at the FIRST session start at `main.rs:4129-4133` (`mcp_runtime_tool_definition`) snapshots `tool.tool.input_schema.clone()` once; subsequent re-builds at the NEXT session start would pick up the new schema, but mid-session the agent is reasoning over the boot-time schema with no `version`/`etag`/`schema_revision` field on `ManagedMcpTool` to detect drift.
|
||||
|
||||
Gap. Three structural absences on the same axis: (a) **resume-mode tool-list refresh** — `run_resume_command` never instantiates `RuntimeMcpState`, so resumed sessions cannot even attempt a fresh `tools/list`; the resumed `/mcp` slash command at `main.rs:3596-3613` dispatches to a config-only renderer rather than a live-MCP renderer. (b) **mid-session tool-list refresh** — even the long-running session at first start instantiates `RuntimeMcpState` exactly once at `main.rs:7267` (`build_runtime_plugin_state_with_loader`) and never re-calls `discover_tools_best_effort()` afterwards; if the agent is alive when an MCP server's `tools/list_changed` notification fires (per the MCP spec's `notifications/tools/list_changed`), there is no notification-dispatch path on the JSON-RPC reader (the same notification-dispatch absence #254 catalogues for `notifications/resources/list_changed`). (c) **persisted tool-catalog snapshot** — the `Session` JSONL file at `.claw/sessions/<fingerprint>/<id>.jsonl` does not record which tool catalog was active when each turn ran, so post-hoc audit cannot tell which catalog version the assistant assumed. The composite gap means an MCP server that legitimately advertises `notifications/tools/list_changed` per the MCP 2025-03-26 spec is silently treated as having a frozen tool catalog from process boot.
|
||||
|
||||
Cluster shape novelty. Founds the **NEW Session-resume-tool-catalog-staleness cluster** with #268 as solo founder, distinct from #254's resource-axis lifecycle absence (which targets `resources/subscribe`+`resources/list_changed`+`resources/updated`+`ResourceRegistry` on the data-handle axis). #268 targets the **tool-handle axis**: `tools/list_changed` notification handler, `RuntimeMcpState::refresh_tool_catalog`, resume-mode `RuntimeMcpState` re-instantiation, persisted `tool_catalog_revision` on `Session`. #254 and #268 form the **fourth complementary-pinpoint-pair-bundle** in the dogfood corpus (after #245+#250 WebSearch, #262+#264 turn-budget, #264+#266 typed-error-axis), tracking the **two missing axes of MCP capability lifecycle** — resources and tools — that the spec advertises as live-subscribable but the runtime treats as one-shot-snapshot.
|
||||
|
||||
Distinct from #207/#208/#222/#231/#236/#246/#249/#258/#260/#262/#265 silent-fallback-input-mutation (those are CLI-layer prompt/output silent mutation; #268 is missing-refresh-of-discovery-data at the protocol-runtime layer). Distinct from #254 (resources axis vs tools axis). Distinct from #266 (typed-error enum vs missing-refresh primitive — orthogonal axes). Distinct from #259 session-state schema gaps (which catalogue what's in the JSONL; #268 catalogues what `Session` should have but does not — the tool-catalog-revision field). Distinct from #229/#238/#244 persistent-WebSocket-stream cluster (those are bidirectional client-driven streams; #268 is server-pushed-notification-handler absence on stdio JSON-RPC, structurally identical to #254's gap but on the tool axis).
|
||||
|
||||
Discovery-pattern continuation: this is the **fourth complementary-pinpoint-pair-bundle** (#245+#250, #262+#264, #264+#266, now #254+#268). #254 was filed earlier this dogfood-day at 11:02 KST; #268 closes the resources↔tools axis-pair as both being structurally one-shot. Pair-bundle ratio: 4 of 68 pinpoints in the #200-range (≈5.9%) bundled — confirms complementary-pair-bundles as a **stable discovery-pattern that systematically expands when an axis gap is filed and an orthogonal sister axis exists**. #268 also extends the **PURE-CLAWABILITY-FRICTION-FROM-DOGFOODING** discovery-pattern (#254's founding pattern) — the agent's own MCP runtime treats catalog discovery as boot-once rather than spec-compliant subscribe/refresh, so the agent silently reasons over a boot-time tool view that diverges from server reality across process lifetimes.
|
||||
|
||||
Required fix shape: (a) add `notifications/tools/list_changed` notification handler on the JSON-RPC stdio reader (parallel to #254's resources handler) routing to a per-server channel; (b) add `pub enum ToolCatalogLifecycleEvent { ToolListChanged, ToolAdded(McpTool), ToolRemoved { qualified_name: String }, ToolSchemaChanged { qualified_name: String, old_schema: JsonValue, new_schema: JsonValue } }` typed event surfaced through `LaneEvents`; (c) add `RuntimeMcpState::refresh_tool_catalog(&mut self) -> Result<McpToolDiscoveryReport, ...>` that re-runs `manager.discover_tools_best_effort()` and diffs against the previous snapshot, emitting `ToolCatalogLifecycleEvent`s for the delta; (d) instrument the resume entrypoint at `main.rs:2974` (`resume_session`) to instantiate `RuntimeMcpState` (or a lightweight liveness-only variant) when the session has any MCP server configured, refresh the catalog, and surface the diff in the resume-mode `/mcp` output rather than dumping the static config; (e) add `revision: u64` and optional `etag: Option<String>` to `ManagedMcpTool`/`McpTool` so persisted session JSONL turns can record `tool_catalog_revision` per turn; (f) extend `Session` with `pub last_tool_catalog_revision: Option<u64>` (and bump `SESSION_VERSION` from `1` to `2` per #259); (g) advertise `tools.listChanged = true` in the initialize handshake at `mcp_stdio.rs:1400` when the runtime supports it; (h) expose `/mcp tools refresh` slash command and `claw mcp tools refresh` CLI subcommand; (i) emit a typed `mcp_tool_catalog_stale` warning to `--output-format json` when resume detects the catalog has diverged from the snapshot embedded in the last session turn. 
Acceptance: an MCP server that adds a tool between session-end and `claw --resume <session>` causes the resumed `/mcp` output to show `+1 tool added: srv__new_tool` rather than the static configured-server list with no live cross-check; an MCP server that emits `notifications/tools/list_changed` mid-session causes a `ToolListChanged` lane event and refreshes the tool registry rather than being silently dropped; the persisted session JSONL records `tool_catalog_revision` per turn so post-hoc audit can identify which catalog snapshot the assistant reasoned over.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 14:08 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `d90b5f0` before filing (post-rebase fast-forward onto gaebal-gajae's #267 `prompt TEXT` greedy-slurp pinpoint). Cluster delta: Session-resume-tool-catalog-staleness cluster 0→1 (founder, NEW SOLO CLUSTER); complementary-pinpoint-pair-bundle discovery-pattern extended to 4 bundles total (#245+#250 WebSearch, #262+#264 turn-budget, #264+#266 typed-error-axis, #254+#268 MCP-resources/tools-lifecycle-axis-pair). Sister: #254 (MCP Resources lifecycle on the data-handle axis; #268 is the tool-handle axis sister). Smaller-scope by design (matches #253/#254/#257/#258/#260/#261/#262/#263/#264/#265/#266/#267 context-budget discipline). Distinct from #266 (typed-error enum vs missing-refresh primitive — orthogonal axes). Distinct from #259 session-state schema gaps (#259 catalogues what's in JSONL; #268 catalogues what `Session` should have but does not — the `last_tool_catalog_revision` field). Concurrent-dogfood-rebase parity will be confirmed local==origin==fork at HEAD `d90b5f0+#268` after push.
|
||||
|
||||
## Pinpoint #269 — Dogfood status transport lacks channel-aware payload budgeting and delivery receipts: long compact reports can truncate mid-stanza or fail as `Message failed` while the cycle still treats the report as posted
|
||||
|
||||
Dogfooded 2026-04-26 14:30 KST from the live `#clawcode-building-in-public` dogfood loop immediately after #268. The status reporter attempted to publish a growing same-day summary; the visible Discord output truncated mid-sentence at #263 (`**#263** — \`--compact\` help text claims text…`), then a later helper message self-reported `Truncated mid-sentence at #263`, and the cron emitted `Cron job "clawcode-dogfood-cycle-reminder" failed: ⚠️ ✉️ Message failed` followed by another timeout. The loop still printed meta-prose saying the Discord report was posted, even though the transport evidence showed partial delivery/failure.
|
||||
|
||||
Concrete failure mode: the dogfood status path can exceed a channel/provider payload limit and either (a) deliver only a prefix, cutting a pinpoint stanza in half, or (b) fail after attempting delivery, while the cycle's own state/reporting does not carry a typed `delivery_status` / `delivered_bytes` / `truncated_at` receipt. Operators then see conflicting truth: a report says "posted", the channel contains a partial report, and cron says `Message failed` or times out. This is distinct from #253 (state-vector context-budget discipline) and #261 (derived count/range self-consistency): those validate what the summary *says*; #269 validates whether the rendered payload fits the target channel and whether delivery actually succeeded.
|
||||
|
||||
Gap. There is no channel-aware pre-send budget gate for dogfood status payloads, no stanza-safe chunker, no checksum/part numbering, and no authoritative delivery receipt bound to the cycle id. A compact summary can be internally fresh (#259) and arithmetically consistent (#261) yet still be operationally unusable because the transport cuts it mid-stanza or the message send fails after side effects. The status generator also lacks a fail-closed rule: a failed/partial send should mark the cycle as `delivery_failed` or `partial_delivery`, not publish/echo a success summary.
|
||||
|
||||
Required fix shape: (a) add per-channel payload budget metadata (`max_chars`, `safe_chars`, markdown overhead, attachment/thread fallback) to dogfood report rendering; (b) preflight-render the report and split into stanza-safe chunks before send, never in the middle of a pinpoint bullet; (c) add part numbering and a short report id/checksum (`dogfood-status d90b5f0 part 1/3`) so downstream claws can detect missing chunks; (d) record/send a typed delivery receipt with `status: delivered|partial|failed`, `message_ids`, `bytes_sent`, `chunks_sent`, `truncated_at`, and provider error; (e) if any chunk fails, emit a compact failure notice and do not mark the report as posted; (f) regression-test a report containing #257-#268-sized entries against the Discord character budget and assert the split boundaries align to pinpoint stanzas. Acceptance: a same-day summary can never silently truncate mid-pinpoint; a send failure produces a typed `delivery_failed` receipt with no contradictory "posted" success prose; cron timeout can distinguish `timed_out_before_send` vs `timed_out_after_partial_send` (#246 sibling).
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 14:32 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `62b20c7` before filing. Cluster delta: dogfood-transport-delivery-receipt +1; sibling to #246 (cron timeout ambiguity), #253 (state-vector budgeting), and #261 (summary self-consistency), but distinct transport/payload-budget layer. Concrete delta this cycle: ROADMAP-only pinpoint appended from live channel failure evidence.
|
||||
|
||||
## Pinpoint #270 — Help-text flag listing omits `--reasoning-effort`, `--base-commit`, `--allow-broad-cwd`, and the `-p` short-prompt alias even though `parse_args` accepts and validates them, so operators auditing `claw --help` see no contract for runtime-tunable flags that are fully wired but documented only via inline error messages
|
||||
|
||||
Dogfooded 2026-04-26 14:33 KST on `feat/jobdori-168c-emission-routing` at HEAD `364566c` (post-rebase fast-forward onto gaebal-gajae's #269 dogfood-transport-payload-budget pinpoint). Fresh `cargo run --quiet --bin claw -- --help` output was captured in full; the `Flags:` block lists exactly seven flags (`--model`, `--output-format`, `--compact`, `--permission-mode`, `--dangerously-skip-permissions`, `--allowedTools`, `--version/-V`) and the `Examples:` block uses only those. Source inspection of `parse_args` at `rust/crates/rusty-claude-cli/src/main.rs:875-942` shows the dispatcher additionally accepts `--reasoning-effort {low|medium|high}` (lines 916-936, with both space-separated and `=`-form, validated against the literal set), `--base-commit COMMIT` (lines 905-915, both forms), `--allow-broad-cwd` (lines 939-942, boolean toggle), and `-p PROMPT` as a Claw Code compat short-prompt alias (line 943-onward, greedy `args[index+1..].join(" ")`). Live verification: `cargo run --quiet --bin claw -- --reasoning-effort medium prompt "noop"` parses past the CLI parse stage and fails downstream with `[error-kind: missing_credentials]` (Anthropic auth missing), confirming the flag is dispatch-accepted; `--reasoning-effort yolo` yields `[error-kind: cli_parse] invalid value for --reasoning-effort: 'yolo'; must be low, medium, or high` (test at `main.rs:11288-11294`). None of these four surface tokens appear in any `writeln!` invocation inside `print_help_to` (`main.rs:9328-9480`).
|
||||
|
||||
Concrete failure mode: an operator running `claw --help` to audit available knobs cannot discover that reasoning effort, custom base-commit-for-diff, broad-cwd permission, or `-p` shorthand exist. They learn about `--reasoning-effort` only by typing it wrong and seeing the validation error message; about `--base-commit` only by reading source or `MERGE_CHECKLIST.md`/git scripts; about `-p` only by reading examples in third-party docs or by observing other automation. This is the second member of the help-contract-drift cluster founded by #263 (`--compact` help text said "text mode only" while runtime had a live compact-JSON dispatch path): #263 was a stale-mode-matrix on a flag that *is* listed; #270 is whole-flag absence — flags that are fully wired (parse, validate, propagate, downstream-consume) and never documented in the help surface they should anchor. Both are stale-contract-vs-runtime divergences at the CLI surface, but on different sub-axes (advertised-flag-stale-mode vs unadvertised-flag-fully-wired).
|
||||
|
||||
Gap. This is a **help-contract drift / fully-wired-but-undocumented-flag** divergence at the CLI surface, distinct from #263. #263 catalogues a documented flag whose mode matrix is incorrect; #270 catalogues four flags that the dispatcher fully accepts and the runtime fully consumes but `print_help_to` never lists in its `Flags:` block. It is also distinct from #262 (missing `--max-turns` flag — that flag is *neither* in help *nor* in dispatch), distinct from #267 (`prompt TEXT` greedy-slurp parse-asymmetry — that is a parse-side contract gap, not a help-listing gap), and distinct from #265 (`stream-json` output mode absent from both help and dispatch). #270 is specifically the **dispatch-accepted-but-help-omitted** sub-shape, which the help-contract-drift cluster needs to cover symmetrically alongside #263's documented-but-stale sub-shape.
|
||||
|
||||
Required fix shape: (a) extend the `Flags:` block in `print_help_to` to list `--reasoning-effort {low|medium|high}` with the validated value set inline (matching the error message at `main.rs:921-924`), `--base-commit COMMIT` with a one-line description tying it to `/diff` and merge-status workflows, `--allow-broad-cwd` with the security-scope semantics (when broad-cwd traversal is allowed and what it overrides), and `-p PROMPT` as the documented Claw Code compat one-shot alias with a pointer to the canonical `prompt TEXT` subcommand and to #267's greedy-slurp caveat; (b) add an `Examples:` line for at least `--reasoning-effort` and `--base-commit` matching their actual usage shape; (c) add a parser/help-parity regression test that asserts every `match`-arm string literal in `parse_args` for top-level flags appears at least once in the captured `print_help_to` output (mechanically forces help-vs-dispatch sync for future flags); (d) extend that test to also cover `=`-form variants so the next `--foo=` flag added cannot drift; (e) emit a typed `cli_parse` warning when help is generated but a registered flag has no help line (compile-time/test-time enforcement). Acceptance: `claw --help` lists every dispatch-accepted top-level flag; the parity test fails when a new flag is added to `parse_args` without a help line; #263 and #270 together close the help-contract-drift cluster on both the stale-mode and fully-omitted sub-axes.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 14:34 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `364566c` before filing (post-rebase fast-forward onto gaebal-gajae's #269 dogfood-transport-payload-budget pinpoint). Cluster delta: help-contract-drift cluster 1→2 (#263 founder + #270 second-member, closes the documented-but-stale ↔ fully-wired-but-undocumented sub-axis pair); complementary-pinpoint-pair-bundle discovery-pattern extended to 5 bundles total (#245+#250 WebSearch, #262+#264 turn-budget, #264+#266 typed-error-axis, #254+#268 MCP-resources/tools-lifecycle, now #263+#270 help-contract-drift-stale-mode-vs-omitted). Smaller-scope by design (matches #253/#254/#257/#258/#260/#261/#262/#263/#264/#265/#266/#267/#268/#269 context-budget discipline). Sister: #263 (help-contract-drift founder; #263 stale-mode-matrix on a listed flag, #270 whole-flag absence on dispatch-accepted flags). Distinct from #262 (flag missing from BOTH help and dispatch). Distinct from #265 (`stream-json` output mode absent from BOTH help and dispatch). Distinct from #267 (parse-asymmetry, not help-listing). Concurrent-dogfood-rebase parity will be confirmed local==origin==fork at HEAD `364566c+#270` after push.
|
||||
|
||||
## Pinpoint #271 — Dogfood status generation lacks a repo-identity/source-of-truth guard, so the same claw-code nudge can drift from `ultraworkers/claw-code` to `code-yeongyu/claw-code` and publish an authoritative-looking report for the wrong project
|
||||
|
||||
Dogfooded 2026-04-26 15:00 KST from the live `#clawcode-building-in-public` loop after #270. The nudge explicitly targeted the active `claw-code` dogfood branch (`feat/jobdori-168c-emission-routing`, canonical remotes `origin=https://github.com/ultraworkers/claw-code`, `fork=https://github.com/Yeachan-Heo/claw-code`), and the branch had just advanced through #269/#270. Minutes later a Jobdori status report switched context to `code-yeongyu/claw-code` (private Rust port), described `main` as dormant since 2026-04-02, reported no `ROADMAP.md`, and filed a new pinpoint about `dev/rust` branch drift. That report was structurally plausible but belonged to a different repository/project, not the live dogfood branch that Clawhip was nudging.
|
||||
|
||||
Concrete failure mode: a dogfood cycle can satisfy the shape of the requested report while silently changing the repo identity underneath it. Operators then see an authoritative status block with `Repo: code-yeongyu/claw-code`, `Active sessions: 0`, and stale branch analysis, interleaved with the actual `ultraworkers/claw-code` ROADMAP cycle. This creates stale-branch confusion and queue pollution: the wrong repo gets analyzed, the active ROADMAP branch is skipped for that cycle, and subsequent status summaries may mix pinpoints from two unrelated claw-code lineages.
|
||||
|
||||
Gap. There is no mandatory repo-identity assertion in dogfood status generation. Reports include freeform `repo` text, but they are not checked against a canonical tuple such as `{remote_url, branch, worktree_path, roadmap_path, expected_head_prefix}` before publishing. This is distinct from #259 (freshness/provenance against git+ROADMAP for the chosen repo) and #269 (transport delivery receipts): #271 validates that the chosen repo is the **intended** repo before any freshness or delivery checks run. A report can be fresh, internally consistent, and delivered successfully while still being for the wrong repository.
|
||||
|
||||
Required fix shape: (a) define a canonical dogfood target identity for each Clawhip nudge (`repo_owner/name`, `remote_url`, `branch`, `worktree_path`, required backlog file such as `ROADMAP.md`, and optional fork remote); (b) before generating status or filing a pinpoint, assert the current cwd/remotes/branch/backlog-file match that identity; (c) emit `DOGFOOD_REPO_MISMATCH` and refuse to publish if the active repo is `code-yeongyu/claw-code` or any sibling while the nudge targets `ultraworkers/claw-code`; (d) include the verified tuple in every status report as machine fields, not prose; (e) add regression coverage where two repos named `claw-code` exist and the status command must reject the wrong one despite similar names. Acceptance: a `claw-code` dogfood nudge cannot produce a status report for `code-yeongyu/claw-code`; wrong-repo analysis fails closed with a typed mismatch receipt instead of entering the public status stream.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 15:02 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `61be826` before filing. Cluster delta: dogfood-repo-identity-guard +1; sibling to #259 (freshness/provenance), #253 (state-vector context budget), and #269 (transport delivery), but distinct source-of-truth selection layer. Concrete delta this cycle: ROADMAP-only pinpoint appended from live wrong-repo report evidence.
|
||||
|
||||
## Pinpoint #272 — `--max-turns 0` zero-turn semantics are unspecified at the spec layer: three valid interpretations (parse-error, unlimited-sentinel, run-zero-iterations-fast-return) coexist with no canonicalization, the existing pinpoints #262 and #264 each prescribe the same fast-return resolution in passing without anchoring to upstream prior art, and the interaction matrix between `max_turns: 0` and `--allowedTools`, `--dangerously-skip-permissions`, session-state recording, hook execution, and `system_prompt` event emission is undefined
|
||||
|
||||
Dogfooded 2026-04-26 15:04 KST on `feat/jobdori-168c-emission-routing` at HEAD `29c262c` (post-rebase fast-forward onto gaebal-gajae's #271 dogfood-repo-identity-guard pinpoint). Reproduction matrix against `./rust/target/release/claw` (current built artifact, no source change required to demonstrate the gap):
|
||||
|
||||
- `claw --max-turns 0 -p "say hi"` → `[error-kind: cli_parse] error: unknown option: --max-turns` (rejected pre-`-p`, sister to #262 parse-side absence).
|
||||
- `claw -p "say hi" --max-turns 0` → `[error-kind: missing_credentials]` (silently absorbed into prompt body, sister to #262 position-sensitive prompt-pollution).
|
||||
- `claw --max-turns 0 prompt "say hi"` → `unknown option: --max-turns` (subcommand path also rejects).
|
||||
- `claw --max-turns=0 -p "say hi"` → `unknown option: --max-turns=0` (`=`-form same).
|
||||
|
||||
This confirms #262's surface verdict (the flag does not exist) and #264's runtime verdict (no typed primitive to plumb into). What remains uncovered, and what #272 catalogues, is the **spec/contract layer underneath both**: even granting #262's CLI flag and #264's `TurnBudget` struct land, the literal value `0` has at least three operationally distinct interpretations, none of which is anchored to a referenced precedent or bound by an interaction matrix in the existing audit triangle.
|
||||
|
||||
Three competing `--max-turns 0` semantics, each operationally valid, mutually inconsistent:
|
||||
|
||||
(1) **Parse-time error** (`MaxTurnsParseError: must be ≥ 1`): treats `0` as out-of-range like a negative number; matches the strict-validator family (`--reasoning-effort yolo` at `main.rs:11288-11294` rejects out-of-set values with a typed `cli_parse` error). Operationally useful for catching shell-substitution bugs (`--max-turns $UNSET_VAR` expanding to `0`). Cost: blocks the cost-zero parse-validation use case the existing pinpoints both endorse.
|
||||
|
||||
(2) **Unlimited sentinel** (`max_turns: 0` ≡ `usize::MAX`): C-stdlib / many Rust APIs use `0` as "no limit" (e.g. `std::io::Read::take(0)` semantics vary by trait, `tokio::sync::mpsc::channel(0)` rejects, but `tower::limit::ConcurrencyLimit::new(0)` admits zero permits). The existing `max_iterations: usize::MAX` default at `runtime/conversation.rs:181` already encodes "unlimited" as a max-int sentinel, and a careless port of that idiom could land `0`-as-unlimited. Operationally hostile: a user typing `--max-turns 0` to validate-only would instead unleash an unbounded loop.
|
||||
|
||||
(3) **Run-zero-iterations fast-return** (`Ok(TurnSummary { iterations: 0, assistant_messages: vec![], no_model_call: true })`): the resolution both #262 fix-shape (d) and #264 fix-shape (g) prescribe in passing. Operationally useful for cost-zero parse-validation ("does my CLI invocation parse, do my hooks load, do my tools register?") without consuming model tokens. But the existing pinpoints prescribe this resolution **without citing the upstream contract** (Anthropic Claude Code's documented `--max-turns 0` behavior, OpenAI Codex's analogous flag, or any Rust runtime precedent), and **without specifying the interaction matrix** below.
|
||||
|
||||
Verified concrete surface (rg across `rust/crates/`): zero `MaxTurnsZero`, zero `ZeroTurnFastReturn`, zero `no_model_call`, zero documented `0`-handling for `with_max_iterations` callers. The existing test at `runtime/conversation.rs:1768` constructs `with_max_iterations(1)` (smallest positive integer tested), not `with_max_iterations(0)` — so even the runtime primitive #264 catalogues has no test coverage for the zero-edge case. The subagent default `DEFAULT_AGENT_MAX_ITERATIONS: usize = 32` at `tools/lib.rs:3475` is also untested at boundary `0`.
|
||||
|
||||
Undefined interaction matrix (each cell needs a documented contract before any `--max-turns 0` semantics is canonical): (i) `--max-turns 0 --allowedTools "Read,Bash"` — does the empty turn still validate the allow-list (could surface `--allowedTools` parse errors typed) or skip validation (cheaper but loses the use case)? (ii) `--max-turns 0 --dangerously-skip-permissions` — does the zero-turn run still record the dangerous-permission flag in session metadata for audit, or is the session never created? (iii) `--max-turns 0` with a `.claw-session.jsonl` resume — does it append a no-op turn record (preserving session continuity), or silently no-op (saving a row but losing the audit trail of the zero-turn invocation)? (iv) `--max-turns 0` with `PreToolUse` / `PostToolUse` hooks registered — do hooks fire (giving observability of "dispatch reached") or skip (matching the no-tool-call contract)? (v) `--max-turns 0 --output-format json` — does the JSON envelope include `iterations: 0, assistant_messages: []` (consistent with #260's compact-JSON envelope shape) or emit a degenerate `null`/empty body? (vi) `--max-turns 0` and `system_prompt` event lane — does the event still emit (so consumers see the system prompt that *would* have been sent) or skip emission?
|
||||
|
||||
Gap. The spec layer is the **prerequisite that #262 and #264's fix-shapes both implicitly assume but neither documents**. #262 prescribes "return immediately after dispatch with `iterations: 0` and no model call" as the right semantic in fix-shape (d), but does not anchor that choice to a referenced upstream contract, does not address negative values explicitly (`--max-turns -1`: error or alias for unlimited?), and does not address `u32::MAX` (does any positive integer mean unlimited, or does the field have to become `Option<u32>`?). #264 prescribes the same `Ok(TurnSummary { iterations: 0, … })` in fix-shape (g) but inherits the same un-anchored decision and adds no interaction-matrix coverage. **No pinpoint in the existing turn-budget cluster catalogues the canonicalization act itself**: choosing semantic (3) over (1) and (2), citing the precedent, and binding the choice across the six interaction cells above.
|
||||
|
||||
Cluster shape novelty: completes the **turn-budget audit triangle** (#262 = CLI-parse layer, #264 = runtime-typed-primitive layer, #272 = spec/contract layer). The triangle now covers all three structural slots a single user-facing flag must occupy before it can ship: the request-shape gap (parse), the type-shape gap (primitive), and the meaning-shape gap (spec). Distinct from #262 (parse-side absence; #262 is "flag does not exist on the parser", #272 is "even if it existed, the value `0` has no canonical meaning"). Distinct from #264 (runtime-primitive absence; #264 is "the type cannot represent the budget", #272 is "the budget value `0` resolves to three different runtime behaviors"). Distinct from #266 (`RuntimeErrorKind` typed-error enum gap; #266 catalogues missing typed-error discriminants, #272 catalogues missing typed-success-with-zero-iterations contract).
|
||||
|
||||
Discovery-pattern continuation: founds the **Spec/contract-canonicalization-gap** sub-shape inside the turn-budget cluster, the FIRST pinpoint where the gap is not absent-flag (#262), absent-primitive (#264), or absent-error-kind (#266) but **absent-canonical-meaning-for-an-edge-value**. This sub-shape is portable: any other knob with a numeric or sentinel-shaped value (`--max-iterations`, `--timeout 0`, `--retries 0`, `--max-output-tokens 0`) has the same three-way ambiguity until canonicalized. Extends the audit-triangle pattern itself to a **three-layer-completeness** primitive: a single user-facing capability requires audit at parse-layer + primitive-layer + spec-layer before any of the three is shippable. Sister-shaped to #245+#250 (WebSearch client+server pair) and #262+#264 (turn-budget parse+primitive pair) but extends those bundles from 2-tuple to 3-tuple.
|
||||
|
||||
Required fix shape: (a) write a `TURN_BUDGET_SPEC.md` (or similar canonical-contract document under `docs/specs/`) that anchors `--max-turns 0` semantics to a referenced upstream precedent (Anthropic Claude Code's documented zero-turn behavior, with link), explicitly resolves the choice as semantic (3) `run-zero-iterations-fast-return`, and explicitly rejects semantics (1) and (2) with rationale; (b) define negative-value handling: `--max-turns -1` rejected at parse-time as `cli_parse` error (mirrors `--reasoning-effort yolo` typed-rejection precedent), with no alias semantics; (c) define unlimited-budget handling: introduce `--max-turns unlimited` as an explicit string sentinel OR document that no value means unlimited and unbounded loops require omitting the flag (avoid u32::MAX-as-sentinel which breaks downstream JSON consumers); (d) document the six-cell interaction matrix above with one paragraph per cell, each binding to a typed event/receipt: zero-turn run with `--allowedTools` validates the allow-list (cell i), records dangerous-permission flag in session-meta (cell ii), appends a typed `ZeroTurnInvocation` row to session.jsonl (cell iii), skips hook execution (cell iv) consistent with no-tool-call contract, emits `iterations: 0, assistant_messages: []` JSON envelope (cell v), and emits the `system_prompt` event so consumers can audit the would-have-been-sent prompt (cell vi); (e) replace `max_turns: u32` with `max_turns: TurnLimit` enum where `pub enum TurnLimit { Unlimited, ZeroFastReturn, Bounded(NonZeroU32) }` so the type system enforces the spec at compile-time and `0` cannot be confused with `unlimited` at any call site; (f) add tests for each interaction-matrix cell (i-vi) plus `TurnLimit::ZeroFastReturn` round-trip through `--output-format json`; (g) cross-reference the spec document from the `--max-turns` help text (#262 fix-shape (b) addition) and from the `TurnBudget` doc-comment (#264 fix-shape (a)) so future readers find canonical meaning 
before runtime behavior. Acceptance: `--max-turns 0` has exactly one documented behavior across CLI/runtime/JSON layers; the type system prevents semantics (1) and (2) from being silently introduced by a refactor; the six interaction cells each have a typed receipt; #262 and #264's fix-shapes can land knowing which `0` they are encoding; future zero-edge knobs (`--retries 0`, `--timeout 0`) have a canonicalization template to follow.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 15:04 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `29c262c` before filing (post-rebase fast-forward onto gaebal-gajae's #271 dogfood-repo-identity-guard pinpoint). Cluster delta: turn-budget cluster 2→3 (#262 parse-layer + #264 primitive-layer + #272 spec-layer = audit-triangle complete on a single user-facing flag); Spec/contract-canonicalization-gap sub-shape introduced (NEW sub-shape inside turn-budget cluster, portable to any sentinel-shaped numeric flag); complementary-pinpoint-pair-bundle discovery-pattern extended from 5 bundles to a first **three-tuple** (#262+#264+#272). Smaller-scope by design (matches #253-#271 context-budget discipline). Sister: #262 (parse-side; #262+#272 bracket the parse boundary's request-shape and meaning-shape gaps), #264 (primitive-side; #264+#272 bracket the runtime layer's type-shape and meaning-shape gaps), #266 (runtime-error-enum gap, parallel typed-meaning-axis but on errors not successes). Distinct from #271 (repo-identity guard at the dogfood layer; orthogonal to the turn-budget audit triangle). Distinct from silent-fallback family (catalogues silent input/output mutation; #272 catalogues missing canonical meaning at edge value). Concurrent-dogfood-rebase parity will be confirmed local==origin==fork at HEAD `29c262c+#272` after push.
|
||||
|
||||
## Pinpoint #273 — `claw status --output-format json` reports branch and dirty/clean state but omits HEAD SHA, upstream remote URL, ahead/behind counts, fetch timestamp, and source-of-truth repo identity, so machine consumers cannot detect stale/wrong-repo status from the product status surface itself
|
||||
|
||||
Dogfooded 2026-04-26 15:31 KST on `feat/jobdori-168c-emission-routing` at HEAD `ba6c5bc` (post-#272). Fresh `cargo run --quiet --bin claw -- --output-format json status` from `rust/` emits a useful workspace object (`cwd`, `project_root`, `git_branch`, `git_state`, changed/staged/unstaged/untracked counts, config/memory counts), but it omits the actual commit identity and provenance fields needed by dogfood automation: no `head_sha`, no `head_message`, no `head_timestamp`, no `upstream_branch`, no `upstream_remote_url`, no ahead/behind counts, no `last_fetch_at`, no canonical repo/source-of-truth slug, no `roadmap_last_pinpoint`, and no staleness marker. Text mode has the same gap: `Git branch feat/jobdori-168c-emission-routing` and `Git state clean`, but no commit/remotes/freshness.
|
||||
|
||||
Concrete failure mode: downstream claws can call the product-owned `claw status` surface and still cannot prove they are on the same HEAD as origin/fork, cannot distinguish `ultraworkers/claw-code` from a similarly named sibling repo, cannot detect that a local worktree is behind by one or more ROADMAP filings, and cannot cite the exact commit used for a dogfood report without shelling out to `git rev-parse`, `git remote -v`, `git rev-list --left-right --count`, and ROADMAP parsing. This is exactly the metadata that #259 (fresh status provenance) and #271 (repo identity guard) require, but it is missing from the canonical local status command that automation would naturally consume.
|
||||
|
||||
Gap. `claw status` is currently a local workspace cleanliness snapshot, not a provenance/freshness snapshot. That is fine for a human pre-commit check, but insufficient for recurring dogfood/status automation. This is distinct from #259, which requires dogfood status reports to include provenance; #273 identifies that the underlying product status surface does not provide those fields. It is distinct from #271, which requires repo-identity guards in dogfood generation; #273 identifies the missing repo identity fields in the product's JSON status. It is distinct from #269 transport delivery: #273 is pre-delivery status truth.
|
||||
|
||||
Required fix shape: (a) extend `claw status --output-format json` `workspace` with `git_head_sha`, `git_head_short`, `git_head_message`, `git_head_timestamp`, `upstream_branch`, `upstream_remote_name`, `upstream_remote_url`, `ahead`, `behind`, `last_fetch_at` (nullable), and `source_of_truth_repo` derived from the existing `OFFICIAL_REPO_URL`; (b) add optional `roadmap_last_pinpoint` / `roadmap_path` when `ROADMAP.md` exists at project root; (c) add `staleness_seconds` or `freshness_status` when upstream data is available, and `freshness_status: unknown_no_fetch` when it is not; (d) mirror the key fields in text mode compactly; (e) regression-test clean, ahead, behind, detached, no-upstream, and wrong-remote fixtures. Acceptance: a dogfood reporter can consume only `claw status --output-format json` plus ROADMAP content and refuse stale/wrong-repo reports without ad hoc git shelling.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 15:31 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `ba6c5bc` before filing. Cluster delta: product-status-provenance +1; sibling to #259 (report provenance) and #271 (repo identity guard), but distinct product status surface layer. Concrete delta this cycle: ROADMAP-only pinpoint appended after live `claw status` JSON/text verification.
|
||||
|
||||
## Pinpoint #274 — MCP tool calls and results render with the generic untyped fallback formatter while native tools get rich field-aware renderers, so every `mcp__server__tool` invocation prints a 96-char JSON summary plus a raw pretty-printed result block instead of the structured icon/path/lines/preview affordances `bash`/`Read`/`Write`/`Edit`/`Glob`/`Grep`/`WebSearch` enjoy
|
||||
|
||||
Dogfooded 2026-04-26 15:32 KST on `feat/jobdori-168c-emission-routing` at HEAD `f36f283` (post-#273). Static audit of `rust/crates/rusty-claude-cli/src/main.rs` `format_tool_call_start` (line 8504) and `format_tool_result` (line 8557) confirms the rendering arms match exclusively on native tool aliases: `"bash" | "Bash"`, `"read_file" | "Read"`, `"write_file" | "Write"`, `"edit_file" | "Edit"`, `"glob_search" | "Glob"`, `"grep_search" | "Grep"`, `"web_search" | "WebSearch"`. MCP qualified names follow the `mcp__{server}__{tool}` shape produced by `runtime::mcp::mcp_tool_name` (e.g., `mcp__claude_ai_Example_Server__weather_tool`); none of these match any specialized arm and both functions fall through to the wildcard `_ =>` branch — `summarize_tool_payload(input)` (96-char JSON-compaction truncate) for the call-start banner and `format_generic_tool_result(icon, name, &parsed)` (pretty-printed JSON dump capped at 60 lines / 4000 chars) for the result.
|
||||
|
||||
Concrete failure mode: an MCP server like `mcp__filesystem__read_file` performs the same logical operation as the native `Read` tool, but the user sees `╰─ mcp__filesystem__read_file ─╮` with `{"path":"…"}` JSON-summarized to 96 chars and a raw JSON pretty-print of the file content as result, instead of the native `📄 Reading <path>…` start banner with structured `✓ read_file: <line-count> lines` rendering. Same goes for MCP search tools (no `🔎` icon, no match summary), MCP write tools (no `✏️ Writing <path> (<lines> lines)` banner, no diff preview), and MCP shell tools (no `format_bash_result` exit-code/stdout/stderr structuring). Worse, MCP tool input schemas are typically known to the client (`tools/list` returns `inputSchema`), so the renderer has the metadata to extract semantic fields like `path`, `query`, `command`, `content`, `pattern` — it just doesn't.
|
||||
|
||||
Gap. The renderer treats MCP tools as opaque black boxes even though they cover the same semantic categories as native tools (file ops, search, shell, web). This is distinct from #254 (MCP Resources lifecycle absence — server-side concept), distinct from #258/#266/#272 (CLI parse / typed-error / spec gaps), distinct from #268 (`tools/list` re-fetch on resume — staleness, not rendering), and distinct from #261 (compact-summary internal consistency — doesn't touch tool rendering). It founds a NEW `mcp-vs-native-tool-rendering-parity` cluster on the rendering axis and pairs structurally with #268 along the MCP-axis (#268 = catalog freshness, #274 = rendering parity), forming an MCP cross-axis bundle.
|
||||
|
||||
Required fix shape: (a) introduce a `ToolRenderingProfile` enum keyed off the runtime tool category (FileRead, FileWrite, FileEdit, Search, Shell, WebSearch, Generic) and have both native and MCP tools advertise their category at registration time; (b) when an MCP tool's `inputSchema` declares well-known field names (`path`, `paths`, `query`, `pattern`, `command`, `content`), extract them in `format_tool_call_start` via a schema-aware path-extractor that supersedes the current `extract_tool_path` hardcoded key list; (c) thread server identity through the renderer so MCP tool banners can show `⚡ {server} · {tool}` instead of the raw `mcp__server__tool` underscore-glob; (d) emit a `tool_call_render_kind` field in the JSON envelope (`native_typed`, `mcp_typed`, `mcp_generic`, `untyped_fallback`) so dogfood audits can count parity coverage over time; (e) regression-test that an MCP tool whose schema declares `path: string` renders the same `📄 Reading {path}…` start banner as native `Read`, that an MCP shell tool with `command: string` renders the same exit-code/stdout/stderr structure as native `bash`, and that unknown-schema MCP tools fall back to `format_generic_tool_result` cleanly without breaking layout. Acceptance: an MCP `read_file` and a native `Read` of the same path produce visually equivalent terminal output, and the JSON envelope's per-tool `tool_call_render_kind` field is populated for every tool call.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 15:32 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `f36f283` before filing. Cluster delta: founds NEW `mcp-vs-native-tool-rendering-parity` cluster (1 member); pairs with #268 (MCP catalog freshness) on the MCP-axis as a cross-axis bundle (rendering × staleness). Concrete delta this cycle: ROADMAP-only pinpoint appended after static audit of `format_tool_call_start` / `format_tool_result` rendering arms vs `runtime::mcp::mcp_tool_name` qualified-name shape — zero MCP-aware match arms, full fallback to generic untyped path.
|
||||
|
||||
## Pinpoint #275 — `claw doctor --output-format json` splits repo provenance across unrelated `install source`, `workspace`, and `system` checks, so automation cannot consume one authoritative workspace provenance object even though the needed fragments are partially present
|
||||
|
||||
Dogfooded 2026-04-26 16:02 KST on `feat/jobdori-168c-emission-routing` at HEAD `fdf8890` (post-#274). Fresh `cargo run --quiet --bin claw -- --output-format json doctor` shows the data needed for provenance is scattered: the `install source` check has `official_repo: https://github.com/ultraworkers/claw-code`, the `workspace` check has `cwd`, `project_root`, `git_branch`, and `git_state`, while the `system` check has `git_sha: fdf88903`. There is no single object tying those together as "this workspace is repo X, branch Y, head Z, clean/dirty, upstream A, source-of-truth B". The text report has the same split: official repo under Install source, branch/clean under Workspace, Git SHA under System.
|
||||
|
||||
Concrete failure mode: a dogfood/status consumer that uses `doctor` instead of `status` can see official source-of-truth and a local Git SHA, but cannot know whether that SHA belongs to the workspace branch being inspected, whether the workspace remote actually matches the official repo, whether the local branch is ahead/behind origin/fork, or whether the official repo check is merely a static install warning unrelated to the current cwd. This is the doctor-surface sibling of #273: #273 found `claw status` lacks provenance fields entirely; #275 finds `claw doctor` has fragments but no normalized provenance object.
|
||||
|
||||
Gap. Diagnostic surfaces duplicate and diverge: `status` is branch/clean focused, `doctor` is health-check focused, and neither emits a canonical `workspace_provenance` object. This forces downstream claws to shell out to git or scrape multiple `doctor.checks[]` entries and infer joins by name. It is distinct from #259/#271 (dogfood report provenance/repo guard) and distinct from #273 (status surface missing fields); #275 targets the doctor health surface's fragmented schema.
|
||||
|
||||
Required fix shape: (a) add a top-level `workspace_provenance` object to `doctor` JSON containing `project_root`, `cwd`, `git_head_sha`, `git_branch`, `git_state`, `remote_urls`, `upstream_branch`, `ahead`, `behind`, `official_repo`, and `repo_identity_status: matches_official|fork_of_official|mismatch|unknown`; (b) have `status` and `doctor` share the same provenance struct/renderer so fields cannot drift; (c) in text mode, add one compact Provenance section instead of scattering related fields across Install source/Workspace/System; (d) add tests proving a wrong remote reports `repo_identity_status: mismatch` without needing downstream string scraping. Acceptance: automation can read one `doctor.workspace_provenance` object and decide whether the current cwd is the intended claw-code worktree at the expected HEAD.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 16:02 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `fdf8890` before filing. Cluster delta: product-diagnostic-provenance +1; sister to #273 (`status` provenance) but distinct `doctor` fragmented-schema surface. Concrete delta this cycle: ROADMAP-only pinpoint appended after live `doctor` JSON/text verification.
|
||||
|
||||
## Pinpoint #276 — `--allowedTools` help line advertises only the camelCase form and an opaque "repeatable; comma-separated" prose, while `parse_args` additionally dispatches the kebab-case alias `--allowed-tools` and both `=`-form variants (`--allowedTools=VAL` / `--allowed-tools=VAL`), so the listed-flag's alias-and-value-shape coverage in help is structurally incomplete vs the parser — third help-contract-drift sub-axis distinct from #263 (listed-flag-stale-mode-matrix) and #270 (whole-flag-omitted)
|
||||
|
||||
Dogfooded 2026-04-26 16:14 KST on `feat/jobdori-168c-emission-routing` at HEAD `0240cad` (post-rebase fast-forward onto gaebal-gajae's #275 `claw doctor` provenance fragmentation pinpoint). Fresh `cargo run --quiet --bin claw -- --help` lists `--allowedTools TOOLS Restrict enabled tools (repeatable; comma-separated aliases supported)` at `rust/crates/rusty-claude-cli/src/main.rs:9418` and `claw [--model MODEL] [--allowedTools TOOL[,TOOL...]]` at `:9334` — both surfaces use only the camelCase `--allowedTools` token. Source inspection of `parse_args` at `:979-994` shows the dispatcher accepts four shape variants for the same flag: `--allowedTools VAL` (line 979 first arm), `--allowed-tools VAL` (same arm OR-pattern), `--allowedTools=VAL` (line 986 prefix arm), and `--allowed-tools=VAL` (line 990 second prefix arm). Live verification with the freshly rebuilt `target/debug/claw`: all four shapes parse past the CLI parse stage and fail downstream with `[error-kind: missing_credentials]`, confirming each variant is fully dispatch-accepted. Repeated invocation (`--allowedTools read --allowedTools glob`) also dispatches successfully, confirming the help's bare "repeatable" prose corresponds to a real wired surface but with no documented composition rule (does the second occurrence replace, append, or set-union the first?).
|
||||
|
||||
Concrete failure mode: an operator scripting `claw` with shell tooling that prefers kebab-case-only conventions (e.g. POSIX-style argv generators, automation that derives flag names from snake_case fields via `s/_/-/g`, or downstream claws that mirror Anthropic Claude Code's documented `--allowed-tools` form) sees `claw --help` advertise only `--allowedTools` and either (a) avoids the kebab-case alias under the false belief it does not exist; or (b) discovers it accidentally by typing it; or (c) reads source. The `=`-form for both casings is the same: an operator habituated to `--flag=value` shell syntax cannot tell from help that `--allowedTools=read,glob` is a real path and may avoid it. The "repeatable" prose has no example showing whether `--allowedTools read --allowedTools glob` set-unions to `{read, glob}` or whether the second occurrence overwrites — fresh verification of `normalize_allowed_tools` at `main.rs:1826` and `current_tool_registry()?.normalize_allowed_tools(values)` at `tools/src/lib.rs:192` shows the values are accumulated into a flat `Vec<String>` and each is comma-split-and-flattened into a `BTreeSet`, so the actual semantic is set-union — but the help prose neither states this nor cites the alternative.
|
||||
|
||||
Gap. The help-contract-drift cluster's third sub-axis: a listed flag whose dispatch surface accepts more shapes (alias casings, `=`-form, repetition with a defined composition rule) than the help line advertises. This is distinct from #263 (listed-flag-stale-mode-matrix: a documented flag whose mode-compatibility prose contradicts dispatch — `--compact` claims text-only while JSON dispatch path exists) and distinct from #270 (whole-flag-omitted: a fully wired flag with zero help line — `--reasoning-effort`, `--base-commit`, `--allow-broad-cwd`, `-p`). #276 catalogues the **listed-flag-incomplete-shape-coverage** sub-shape: the flag IS in help, but the help understates which forms parse, what the value-shape composition rule is, and whether common alias conventions (kebab-case, `=`-form, repetition semantics) apply. The cluster now has 3 members (#263 + #270 + #276) covering three structurally distinct help-vs-dispatch divergence shapes: stale mode (listed, wrong info), omission (unlisted, full info elsewhere), and incomplete shape (listed, partial info).
|
||||
|
||||
Distinct from #258 (`--allowedTools ""` empty-value silent-coercion at the CLI parse boundary; #258 is a runtime acceptance-of-malformed-input gap, #276 is a help-text-vs-dispatch-shape-coverage gap on the same flag). Distinct from #267 (`prompt TEXT` greedy-slurp parse-asymmetry; that's a parse-side contract gap on a different surface). Distinct from #265 (`stream-json` output mode absent from BOTH help and dispatch). Distinct from #262 (`--max-turns` flag missing from BOTH help and dispatch).
|
||||
|
||||
Discovery-pattern continuation: completes the help-contract-drift cluster's three-sub-axis audit triangle (stale-mode #263 + omitted-flag #270 + incomplete-shape #276), structurally analogous to the turn-budget audit triangle (#262 parse + #264 primitive + #272 spec) — both clusters now occupy three distinct structural slots a single CLI surface can fail at. Extends the **complementary-pinpoint-pair-bundle** discovery-pattern from 5 pair-bundles + 1 three-tuple (turn-budget #262+#264+#272) to 5 pair-bundles + 2 three-tuples (now help-contract-drift #263+#270+#276). The two three-tuples are sister-shaped: each catalogues that audit-completeness for a single user-facing CLI surface requires pinpointing THREE distinct sub-axes rather than two.
|
||||
|
||||
Required fix shape: (a) extend the `Flags:` block at `print_help_to:9418` to advertise both casings (`--allowedTools, --allowed-tools TOOLS`) on a single line, matching the OR-pattern in `parse_args:979`; (b) document `=`-form support inline (`--allowedTools=TOOLS, --allowed-tools=TOOLS` accepted) — the existing prose offers no signal that `=`-form parses; (c) document the repetition composition rule explicitly: `repeatable: each occurrence set-unions into the allow-list; pass once per logical group or comma-separate within one occurrence` — eliminating the ambiguity between replace/append/union semantics; (d) add an `Examples:` line showing kebab-case + repetition: `claw --allowed-tools read --allowed-tools glob "summarize Cargo.toml"`; (e) add a help-vs-dispatch alias-coverage regression test that asserts every flag-name-string-literal in `parse_args` (including OR-patterns and `starts_with` prefix arms) appears at least once in the captured `print_help_to` output — mechanically forces help-vs-dispatch alias sync for future flags; (f) extend the test to cover `=`-form variants by asserting that any flag accepting `--foo VAL` form also has its `--foo=VAL` form documented when both arms exist; (g) audit other listed flags for the same incomplete-shape-coverage sub-shape: `--output-format` (does the parser accept `--output-format=json`? — yes per `:889`, but help shows only the space-form), `--permission-mode` (same — `:893` accepts `=`-form, help shows only space-form), `--model` (verify), `--base-commit` after #270 fix lands. Acceptance: `claw --help` discloses every shape variant the parser accepts for every listed flag; the regression test fails when a new alias/equals-form arm is added without a help update; #263 + #270 + #276 together close the help-contract-drift cluster on all three structurally distinct help-vs-dispatch divergence shapes.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 16:14 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `0240cad` before filing (post-rebase fast-forward onto gaebal-gajae's #275 `claw doctor` provenance fragmentation pinpoint). Cluster delta: help-contract-drift cluster 2→3 (#263 stale-mode-matrix + #270 whole-flag-omitted + #276 listed-flag-incomplete-shape-coverage = three-sub-axis audit triangle complete on the help surface); complementary-pinpoint-pair-bundle discovery-pattern extended to 5 pair-bundles + 2 three-tuples (turn-budget #262+#264+#272 + help-contract-drift #263+#270+#276). Smaller-scope by design (matches #253-#275 context-budget discipline). Sister: #263 (stale-mode-matrix sub-axis), #270 (whole-flag-omitted sub-axis); #276 occupies the third structurally distinct sub-axis (listed-flag-incomplete-shape-coverage). Distinct from #258 (silent-coercion of empty value at parse boundary on the same flag; orthogonal layer — runtime acceptance vs help advertisement). Distinct from turn-budget cluster three-tuple (#262+#264+#272: parse/primitive/spec layers; #263+#270+#276 are three sub-axes within a single layer — the help surface). Concurrent-dogfood-rebase parity will be confirmed local==origin==fork at HEAD `0240cad+#276` after push.
|
||||
|
||||
## Pinpoint #277 — Dogfood reminder delivery can fail with bare `Unknown Channel` because the nudge/report path does not pre-resolve and validate channel targets against the live provider directory before attempting send
|
||||
|
||||
Dogfooded 2026-04-26 16:30 KST from the live `#clawcode-building-in-public` loop after #276. The dogfood reminder/status loop emitted `Cron job "clawcode-dogfood-cycle-reminder" failed: Error: Unknown Channel` in the same channel that was otherwise actively receiving git hooks, Jobdori reports, and Clawhip nudges. The visible error contains no target channel id/name, no provider account, no guild id, no route key, no indication of whether the channel was deleted vs not in cache vs wrong provider vs permission denied, and no retry/fallback target.
|
||||
|
||||
Concrete failure mode: a recurring dogfood nudge can die before delivery because the configured target cannot be resolved at send time, but the only surfaced signal is a bare provider error. Operators cannot tell whether the cron used a stale channel id, a name instead of id, the wrong Discord account/guild, a missing allowlist route, or a transient directory/cache miss. This is distinct from #269: #269 covers payload-size truncation and post-send delivery receipts; #277 covers pre-send target resolution and channel identity validation before any payload is sent.
|
||||
|
||||
Gap. There is no typed `channel_resolution` preflight in the dogfood delivery path. A robust cycle should resolve `{provider, guild_id, channel_id, channel_name, route_key}` before rendering/sending the report, cache the resolved identity with a freshness timestamp, and fail closed with a typed diagnostic if the target is unknown. The current surface lets a low-level `Unknown Channel` bubble up with no context, which makes the next action ambiguous and risks repeated cron failures against the same bad target.
|
||||
|
||||
Required fix shape: (a) add a channel-target preflight before dogfood reminder send that resolves the configured target to a canonical channel id/guild/provider tuple; (b) emit a typed `delivery_target_resolution_failed { provider, configured_target, guild_id, reason, directory_freshness_ms, fallback_targets }` event instead of bare `Unknown Channel`; (c) distinguish `not_found`, `permission_denied`, `wrong_provider`, `wrong_guild`, `cache_stale`, and `deleted_or_archived`; (d) include the resolved target tuple in successful delivery receipts (#269 sibling) so later reports can prove which channel was used; (e) add regression coverage where a stale channel id fails at preflight with a typed diagnostic and does not attempt message send. Acceptance: dogfood cron never surfaces a naked `Unknown Channel`; it reports the exact configured target, resolution failure class, and safe next action.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 16:32 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `cad7bb1` before filing. Cluster delta: dogfood-delivery-target-resolution +1; sibling to #269 (payload/delivery receipt) and #246 (cron timeout ambiguity), distinct pre-send channel identity layer. Concrete delta this cycle: ROADMAP-only pinpoint appended from live `Unknown Channel` cron failure evidence.
|
||||
|
||||
## Pinpoint #278 — `Session::from_jsonl` and `Session::from_json` parse the `version` field, store it verbatim, but never compare it against `SESSION_VERSION`, so a session file with any `u32` version (past, future, or corrupted) loads successfully and is silently treated as the current schema with default-filled-or-dropped fields
|
||||
|
||||
Dogfooded 2026-04-26 16:34 KST on `feat/jobdori-168c-emission-routing` at HEAD `4e4edc8` (post fast-forward onto gaebal-gajae's #277 channel-resolution preflight pinpoint). Static audit of `rust/crates/runtime/src/session.rs` shows `const SESSION_VERSION: u32 = 1;` at line 12, and three call sites that touch the field: `Session::new` at line 162 sets `version: SESSION_VERSION` on creation; `Session::from_json` at lines 338-343 parses `version` via `required_u32` and returns `SessionError::Format("version out of range")` only when `u32::try_from` fails; `Session::from_jsonl` at lines 406+445 initializes a local `let mut version = SESSION_VERSION;` and overwrites it from the `session_meta` record's `version` field via `required_u32`. In every loader path the parsed value is stored in the returned `Session.version` field without any comparison against `SESSION_VERSION`. `grep -rn "SESSION_VERSION" rust/crates/runtime/src/session.rs` returns exactly three hits — declaration + two assignment sites — and zero comparison/migration/reject sites. `grep -iE "migrat|incompat|upgrade|downgrade|reject|mismatch|future|forward" rust/crates/runtime/src/session.rs` returns zero matches.
|
||||
|
||||
Concrete failure mode: a session JSONL produced by a future claw-code release with `version: 2` and new schema fields (e.g. an unforeseen `tool_call_render_kind` per #274, a `workspace_provenance` block per #275, a `health_check_history` array, or a renamed `compaction` shape) is loaded by an older claw-code build. The older build silently accepts `version: 2`, drops every unknown record type via `"unsupported JSONL record type at line {}: {other}"` (which is structurally fine for forward-compat), but stores `self.version = 2` in memory, then on next save writes back a `session_meta` record with `version: 2` mixed with v1-shape data. Symmetrically, a corrupted or hand-edited session with `version: 999` or `version: 0` loads without warning and round-trips as if it were the live schema. There is no operator-visible signal — no warning log, no typed error, no `--strict-version` opt-in — that the on-disk schema does not match the binary's expectations. Combined with #259/#271/#273/#275 (provenance fragmentation across dogfood/status/doctor surfaces), this means a session file's schema-of-record is not auditable from any product surface either.
|
||||
|
||||
Gap. There is no version-mismatch policy. The on-disk `version` field is treated as an opaque tag rather than a contract. A correct loader for a versioned format must either (i) accept only `version == SESSION_VERSION` and reject everything else with a typed `SchemaVersionMismatch { found, expected }` error, (ii) maintain an explicit migration table that upgrades older versions to the current shape and refuses unknown future versions, or (iii) document a forward-compat policy with explicit field-level handling rules. claw-code does none of these; the field is parsed for storage only. This is structurally identical to a database without a schema_version column being silently bumped — the data still loads, but downstream consumers cannot tell whether they got the schema they expected.
|
||||
|
||||
Distinct from #259 (dogfood report provenance — runtime emission, not on-disk persistence). Distinct from #271 (repo-identity guard — workspace remote provenance, not session schema). Distinct from #273/#275 (status/doctor diagnostic-surface provenance fragmentation — product surface field layout, not session loader semantics). Distinct from #266 (typed-error-kind for credentials missing — error-kind for one specific runtime decision, not a missing-error-kind for schema versioning). Distinct from the entire help-contract-drift cluster (#263+#270+#276 — CLI surface vs dispatcher), the turn-budget triangle (#262+#264+#272 — parse/primitive/spec layers), the MCP-axis cluster (#254+#268+#274+#275 — MCP runtime/catalog/rendering/doctor), and the provenance quartet (#259+#271+#273+#275 — provenance surfacing). #278 founds a NEW `persisted-schema-version-policy` cluster on the persistence-layer axis — the first pinpoint to target what the loader does (or fails to do) with the `version` field on disk rather than what the diagnostic surface emits about state.
|
||||
|
||||
Discovery-pattern continuation: extends the structural-gap-without-source-change discovery-pattern (silent-fallback cluster style — accept input that should be rejected) into the on-disk persistence layer. Pairs structurally with the silent-fallback cluster: silent-fallback accepts malformed CLI/runtime input without a typed error; #278 accepts wrong-version on-disk state without a typed error. Both are structurally-absent-error-kind gaps but in different layers (input vs persisted state). Pairs orthogonally with the provenance quartet: provenance is about emitting state-of-record at runtime; #278 is about validating state-of-record at load. Together they form a state-of-record cross-axis bundle (emission × validation).
|
||||
|
||||
Required fix shape: (a) add a typed `SessionError::SchemaVersionMismatch { found: u32, expected: u32, policy: VersionPolicy }` variant where `VersionPolicy` is `Strict | MigrationAvailable | ForwardCompatibleReadOnly`; (b) at the top of `from_json` and after the `session_meta` parse in `from_jsonl`, compare the parsed `version` against `SESSION_VERSION` and short-circuit when not equal under the configured policy; (c) introduce a small `migrate_session(version: u32, raw: &JsonValue) -> Result<Session, SessionError>` table even if the only entry today is `1 -> 1` identity, so future versions land with one well-known extension point; (d) when loading a future version under a `ForwardCompatibleReadOnly` policy, refuse to write back to the same path (preserve the original) and surface a one-time warning; (e) extend tests to cover `version: 0` (rejected/migrated), `version: 2` (rejected or read-only), `version: 999` (rejected), and a missing `version` field (rejected with a clear message rather than silently defaulting to `SESSION_VERSION`); (f) add a session-version provenance line to `claw status` and `claw doctor` output (closes the #273/#275 surface gap for this specific schema-of-record dimension) so operators can inspect on-disk schema age vs binary schema age without scraping JSONL. Acceptance: a session file with any version other than `SESSION_VERSION` produces a typed, surface-visible diagnostic before any in-memory state is mutated; the migration table is the single extension point for future bumps; `claw status` and `claw doctor` show `session_schema_version` as a first-class provenance field.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 16:34 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `4e4edc8` before filing (post fast-forward onto gaebal-gajae's #277 channel-resolution preflight). Cluster delta: founds NEW `persisted-schema-version-policy` cluster (1 member) on the persistence-layer axis. Cross-axis bundle with silent-fallback cluster (input vs persisted state, structurally-absent-error-kind) and with provenance quartet #259+#271+#273+#275 (emission vs load-validation of state-of-record). Concrete delta this cycle: ROADMAP-only pinpoint appended after static audit of `Session::from_json`/`Session::from_jsonl` version-handling arms — three call sites for `SESSION_VERSION` (1 declaration + 2 assignments, 0 comparisons), zero migration/mismatch sites in the entire `runtime/src/session.rs` file. Concurrent-dogfood-rebase parity will be confirmed local==origin==fork at HEAD `4e4edc8+#278` after push.
|
||||
|
||||
## Pinpoint #279 — Session loader silently drops unknown fields inside known JSON/JSONL records with no extension policy or preservation map, so future schema data can be erased even if #278 adds version checks later
|
||||
|
||||
Dogfooded 2026-04-26 17:02 KST on `feat/jobdori-168c-emission-routing` at HEAD `6c154c9` (post-#278). Static audit of `rust/crates/runtime/src/session.rs` shows the loader rejects unknown top-level JSONL record `type` values (`unsupported JSONL record type` at lines 476-480), but for known records it cherry-picks recognized fields and drops all extras. `Session::from_json` reads `version`, `messages`, timestamps, `compaction`, `fork`, `workspace_root`, `prompt_history`, and `model`, then constructs `Session` with no `extensions` / `unknown_fields` preservation. `from_jsonl` does the same for `session_meta`, `message`, `compaction`, and `prompt_history`: any future field inside a known record (for example #268's `tool_catalog_revision`, #272's `ZeroTurnInvocation`, or #273's workspace provenance) is ignored on load and omitted on the next save/render.
|
||||
|
||||
Concrete failure mode: a future v2 session can add fields under known record types and be loaded by a v1 binary without an error, warning, or preservation. If the session is later saved/rotated/compacted, those fields disappear. #278 catches that the `version` tag itself is never compared, but even with a version comparison policy the field-level behavior needs a separate contract: should unknown fields in known records be rejected, preserved, quarantined, or ignored? Today the answer is implicit silent data loss.
|
||||
|
||||
Gap. The session persistence format lacks an extension/unknown-field policy. It is neither strict (fail on unknown fields in known records) nor forward-compatible (preserve unknown fields for round-trip) nor explicitly lossy (emit a warning/receipt when dropping them). This is distinct from #278, which targets the `version` field not being compared; #279 targets the per-record field preservation policy after a record type is accepted. It is also distinct from #259/#273 provenance emission gaps: this is load/save behavior for persisted state-of-record.
|
||||
|
||||
Required fix shape: (a) define a schema policy for unknown fields in known session records: strict reject for incompatible versions OR lossless preservation via `extensions: BTreeMap<String, JsonValue>`; (b) if rejecting, include record type, line number, and field names in a typed `SessionSchemaError::UnknownFields`; (c) if preserving, round-trip unknown fields through `to_json`, `render_jsonl_snapshot`, compaction, and rotation; (d) add `schema_extensions_dropped` warning telemetry if any field is intentionally ignored; (e) regression-test JSON and JSONL sessions containing a future `tool_catalog_revision` field inside `session_meta` and a future per-message field, proving they either fail typed or survive a load-save round-trip. Acceptance: a future schema field in a known record can never disappear silently.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 17:02 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `6c154c9` before filing. Cluster delta: persisted-schema-version-policy 1→2 (#278 version-comparison + #279 unknown-field policy); sibling to #278, distinct field-preservation layer. Concrete delta this cycle: ROADMAP-only pinpoint appended after static audit of `Session::from_json` / `from_jsonl` field selection.
|
||||
|
||||
## Pinpoint #280 — Hook execution progress events (`PreToolUse`/`PostToolUse` Started/Completed/Cancelled) are eprintln-text-only when `emit_output=true` and dropped on the floor entirely when `--output-format json` is selected, so the per-turn JSON envelope shows `tool_uses`/`tool_results`/`auto_compaction`/`usage` but zero hook execution evidence even though hooks fire and can deny/mutate/cancel tools
|
||||
|
||||
Dogfooded 2026-04-26 17:05 KST on `feat/jobdori-168c-emission-routing` at HEAD `bdcf3fa` (post fast-forward onto gaebal-gajae's #279 unknown-fields silent-drop pinpoint). Static audit of the JSON-mode dispatch path: `LiveCli::run_prompt_json` at `rust/crates/rusty-claude-cli/src/main.rs:4690-4729` calls `prepare_turn_runtime(false)` (line 4691) — the bool argument is `emit_output`. `prepare_turn_runtime` at `:4567-4587` then calls `build_runtime(..., emit_output, ...)` at `:4574-4583`, which at `:7726-7728` only attaches `with_hook_progress_reporter(Box::new(CliHookProgressReporter))` when `emit_output` is `true`. In the JSON-mode path, `emit_output=false`, so the runtime gets NO progress reporter at all. `Conversation::run_pre_tool_use_hook` at `rust/crates/runtime/src/conversation.rs:224-241` and `run_post_tool_use_hook` at `:243-273` both branch on `if let Some(reporter) = self.hook_progress_reporter.as_mut()`: with no reporter, the hook still EXECUTES (the `None` arm passes `None` for the reporter slot to `HookRunner::run_pre_tool_use_with_context`/`run_post_tool_use_with_context`), but every `HookProgressEvent::{Started, Completed, Cancelled}` emission inside `runtime/src/hooks.rs` (lines 347, 366, 376, 388, 400) is conditioned on a `reporter.on_event(...)` call that never happens. Even in the text path where `emit_output=true`, `CliHookProgressReporter::on_event` at `main.rs:7735-7762` writes to `eprintln!` only — never to stdout, never to the JSON envelope, never to a `--output-format json`-consumable structured stream. The JSON envelope at `:4699-4724` exposes nine top-level fields (`message`, `model`, `iterations`, `auto_compaction`, `tool_uses`, `tool_results`, `prompt_cache_events`, `usage`, `estimated_cost`) and zero hook evidence.
|
||||
|
||||
Concrete failure mode: an operator scripts `claw -p "do thing" --output-format json | jq` against a workspace whose `~/.claw/settings.json` defines a `PreToolUse` hook that **denies** specific tool patterns, **mutates** tool input (the `updated_input_json` path at `runtime/src/hooks.rs:149`), or **cancels** the turn (`is_cancelled`/`is_failed`/`is_denied` arms at `conversation.rs:409-440`). The conversation outcome reflects the hook decision (the tool may be denied with `PermissionOutcome::Deny { reason: "PreToolUse hook cancelled tool ..." }` at `conversation.rs:413-416`), and the deny reason flows into a `tool_result` synthesized at the permission layer — but the JSON envelope never records that a hook fired, what hook (`pre_tool_use`/`post_tool_use`/`pre_tool_use_failure`/`post_tool_use_failure`), what command, what exit code, what abort path, what stdout/stderr preview, or even how many hook invocations happened across the turn. The operator's only signal that a hook touched the turn is reverse-inference from a `tool_result` reason string, which is itself a #266-style untyped-prose surface. For an automation that wants to verify "the security-audit PreToolUse hook ran on every Write/Bash invocation in this turn," the JSON envelope is structurally incapable of answering — there is no `hooks: [...]` array, no `hook_invocations: N`, no `pre_tool_use_outcomes`, no per-invocation receipt. Symmetrically, when `emit_output=true` (text mode), hook events go to stderr as `[hook pre_tool_use] BashTool: pretty.sh` prose, which is human-readable but not structured and competes with model streaming text on the same terminal.
|
||||
|
||||
Gap. Hook execution observability is wired into the runtime (`HookProgressEvent` enum at `runtime/src/hooks.rs:40-58`, `HookProgressReporter` trait at `:59-61`, four reporter call sites at `:347/366/388/400`) but only ever surfaces as opt-in stderr text, never as a structured channel that aligns with the existing `tool_uses`/`tool_results` JSON-envelope schema. The structural shape is identical to #107 (doctor-side hook subsystem opacity) but at a strictly different layer: #107 is about `claw doctor` hook-config visibility (audit-once); #280 is about per-turn `--output-format json` hook-execution visibility (per-prompt evidence). It is also adjacent to but distinct from #265 (`stream-json` mode entirely absent — no streaming lane at all) — #265 is about the missing output mode, #280 is about a missing field within the existing JSON output mode. Distinct from #260 (`--compact --output-format json` strips six fields: hooks were never one of those six in EITHER non-compact or compact envelope; this is a third structural absence, not a strip-on-compact). Distinct from #109 (config validator warnings stderr-only) — different subsystem, same plumbing pattern (structured-data-relegated-to-stderr-prose). Distinct from #259/#273/#275 provenance quartet — provenance emits state-of-record fields; #280 is per-turn execution-event evidence.
|
||||
|
||||
Founds NEW **`Hook-execution-event-envelope-coverage`** cluster on the per-turn-observability axis (1 member, #280 solo founder). Pairs with the silent-fallback family on the structurally-absent-evidence axis: silent-fallback accepts malformed input without a typed error; #280 accepts hook execution without a typed receipt. Pairs with #107 (doctor-side hook opacity) on the hooks-subsystem-observability axis to form a **hook-observability-pair** spanning both diagnostic surfaces (audit-once doctor + per-turn JSON envelope). Pairs with #265 (stream-json absent) on the structured-output-axis to form a **JSON-output-completeness-pair**: #265 catalogues the missing streaming output mode entirely, #280 catalogues a missing field within the one-shot JSON envelope that does exist. Extends the **complementary-pinpoint-pair-bundle** discovery-pattern: #280 forms the seventh pair-bundle (#107 + #280 hook-observability-spanning-doctor-and-runtime).
|
||||
|
||||
Distinct from #266 (typed-error-kind enumeration — the runtime needs typed discriminants on `RuntimeError`; #280 is about hook-execution evidence as a positive-path field, not error-kind taxonomy on the failure path). Distinct from #278 (`SESSION_VERSION` never compared on load — persistence layer; #280 is the runtime/CLI envelope layer) and from gaebal-gajae's #279 (unknown-fields silent-drop in known JSONL records — also persistence; #280 is per-turn output emission, not on-disk storage).
|
||||
|
||||
Required fix shape: (a) extend `run_prompt_json` envelope at `main.rs:4699-4724` with a top-level `hooks: [HookInvocationReceipt]` array where each entry is `{ event: "pre_tool_use" | "post_tool_use" | "pre_tool_use_failure" | "post_tool_use_failure", tool_name, tool_use_id, command, started_at, completed_at, duration_ms, outcome: "completed" | "cancelled" | "denied" | "failed", exit_code: Option<i32>, stdout_preview: Option<String>, stderr_preview: Option<String>, updated_input_changed: bool, permission_override: Option<String> }`; (b) add a JSON-mode `HookProgressReporter` impl that buffers `HookProgressEvent`s into a `Vec<HookInvocationReceipt>` instead of writing to stderr, attached unconditionally in `prepare_turn_runtime` (drop the `emit_output` gate at `main.rs:7726`); (c) align the `hooks` field with the existing `tool_uses`/`tool_results` array schema so consumers who already index by `tool_use_id` can correlate hook receipts to the tool they fired around; (d) extend `run_prompt_compact_json` (`main.rs:4665-4688`) to include either the full `hooks` array or a `hook_invocations: N` count so the compact envelope does not silently strip a fourth observability dimension on top of the six already documented in #260; (e) add a top-level `hooks_summary: { pre_tool_use_count, post_tool_use_count, denied_count, failed_count, cancelled_count }` for cheap aggregate consumers; (f) regression-test a workspace whose `settings.json` defines a `PreToolUse` hook that denies one tool and a `PostToolUse` hook that runs on success, asserting the `--output-format json` envelope contains exactly two hook receipts with the correct outcomes and stdout/stderr previews; (g) document the `hooks` field in `--help` and the wire-format docs alongside `tool_uses`/`tool_results`; (h) close the loop with #107 by having `claw doctor --output-format json` add a `hooks_subsystem` block describing configured hooks (audit-once) so #107 + #280 collectively close the hook-observability-spanning-doctor-and-runtime pair. Acceptance: an operator scripting `claw -p "x" --output-format json | jq '.hooks[]'` can enumerate every hook invocation that fired during the turn with its outcome, and a security-audit hook can prove it ran without scraping stderr or reverse-engineering tool_result reason strings.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 17:05 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `bdcf3fa` before filing (post fast-forward onto gaebal-gajae's #279 unknown-fields silent-drop pinpoint). Cluster delta: founds NEW `Hook-execution-event-envelope-coverage` cluster (1 member, #280 solo founder); pair-bundle with #107 (doctor-side hook opacity); seventh complementary-pinpoint-pair-bundle in the discovery-pattern catalogue (turn-budget #262+#264 + WebSearch #245+#250 + 4 prior pairs + hook-observability #107+#280); cross-cluster with silent-fallback family (structurally-absent-evidence axis), with #265 (JSON-output-completeness pair: missing-mode vs missing-field-in-existing-mode), and with #260 (third strip dimension on top of compact-envelope's six). Concrete delta this cycle: ROADMAP-only pinpoint appended after static audit of `prepare_turn_runtime`/`build_runtime`/`CliHookProgressReporter`/`Conversation::run_pre_tool_use_hook` showing reporter is conditionally `None` in JSON mode and the JSON envelope at `:4699-4724` carries zero hook fields. Concurrent-dogfood-rebase parity will be confirmed local==origin==fork at HEAD `bdcf3fa+#280` after push.
|
||||
|
||||
## Pinpoint #281 — Dogfood filing is not a two-phase transaction: a subagent can commit/push ROADMAP successfully, crash before Discord reporting, and leave the public cycle in an ambiguous half-committed state with no recovery receipt
|
||||
|
||||
Dogfooded 2026-04-26 17:30 KST from the live #407 recovery incident. The #407 subagent successfully filed #280, pushed commit `cf32b83` to origin/fork, then crashed during Discord posting after a gateway restart / WebSocket closure. Jobdori had to manually recover by checking origin/fork parity, reading ROADMAP to learn what #280 was, and posting a recovery message. The git state was correct, but the public coordination state was incomplete until manual repair.
|
||||
|
||||
Concrete failure mode: ROADMAP commit/push and channel report are currently independent side effects with no shared transaction id or durable outbox. If the agent dies between them, downstream claws see neither a guaranteed success nor a guaranteed failure: git may contain the filing, chat may not, and the next nudge may duplicate or skip the item depending on which source it trusts. This is distinct from #269 (payload chunking/delivery receipts) and #277 (channel target resolution): those cover message delivery mechanics; #281 covers the atomicity boundary between repository mutation and public report publication.
|
||||
|
||||
Gap. There is no dogfood filing transaction ledger with phases like `planned`, `roadmap_committed`, `pushed_origin`, `pushed_fork`, `report_posted`, `recovered`. There is no durable outbox entry containing the report body before send, no idempotency key keyed by commit SHA/pinpoint id, and no automatic recovery worker that posts the missing report after restart. Manual recovery worked only because Jobdori noticed the crash and re-read git.
|
||||
|
||||
Required fix shape: (a) before committing, create a durable filing transaction record with `pinpoint_id`, branch, expected commit message, report body, target channel, and idempotency key; (b) update it after commit, origin push, fork push, and Discord send, including message id(s); (c) on agent/gateway restart, scan for records stuck at `pushed_*` but not `report_posted` and publish an idempotent recovery report; (d) include the transaction id in both commit body and Discord post so duplicates can be suppressed; (e) add tests simulating crash-after-push-before-post to prove the report is recovered exactly once. Acceptance: a successful ROADMAP push can never remain silently unreported after a crash; recovery is automatic and machine-auditable.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 17:32 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `cf32b83` before filing. Cluster delta: dogfood-filing-transactionality +1; sibling to #269/#277 delivery-layer gaps, distinct git↔chat two-phase atomicity layer. Concrete delta this cycle: ROADMAP-only pinpoint appended from #407 crash-after-push-before-post evidence.
|
||||
|
||||
## Pinpoint #282 — `--cwd` flag is parsed only by the `system-prompt` subcommand; the primary `claw -p` runtime dispatch path has no `--cwd` override and silently uses the process's `env::current_dir()` for tool execution, bash, file_ops, and git_context
|
||||
|
||||
Dogfooded 2026-04-26 17:35 KST. Static audit of `rust/crates/rusty-claude-cli/src/main.rs` shows exactly two occurrences of the literal string `"--cwd"`: one in `parse_system_prompt_args` at `:1950` (for `claw system-prompt --cwd <path>` only) and one inside a test fixture at `:10307` exercising that same subcommand. The 25+ other call sites that resolve a working directory all call `env::current_dir()` unconditionally (`:501, :514, :553, :1820, :1834, :1888, :1898, :2263, :2327, :3354, :3572, :3607, :3628, :3638, :3671, :3687, :3812, :3889, :3946, :4513, :4978, :5177, :5198, …`), meaning `claw -p "x"`, `claw run-prompt`, `claw query-tools`, `claw doctor`, `claw render-diff` and friends all operate on whatever directory the shell was in when the binary launched, with no flag to override.
|
||||
|
||||
Gap. Operators wrapping claw-code in dispatchers, schedulers, IDE agents, MCP brokers, and per-task worktree harnesses (the exact shape of the dogfood loop, of `oh-my-opencode`, of CI runners, of subagent spawners) cannot point a single long-lived claw process at different working directories per invocation. They have to either `cd` in a wrapping shell (which is racy across concurrent invocations of one process and impossible across threads of one process) or spawn a fresh process with `Command::current_dir`. The official upstream Claude Code CLI documents `--cwd` as a top-level flag (`https://docs.anthropic.com/en/docs/agents-and-tools/claude-code/cli-reference`) so dispatchers built against upstream silently drop the directory hint when pointed at claw-code, and the bash tool, file_ops, git_context, hook execution, and session save/load all anchor on the wrong directory with no error and no warning. PARITY.md does not list `--cwd` in its supported-flags table, but the `system-prompt` subcommand's local `--cwd` parser creates a misleading half-implementation that hides the runtime-path gap from grep-based audits.
|
||||
|
||||
Distinct from #149 (working-directory permission policy), #178 (allow-broad-cwd), and #277 (channel target resolution): those govern whether a cwd is permitted, whether broad cwds are gated, and where a report is sent. #282 is upstream of all three — without a `--cwd` flag on the runtime path, the policy never receives an explicit caller-supplied directory to evaluate; it only sees the inherited process cwd. Distinct from MCP-axis (#254/#268/#274/#275) because `--cwd` governs the host-side process's filesystem anchor before any MCP server is contacted.
|
||||
|
||||
Required fix shape: (a) extend the global argument parser at `:824-1264` (where `allow_broad_cwd`, `--output-format`, `--date` and friends are wired) to recognize a top-level `--cwd <path>` flag that is canonicalized once and threaded into every command variant (`Run`, `Query`, `Doctor`, `RenderDiff`, `RunPrompt`, `RunPromptJson`, etc.) via a single `cwd: Option<PathBuf>` field on the dispatch struct; (b) replace the 25+ raw `env::current_dir()` calls inside the runtime crate with a `resolve_cwd(global_cwd_override)` helper that prefers the explicit override and falls back to `env::current_dir()` only when none was supplied; (c) audit `bash.rs`, `file_ops.rs`, `git_context.rs`, `hooks.rs`, `branch_lock.rs`, and `compact.rs` so each receives the resolved cwd as a parameter rather than re-querying `env::current_dir()` on its own; (d) reuse `enforce_broad_cwd_policy` against the explicit override so #178's policy gate triggers on caller-supplied paths; (e) add an integration test running `claw -p "pwd via bash tool" --cwd /tmp/scratch` from a different process cwd and asserting the bash tool's `pwd` output is `/tmp/scratch`, not the launcher's directory; (f) document `--cwd` in `--help`, `claw doctor --output-format json`'s capability block, and PARITY.md's supported-flags table so the upstream gap is no longer silent. Acceptance: a long-lived claw process or per-task dispatcher can point each `claw -p` invocation at a distinct working directory by flag without spawning a new OS process or mutating the parent process's cwd.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 17:38 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `db27ac2` before filing (post fast-forward onto gaebal-gajae's #281 dogfood-filing-transactionality pinpoint). Cluster delta: founds NEW `cwd-flag-runtime-dispatch-gap` cluster (1 member, #282 solo founder); cross-cluster with silent-fallback family (claw-code silently inherits parent process cwd instead of erroring on missing `--cwd`), with PARITY.md half-implementation pattern, and with #178/#149 cwd-policy-gate axis (upstream of policy enforcement). Concrete delta this cycle: ROADMAP-only pinpoint appended after static audit confirmed exactly two `"--cwd"` literal occurrences (one in `parse_system_prompt_args`, one in a test fixture), zero on the primary runtime dispatch path. Concurrent-dogfood-rebase parity will be confirmed local==origin==fork at HEAD `db27ac2+#282` after push.
|
||||
|
||||
## Pinpoint #283 — `auto_compaction_input_tokens_threshold` is only settable via environment variable `CLAUDE_CODE_AUTO_COMPACT_INPUT_TOKENS`; no config-file key and no CLI flag expose it, so the 100 000-token default is a silent constant from an operator's perspective
|
||||
|
||||
Static audit of `rust/crates/runtime/src/conversation.rs`. `new_with_features` (`:166-189`) calls `auto_compaction_threshold_from_env()` unconditionally at construction time; the builder method `with_auto_compaction_input_tokens_threshold` (`:198-201`) exists but is never called in the CLI dispatch path — `grep -n "with_auto_compaction_input_tokens_threshold" rust/crates/rusty-claude-cli/src/main.rs` returns zero results. `auto_compaction_threshold_from_env` reads `CLAUDE_CODE_AUTO_COMPACT_INPUT_TOKENS` from the process environment (`:690-697`); if absent or unparseable it falls back to `DEFAULT_AUTO_COMPACTION_INPUT_TOKENS_THRESHOLD = 100_000` (`:18, :703`). `RuntimeFeatureConfig` (`:56-68` of `config.rs`) has no `compaction_threshold` field; the config loader at `:300-340` of `config.rs` never attempts to populate one. `build_runtime_with_plugin_state` (`:7680-7740` of `main.rs`) builds `ConversationRuntime::new_with_features` from `feature_config` and never calls the builder method afterward.
|
||||
|
||||
Gap. An operator who wants to raise or lower the compaction threshold for a project (e.g., a repo with a large context that should compact at 200 000 tokens, or a tight CI harness that should compact at 50 000 tokens) has three choices: (a) set the env var before every invocation — fragile across wrappers that launch new processes without inheriting the caller's env; (b) live with 100 000 — may be wrong for the model or context size; (c) compile a custom binary. No `settings.json` key, no `.clawconfig` field, no `--compaction-threshold` CLI flag. The builder method proves the design allows per-runtime override but the CLI path never routes any input to it. Distinct from #282 (`--cwd` gap): #282 is about filesystem context; #283 is about conversation compaction policy. Distinct from #109/ConfigValidator: no validation failure occurs — the default simply fires silently.
|
||||
|
||||
Required fix shape: (a) add `auto_compaction_threshold: Option<u32>` to `RuntimeFeatureConfig` (`:56` of `config.rs`); (b) populate it from a `settings.json`/`.clawrc` key (e.g., `autoCompactionInputTokensThreshold`) in the config loader alongside existing feature flags; (c) add a top-level `--compaction-threshold <N>` CLI flag in the global arg parser, parsed into `CliArgs`; (d) in `build_runtime_with_plugin_state`, call `.with_auto_compaction_input_tokens_threshold(...)` with precedence: CLI flag > config file > env var > compiled default; (e) surface the resolved threshold in `claw doctor --output-format json` under a `compaction` block so operators can inspect which source won; (f) validate in `ConfigValidator` that the threshold is a positive integer and warn on values under 10 000 (probable misconfiguration). Acceptance: `claw -p "x" --compaction-threshold 200000` uses 200 000; a `settings.json` with `"autoCompactionInputTokensThreshold": 150000` uses 150 000; the config file overrides the env var, and the CLI flag overrides both; `claw doctor` shows `compaction.threshold_source` as one of `cli`, `config`, `env`, `default`.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 18:00 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `b05561c` (post-rebase onto origin/main, #282 already present). Cluster delta: extends `silent-fallback-family` (threshold is silently inherited from env-only path, config/CLI lanes absent); builder method `with_auto_compaction_input_tokens_threshold` present but unreachable from CLI path. Concrete delta this cycle: ROADMAP-only pinpoint appended after static audit confirmed zero calls to `with_auto_compaction_input_tokens_threshold` in `main.rs`, no compaction field in `RuntimeFeatureConfig`, and env-var as the sole runtime override. Concurrent-dogfood-rebase parity will be confirmed local==origin==fork after push.
|
||||
|
||||
## Pinpoint #284 — `/ultraplan` is documented as deep multi-step planning but the REPL implementation only prints a static three-line placeholder, so users complaining it is difficult to use are hitting a contract/behavior void rather than a UX copy issue
|
||||
|
||||
Dogfooded 2026-04-26 18:24 KST after Sigrid reported that users are complaining `ultraplan` is difficult to use. Static audit found the documented contract in `USAGE.md`: `/ultraplan [task]` is described as "Deep planning with multi-step reasoning" and promises "a structured plan with numbered steps, reasoning for each step, and expected outcomes." The actual REPL dispatch path in `rust/crates/rusty-claude-cli/src/main.rs` routes `SlashCommand::Ultraplan { task }` to `self.run_ultraplan(task.as_deref())`, and `run_ultraplan` only executes `println!("{}", format_ultraplan_report(task));`. `format_ultraplan_report` returns a static three-line report: `Task`, `Action break work into a multi-step execution plan`, and `Output plan should cover goals, risks, sequencing, verification, and rollback`. No internal prompt is run, no planning tool is invoked, no progress reporter is attached, no persisted plan artifact is created, and no numbered plan is produced.
|
||||
|
||||
Concrete failure mode: a user runs `/ultraplan refactor auth`, expecting an actual deep plan because docs and help say so, but receives meta-instructions about what a plan should contain. The command looks successful yet produces no usable plan. This explains "difficult to use" complaints better than a wording issue: the command is discoverable and documented, but its behavior is a placeholder masquerading as a product feature.
|
||||
|
||||
Gap. There is no product contract boundary separating implemented slash commands from scaffold/stub commands for `/ultraplan`. `STUB_COMMANDS` filters some unimplemented commands from help/completion, but `/ultraplan` is not filtered because it has a handler; the handler is still functionally stubbed. This is distinct from #280 hook-envelope opacity and #283 hidden env-only config: #284 is a user-facing slash-command promise vs runtime behavior gap.
|
||||
|
||||
Required fix shape: choose one of two explicit paths. Product path: implement `/ultraplan` by calling the internal prompt/runtime with an `InternalPromptProgressReporter::ultraplan`, persist the generated plan in session state, include numbered steps/risks/verification/rollback, and support JSON/resume behavior if appropriate. Honesty path: demote `/ultraplan` to a clearly labeled placeholder/stub, remove the "deep planning" promise from USAGE/help, and point users to the actual planning workflow. Acceptance: running `/ultraplan <task>` either returns a real structured plan or clearly refuses as not implemented; it must not succeed with a static meta-template.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 18:25 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `1a7b8ea` before filing. Cluster delta: slash-command-contract-vs-runtime +1; concrete user-signal source: Sigrid report of `ultraplan` usability complaints. Concrete delta this cycle: ROADMAP-only follow-up appended from docs/code audit.
|
||||
|
||||
## Pinpoint #285 — Provider/model/websearch selection is split across hard-coded registries and env vars instead of a single settings-file contract, blocking user-requested multi-provider/multi-model config and swappable search backends
|
||||
|
||||
Dogfooded 2026-04-26 18:27 KST from Sigrid's channel request to make search engine selection and multi-provider/multi-model declarations configurable in settings. Static audit shows the runtime settings layer only parses a single optional `model`, `aliases`, permission settings, MCP, plugins, sandbox, OAuth, provider fallbacks, and trusted roots in `rust/crates/runtime/src/config.rs`. There is no structured `providers`, `models`, or `websearch` config section. Provider routing is still mostly code/env driven: `rust/crates/api/src/providers/mod.rs` has a hard-coded `MODEL_REGISTRY`, prefix checks (`claude`, `grok`, `openai/`, `qwen`, `kimi`), and env-var based base URL/auth resolution. `ProviderClient::from_model` dispatches from the model string and env metadata rather than a loaded provider graph. `WebSearch` ignores runtime settings entirely: `build_search_url` uses `CLAWD_WEB_SEARCH_BASE_URL` if set, otherwise DuckDuckGo HTML search.
|
||||
|
||||
Concrete failure mode: a user wants one settings file to declare `providers.lmstudio = { type: "openai", url: "http://.../v1" }`, `models[] = { name, provider, maxContext }`, default `model`, default permission mode, and `websearch = { provider: "tavily", apiKey: ... }`. Today the model name can be set, but the provider endpoint/auth/model metadata/search backend cannot be expressed as first-class config. Users must rely on global env vars, hard-coded model prefix heuristics, and hidden DuckDuckGo/base-url behavior, which makes local LM Studio/vLLM/Ollama, hosted OpenAI-compatible providers, and Tavily/Brave/search-provider swaps difficult to reason about and impossible to inspect via `claw doctor` as one coherent source of truth.
|
||||
|
||||
Gap. Claw Code lacks a declarative provider graph and websearch backend contract in `settings.json`. This is distinct from #283 (one compaction threshold only env-settable): #285 is the broader provider/search capability plane. It also intersects with #273/#275 provenance because `status`/`doctor` cannot report the real provider source-of-truth if it lives partly in env and partly in model-prefix code.
|
||||
|
||||
Required fix shape: (a) add schema-backed settings sections for `providers`, `models`, and `websearch` with safe secret handling (support env indirection for API keys instead of encouraging raw key commits); (b) define precedence `CLI > local/project/user config > env > built-in defaults`; (c) make `ProviderClient` resolve from the merged config graph, including custom OpenAI-compatible base URLs, auth env/key refs, max context, max output, and reasoning/tool quirks; (d) make `WebSearch` dispatch through configured providers such as DuckDuckGo, Tavily, Brave, or custom base URL; (e) surface the resolved provider/model/search backend in `claw status --output-format json` and `claw doctor`; (f) add tests for LM Studio-style OpenAI-compatible config, multi-model selection, and Tavily-style search backend config without leaking raw API keys in output. Acceptance: the user-requested provider/model/search shape can be placed in settings, resolved deterministically, and audited without relying on undocumented env-only behavior.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 18:28 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `92a598e` before filing. Cluster delta: declarative-provider-websearch-config +1; concrete user-signal source: Sigrid request in #clawcode-building-in-public. Concrete delta this cycle: ROADMAP-only follow-up appended from config/provider/websearch audit.
|
||||
|
||||
## Pinpoint #286 — Parallel `Agent` execution can leave forever-running manifests because background thread lifecycle is not durable across process/gateway death and has no heartbeat/stale reaper
|
||||
|
||||
Dogfooded 2026-04-26 18:32 KST after Sigrid requested heavy dogfooding around parallel execution and async execution because users report mistakes there. Static audit of `rust/crates/tools/src/lib.rs` shows `execute_agent` writes an `AgentOutput` manifest with `status: "running"`, `derivedState: "working"`, and a `lane.started` event, then calls `spawn_agent_job`. `spawn_agent_job` launches a detached `std::thread::Builder::spawn` closure and immediately returns `Ok(())`; the `JoinHandle` is discarded. The only transition out of `running` happens inside the in-process thread via `run_agent_job` → `persist_agent_terminal_state(..., "completed"|"failed")`, or if spawn itself fails before the thread starts.
|
||||
|
||||
Concrete failure mode: if the parent process/gateway crashes, restarts, OOMs, or is killed after the `running` manifest is written but before the detached thread persists terminal state, the manifest remains `running` forever. There is no durable job queue, PID/thread identity, heartbeat timestamp, lease, resume record, or stale reaper. `derive_agent_state("running", ..)` always returns `working`, so downstream parallel/team coordination sees the lane as active rather than `orphaned`, `lost`, or `needs_recovery`. This is exactly the class of parallel/async mistake users notice: a lane looks alive because a JSON file says `running`, not because any worker is actually executing.
|
||||
|
||||
Gap. Agent parallelism has a fire-and-forget in-process thread model but reports as durable background execution. Tests cover spawn failure and fake completion/failure, but they do not simulate crash-after-running-manifest-before-terminal-state, dropped `JoinHandle`, process restart, stale heartbeat, or reaper classification. This is distinct from #281 dogfood git↔Discord transactionality: #286 is runtime lane lifecycle durability for parallel worker execution.
|
||||
|
||||
Required fix shape: (a) persist a durable agent job record with `agent_id`, owner process id/start time, heartbeat timestamp, and phase before spawning; (b) either retain/track `JoinHandle`s in a supervisor or move execution to a durable worker queue; (c) update heartbeat during long `run_turn` execution; (d) on startup/tool access, scan manifests stuck in `running` beyond a lease and classify them as `orphaned_worker` / `needs_recovery` instead of `working`; (e) expose stale/orphaned lane state in Agent/Team status and lane events; (f) regression-test crash-after-manifest-before-terminal-state by creating a running manifest with stale heartbeat and verifying the reaper emits a typed blocker. Acceptance: a parallel Agent lane cannot remain silently `running` forever after its executor disappears.
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 18:33 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `639e1e3` before filing. Cluster delta: parallel-agent-lifecycle-durability +1; concrete user-signal source: Sigrid request to dogfood parallel/async execution mistakes. Concrete delta this cycle: ROADMAP-only pinpoint appended from Agent spawn/lifecycle audit.
|
||||
|
||||
## Pinpoint #287 — Auto-compaction is reactive-after-success instead of preflight-before-request, so oversized resumed sessions can hit context-window failure and “session broke / auto-compact did not work” before compaction ever runs
|
||||
|
||||
Dogfooded 2026-04-26 18:38 KST after Sigrid reported frequent session breakage where sessions are not maintained and auto-compaction does not appear to work. Static audit of `rust/crates/runtime/src/conversation.rs` shows `run_turn` calls `maybe_auto_compact()` only after the assistant/tool loop completes successfully and after provider usage has been recorded. `maybe_auto_compact` checks `self.usage_tracker.cumulative_usage().input_tokens` against `auto_compaction_input_tokens_threshold`; that usage is reconstructed from prior assistant message usage and updated from successful provider events, not from a preflight estimate of the prompt/session that is about to be sent. If the next request is already too large and the provider returns `context_window_blocked` before a successful usage event, `maybe_auto_compact` is never reached. CLI error formatting then tells the user to run `/compact` manually, which is exactly the visible failure mode: session continuity breaks first, auto-compact never fires.
|
||||
|
||||
Concrete failure mode: a long/resumed session grows near or beyond model context. The next turn is sent without preflight compaction because current auto-compaction is only post-turn. The provider rejects the request for context window size, `run_turn` returns `Err`, the runtime shuts down plugins, and no compaction is persisted. The user sees a broken session/context-window error and must manually recover with `/compact`, despite auto-compaction being advertised as protecting long sessions.
|
||||
|
||||
Gap. Auto-compaction lacks a pre-request guard based on `estimate_session_tokens(&session) + estimated_new_prompt_tokens + requested_output_tokens` and lacks a retry path that compacts and resends after a typed context-window failure. This is distinct from #283 (threshold config is env-only): #287 is the timing/trigger semantics that make auto-compaction fail in the exact oversized-session case users expect it to handle. It also intersects with session-maintenance complaints because failed turns do not persist a compacted recovery state.
|
||||
|
||||
Required fix shape: (a) add a preflight auto-compact phase before provider dispatch using estimated session/request size and model context metadata; (b) include the threshold, estimated session tokens, estimated request tokens, and context window in a typed `auto_compaction_preflight` event/status surface; (c) after `context_window_blocked`, optionally run a safe compact-and-retry once, with an explicit receipt; (d) persist the compacted session before retry so session continuity is recoverable even if the retry fails; (e) surface whether compaction was skipped because the session was below threshold, no messages were removable, or compaction would not fit; (f) add regression coverage where a resumed oversized session compacts before request and does not hit provider context-window rejection first. Acceptance: an oversized maintained session gets compacted or fails with a typed “not compactable” reason before provider context-window failure, never with silent “auto-compact did not run.”
|
||||
|
||||
**Status:** Open. No source code changed. Filed 2026-04-26 18:39 KST. Branch: feat/jobdori-168c-emission-routing. HEAD: `79eeaae` before filing. Cluster delta: session-continuity-auto-compaction-semantics +1; concrete user-signal source: Sigrid report of frequent session breakage and auto-compaction not working. Concrete delta this cycle: ROADMAP-only pinpoint appended from auto-compaction trigger audit.
|
||||
|
||||
@@ -9192,136 +9192,44 @@ fn permission_policy(
|
||||
}
|
||||
|
||||
fn convert_messages(messages: &[ConversationMessage]) -> Vec<InputMessage> {
|
||||
let mut converted = Vec::new();
|
||||
let mut index = 0;
|
||||
|
||||
while index < messages.len() {
|
||||
let message = &messages[index];
|
||||
match message.role {
|
||||
MessageRole::Assistant => {
|
||||
let tool_use_ids = message
|
||||
.blocks
|
||||
.iter()
|
||||
.filter_map(|block| match block {
|
||||
ContentBlock::ToolUse { id, .. } => Some(id.clone()),
|
||||
_ => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
let (tool_result_blocks, next_index) = if tool_use_ids.is_empty() {
|
||||
(Vec::new(), index + 1)
|
||||
} else {
|
||||
collect_immediate_tool_results(messages, index + 1)
|
||||
};
|
||||
let has_all_tool_results = !tool_use_ids.is_empty()
|
||||
&& tool_use_ids.iter().all(|id| {
|
||||
tool_result_blocks.iter().any(|block| {
|
||||
matches!(block, InputContentBlock::ToolResult { tool_use_id, .. } if tool_use_id == id)
|
||||
})
|
||||
});
|
||||
let paired_tool_result_blocks = if has_all_tool_results {
|
||||
tool_result_blocks
|
||||
.into_iter()
|
||||
.filter(|block| {
|
||||
matches!(block, InputContentBlock::ToolResult { tool_use_id, .. } if tool_use_ids.contains(tool_use_id))
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
} else {
|
||||
Vec::new()
|
||||
};
|
||||
let content = message
|
||||
.blocks
|
||||
.iter()
|
||||
.filter_map(|block| match block {
|
||||
ContentBlock::Text { text } => Some(InputContentBlock::Text {
|
||||
text: text.clone(),
|
||||
}),
|
||||
ContentBlock::ToolUse { id, name, input } if has_all_tool_results => {
|
||||
Some(InputContentBlock::ToolUse {
|
||||
id: id.clone(),
|
||||
name: name.clone(),
|
||||
input: serde_json::from_str(input)
|
||||
.unwrap_or_else(|_| serde_json::json!({ "raw": input })),
|
||||
})
|
||||
}
|
||||
ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. } => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
if !content.is_empty() {
|
||||
converted.push(InputMessage {
|
||||
role: "assistant".to_string(),
|
||||
content,
|
||||
});
|
||||
}
|
||||
if has_all_tool_results && !paired_tool_result_blocks.is_empty() {
|
||||
converted.push(InputMessage {
|
||||
role: "user".to_string(),
|
||||
content: paired_tool_result_blocks,
|
||||
});
|
||||
index = next_index;
|
||||
} else {
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
MessageRole::Tool => {
|
||||
// Anthropic requires tool_result blocks to appear in the user message
|
||||
// immediately following their assistant tool_use. A bare Tool-role
|
||||
// message here is orphaned (for example after a resume/edit/compaction
|
||||
// boundary) and would be rejected with a provider 400.
|
||||
index += 1;
|
||||
}
|
||||
MessageRole::System | MessageRole::User => {
|
||||
let content = message
|
||||
.blocks
|
||||
.iter()
|
||||
.filter_map(|block| match block {
|
||||
ContentBlock::Text { text } => Some(InputContentBlock::Text {
|
||||
text: text.clone(),
|
||||
}),
|
||||
ContentBlock::ToolUse { .. } | ContentBlock::ToolResult { .. } => None,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
if !content.is_empty() {
|
||||
converted.push(InputMessage {
|
||||
role: "user".to_string(),
|
||||
content,
|
||||
});
|
||||
}
|
||||
index += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
converted
|
||||
}
|
||||
|
||||
fn collect_immediate_tool_results(
|
||||
messages: &[ConversationMessage],
|
||||
start: usize,
|
||||
) -> (Vec<InputContentBlock>, usize) {
|
||||
let mut blocks = Vec::new();
|
||||
let mut index = start;
|
||||
while let Some(message) = messages.get(index) {
|
||||
if message.role != MessageRole::Tool {
|
||||
break;
|
||||
}
|
||||
blocks.extend(message.blocks.iter().filter_map(|block| match block {
|
||||
ContentBlock::ToolResult {
|
||||
tool_use_id,
|
||||
output,
|
||||
is_error,
|
||||
..
|
||||
} => Some(InputContentBlock::ToolResult {
|
||||
tool_use_id: tool_use_id.clone(),
|
||||
content: vec![ToolResultContentBlock::Text {
|
||||
text: output.clone(),
|
||||
}],
|
||||
is_error: *is_error,
|
||||
}),
|
||||
ContentBlock::Text { .. } | ContentBlock::ToolUse { .. } => None,
|
||||
}));
|
||||
index += 1;
|
||||
}
|
||||
(blocks, index)
|
||||
messages
|
||||
.iter()
|
||||
.filter_map(|message| {
|
||||
let role = match message.role {
|
||||
MessageRole::System | MessageRole::User | MessageRole::Tool => "user",
|
||||
MessageRole::Assistant => "assistant",
|
||||
};
|
||||
let content = message
|
||||
.blocks
|
||||
.iter()
|
||||
.map(|block| match block {
|
||||
ContentBlock::Text { text } => InputContentBlock::Text { text: text.clone() },
|
||||
ContentBlock::ToolUse { id, name, input } => InputContentBlock::ToolUse {
|
||||
id: id.clone(),
|
||||
name: name.clone(),
|
||||
input: serde_json::from_str(input)
|
||||
.unwrap_or_else(|_| serde_json::json!({ "raw": input })),
|
||||
},
|
||||
ContentBlock::ToolResult {
|
||||
tool_use_id,
|
||||
output,
|
||||
is_error,
|
||||
..
|
||||
} => InputContentBlock::ToolResult {
|
||||
tool_use_id: tool_use_id.clone(),
|
||||
content: vec![ToolResultContentBlock::Text {
|
||||
text: output.clone(),
|
||||
}],
|
||||
is_error: *is_error,
|
||||
},
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
(!content.is_empty()).then(|| InputMessage {
|
||||
role: role.to_string(),
|
||||
content,
|
||||
})
|
||||
})
|
||||
.collect()
|
||||
}
|
||||
|
||||
#[allow(clippy::too_many_lines)]
|
||||
@@ -9525,7 +9433,7 @@ mod tests {
|
||||
PromptHistoryEntry, SlashCommand, StatusUsage, DEFAULT_MODEL, LATEST_SESSION_REFERENCE,
|
||||
STUB_COMMANDS,
|
||||
};
|
||||
use api::{ApiError, InputContentBlock, MessageResponse, OutputContentBlock, Usage};
|
||||
use api::{ApiError, MessageResponse, OutputContentBlock, Usage};
|
||||
use plugins::{
|
||||
PluginManager, PluginManagerConfig, PluginTool, PluginToolDefinition, PluginToolPermission,
|
||||
};
|
||||
@@ -12992,93 +12900,6 @@ UU conflicted.rs",
|
||||
assert_eq!(converted[1].role, "assistant");
|
||||
assert_eq!(converted[2].role, "user");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn converts_parallel_tool_results_into_immediate_single_user_message_256() {
|
||||
let messages = vec![
|
||||
ConversationMessage::assistant(vec![
|
||||
ContentBlock::ToolUse {
|
||||
id: "tool-1".to_string(),
|
||||
name: "read".to_string(),
|
||||
input: "{\"path\":\"a\"}".to_string(),
|
||||
},
|
||||
ContentBlock::ToolUse {
|
||||
id: "tool-2".to_string(),
|
||||
name: "read".to_string(),
|
||||
input: "{\"path\":\"b\"}".to_string(),
|
||||
},
|
||||
]),
|
||||
ConversationMessage::tool_result(
|
||||
"tool-1".to_string(),
|
||||
"read".to_string(),
|
||||
"a".to_string(),
|
||||
false,
|
||||
),
|
||||
ConversationMessage::tool_result(
|
||||
"tool-2".to_string(),
|
||||
"read".to_string(),
|
||||
"b".to_string(),
|
||||
false,
|
||||
),
|
||||
];
|
||||
|
||||
let converted = super::convert_messages(&messages);
|
||||
|
||||
assert_eq!(converted.len(), 2);
|
||||
assert_eq!(converted[0].role, "assistant");
|
||||
assert_eq!(converted[1].role, "user");
|
||||
assert!(matches!(
|
||||
converted[0].content.as_slice(),
|
||||
[
|
||||
InputContentBlock::ToolUse { id: id1, .. },
|
||||
InputContentBlock::ToolUse { id: id2, .. }
|
||||
] if id1 == "tool-1" && id2 == "tool-2"
|
||||
));
|
||||
assert!(matches!(
|
||||
converted[1].content.as_slice(),
|
||||
[
|
||||
InputContentBlock::ToolResult { tool_use_id: id1, .. },
|
||||
InputContentBlock::ToolResult { tool_use_id: id2, .. }
|
||||
] if id1 == "tool-1" && id2 == "tool-2"
|
||||
));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn drops_orphan_tool_use_and_tool_result_before_anthropic_dispatch_256() {
|
||||
let messages = vec![
|
||||
ConversationMessage::assistant(vec![
|
||||
ContentBlock::Text {
|
||||
text: "before tool".to_string(),
|
||||
},
|
||||
ContentBlock::ToolUse {
|
||||
id: "orphan".to_string(),
|
||||
name: "bash".to_string(),
|
||||
input: "{\"command\":\"pwd\"}".to_string(),
|
||||
},
|
||||
]),
|
||||
ConversationMessage::user_text("resume prompt"),
|
||||
ConversationMessage::tool_result(
|
||||
"orphan".to_string(),
|
||||
"bash".to_string(),
|
||||
"late".to_string(),
|
||||
false,
|
||||
),
|
||||
];
|
||||
|
||||
let converted = super::convert_messages(&messages);
|
||||
|
||||
assert_eq!(converted.len(), 2);
|
||||
assert_eq!(converted[0].role, "assistant");
|
||||
assert!(matches!(
|
||||
converted[0].content.as_slice(),
|
||||
[InputContentBlock::Text { text }] if text == "before tool"
|
||||
));
|
||||
assert_eq!(converted[1].role, "user");
|
||||
assert!(matches!(
|
||||
converted[1].content.as_slice(),
|
||||
[InputContentBlock::Text { text }] if text == "resume prompt"
|
||||
));
|
||||
}
|
||||
#[test]
|
||||
fn repl_help_mentions_history_completion_and_multiline() {
|
||||
let help = render_repl_help();
|
||||
|
||||
Reference in New Issue
Block a user