From a88c34aa151d9a7fd69c842ab630345ae9ce4c55 Mon Sep 17 00:00:00 2001 From: Yuriy Date: Sat, 31 Jan 2026 11:58:49 +0300 Subject: [PATCH] Protocol v2 schema plumbing (Commit 2) - llm_response_schema_v2.json: object-only, PATCH_FILE, base_sha256 - PAPAYU_PROTOCOL_VERSION=1|2 (default 1) - schema_version and schema_hash dynamic in trace/log - compiled_response_schema uses v1 or v2 by protocol - response_format sends v2 schema when protocol=2 - Tests: test_schema_v2_compiles, test_schema_hash_non_empty_v2 - trace_to_golden: schema_hash_for_version by trace schema_version Co-authored-by: Cursor --- CHANGELOG.md | 1 + docs/PROTOCOL_V2_PLAN.md | 4 +- env.openai.example | 2 + src-tauri/config/llm_response_schema_v2.json | 152 +++++++++++++++++++ src-tauri/src/bin/trace_to_golden.rs | 11 +- src-tauri/src/commands/llm_planner.rs | 72 +++++++-- 6 files changed, 225 insertions(+), 17 deletions(-) create mode 100644 src-tauri/config/llm_response_schema_v2.json diff --git a/CHANGELOG.md b/CHANGELOG.md index e3de535..fa5d85b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -50,6 +50,7 @@ - **make/npm shortcuts:** `make golden` (trace→fixture), `make test-protocol` (golden_traces_v1_validate). - **CI:** `.github/workflows/protocol-check.yml` — golden_traces_v1_validate на push/PR. - **Политика golden traces:** в docs/golden_traces/README.md — когда/как обновлять, при смене schema_hash. +- **Protocol v2 schema (plumbing):** `llm_response_schema_v2.json` — object-only, PATCH_FILE, base_sha256. `PAPAYU_PROTOCOL_VERSION=1|2` (default 1). schema_version и schema_hash динамические в trace. ### Изменено diff --git a/docs/PROTOCOL_V2_PLAN.md b/docs/PROTOCOL_V2_PLAN.md index cf5b2c9..5778627 100644 --- a/docs/PROTOCOL_V2_PLAN.md +++ b/docs/PROTOCOL_V2_PLAN.md @@ -60,8 +60,8 @@ - `schema_version=2` → допускает `PATCH_FILE` / `REPLACE_RANGE` и расширенные поля. В коде: -- Компилировать обе схемы: `llm_response_schema_v1.json`, `llm_response_schema_v2.json`. 
-- Выбор активной по env: `PAPAYU_PROTOCOL_VERSION=1|2`. +- Компилировать обе схемы: `llm_response_schema.json` (v1), `llm_response_schema_v2.json`. +- Выбор активной по env: `PAPAYU_PROTOCOL_VERSION=1|2` (default 1). - Валидация/парсер: сначала проверить schema v2 (если включена), иначе v1. --- diff --git a/env.openai.example b/env.openai.example index 9d2ec4e..c71359c 100644 --- a/env.openai.example +++ b/env.openai.example @@ -27,6 +27,8 @@ PAPAYU_LLM_MODEL=gpt-4o-mini # PAPAYU_TRACE=1 # PAPAYU_TRACE_RAW=1 — сохранять raw_content (с маскировкой sk-/Bearer) +# PAPAYU_PROTOCOL_VERSION=1|2 — версия схемы (default 1; v2 — PATCH_FILE, object-only) + # Контекст-диета: max 8 файлов, 20k на файл, 120k total. # PAPAYU_CONTEXT_MAX_FILES=8 # PAPAYU_CONTEXT_MAX_FILE_CHARS=20000 diff --git a/src-tauri/config/llm_response_schema_v2.json b/src-tauri/config/llm_response_schema_v2.json new file mode 100644 index 0000000..cd596ce --- /dev/null +++ b/src-tauri/config/llm_response_schema_v2.json @@ -0,0 +1,152 @@ +{ + "$schema": "http://json-schema.org/draft-07/schema#", + "x_schema_version": 2, + "type": "object", + "additionalProperties": false, + "required": ["actions"], + "properties": { + "actions": { + "type": "array", + "items": { "$ref": "#/$defs/action" }, + "maxItems": 200 + }, + "summary": { "type": "string" }, + "context_requests": { + "type": "array", + "items": { "$ref": "#/$defs/context_request" } + }, + "memory_patch": { "$ref": "#/$defs/memory_patch" } + }, + "$defs": { + "action": { + "type": "object", + "additionalProperties": false, + "required": ["kind", "path"], + "properties": { + "kind": { + "type": "string", + "enum": [ + "CREATE_FILE", + "CREATE_DIR", + "UPDATE_FILE", + "PATCH_FILE", + "DELETE_FILE", + "DELETE_DIR" + ] + }, + "path": { "type": "string" }, + "content": { "type": "string" }, + "patch": { "type": "string" }, + "base_sha256": { + "type": "string", + "pattern": "^[a-f0-9]{64}$" + } + }, + "allOf": [ + { + "if": { "properties": { "kind": { 
"const": "CREATE_DIR" } } }, + "then": { + "not": { + "anyOf": [ + { "required": ["content"] }, + { "required": ["patch"] }, + { "required": ["base_sha256"] } + ] + } + } + }, + { + "if": { "properties": { "kind": { "const": "DELETE_DIR" } } }, + "then": { + "not": { + "anyOf": [ + { "required": ["content"] }, + { "required": ["patch"] }, + { "required": ["base_sha256"] } + ] + } + } + }, + { + "if": { "properties": { "kind": { "const": "DELETE_FILE" } } }, + "then": { + "not": { + "anyOf": [ + { "required": ["content"] }, + { "required": ["patch"] }, + { "required": ["base_sha256"] } + ] + } + } + }, + { + "if": { "properties": { "kind": { "enum": ["CREATE_FILE", "UPDATE_FILE"] } } }, + "then": { + "required": ["content"], + "not": { + "anyOf": [ + { "required": ["patch"] }, + { "required": ["base_sha256"] } + ] + } + } + }, + { + "if": { "properties": { "kind": { "const": "PATCH_FILE" } } }, + "then": { + "required": ["patch", "base_sha256"], + "not": { "anyOf": [{ "required": ["content"] }] } + } + } + ] + }, + "context_request": { + "type": "object", + "additionalProperties": false, + "required": ["type"], + "properties": { + "type": { "type": "string", "enum": ["read_file", "search", "logs", "env"] }, + "path": { "type": "string" }, + "start_line": { "type": "integer", "minimum": 1 }, + "end_line": { "type": "integer", "minimum": 1 }, + "glob": { "type": "string" }, + "query": { "type": "string" }, + "source": { "type": "string" }, + "last_n": { "type": "integer", "minimum": 1, "maximum": 5000 } + }, + "allOf": [ + { + "if": { "properties": { "type": { "const": "read_file" } } }, + "then": { "required": ["path"] } + }, + { + "if": { "properties": { "type": { "const": "search" } } }, + "then": { "required": ["query"] } + }, + { + "if": { "properties": { "type": { "const": "logs" } } }, + "then": { "required": ["source"] } + } + ] + }, + "memory_patch": { + "type": "object", + "additionalProperties": false, + "properties": { + "user.preferred_style": { "type": 
"string" }, + "user.ask_budget": { "type": "integer" }, + "user.risk_tolerance": { "type": "string" }, + "user.default_language": { "type": "string" }, + "user.output_format": { "type": "string" }, + "project.default_test_command": { "type": "string" }, + "project.default_lint_command": { "type": "string" }, + "project.default_format_command": { "type": "string" }, + "project.package_manager": { "type": "string" }, + "project.build_command": { "type": "string" }, + "project.src_roots": { "type": "array", "items": { "type": "string" } }, + "project.test_roots": { "type": "array", "items": { "type": "string" } }, + "project.ci_notes": { "type": "string" } + } + } + } +} diff --git a/src-tauri/src/bin/trace_to_golden.rs b/src-tauri/src/bin/trace_to_golden.rs index ae9fafa..8225f76 100644 --- a/src-tauri/src/bin/trace_to_golden.rs +++ b/src-tauri/src/bin/trace_to_golden.rs @@ -9,8 +9,12 @@ use std::env; use std::fs; use std::path::Path; -fn schema_hash() -> String { - let schema_raw = include_str!("../../config/llm_response_schema.json"); +fn schema_hash_for_version(version: u32) -> String { + let schema_raw = if version == 2 { + include_str!("../../config/llm_response_schema_v2.json") + } else { + include_str!("../../config/llm_response_schema.json") + }; let mut hasher = Sha256::new(); hasher.update(schema_raw.as_bytes()); format!("{:x}", hasher.finalize()) @@ -63,11 +67,12 @@ fn trace_to_golden_format(trace: &serde_json::Value) -> Result u32 { + std::env::var("PAPAYU_PROTOCOL_VERSION") + .ok() + .and_then(|s| s.trim().parse().ok()) + .filter(|v| *v == 1 || *v == 2) + .unwrap_or(1) +} pub(crate) fn schema_hash() -> String { + schema_hash_for_version(protocol_version()) +} + +pub(crate) fn schema_hash_for_version(version: u32) -> String { + let raw = if version == 2 { + SCHEMA_V2_RAW + } else { + SCHEMA_RAW + }; let mut hasher = Sha256::new(); - hasher.update(SCHEMA_RAW.as_bytes()); + hasher.update(raw.as_bytes()); format!("{:x}", hasher.finalize()) } +fn 
current_schema_version() -> u32 { + protocol_version() +} + #[derive(serde::Serialize)] struct ChatMessage { role: String, @@ -135,7 +157,7 @@ fn redact_secrets(s: &str) -> String { fn write_trace(project_path: &str, trace_id: &str, trace: &mut serde_json::Value) { // Добавляем config_snapshot для воспроизводимости let config_snapshot = serde_json::json!({ - "schema_version": LLM_PLAN_SCHEMA_VERSION, + "schema_version": current_schema_version(), "schema_hash": schema_hash(), "strict_json": std::env::var("PAPAYU_LLM_STRICT_JSON").unwrap_or_default(), "trace_raw": std::env::var("PAPAYU_TRACE_RAW").unwrap_or_default(), @@ -217,7 +239,8 @@ pub const FIXIT_SYSTEM_PROMPT: &str = r#"Ты — режим Fix-it внутри - Не делай широкие рефакторы без запроса: исправляй минимально. - Если не хватает данных, можно задать 1 вопрос; иначе действуй."#; -/// Формальная версия схемы ответа (для воспроизводимости и будущего v2). +/// Формальная версия схемы v1 (для тестов и совместимости). +#[allow(dead_code)] pub const LLM_PLAN_SCHEMA_VERSION: u32 = 1; /// System prompt: режим Fix-plan (один JSON, context_requests, план → подтверждение → применение). @@ -400,9 +423,14 @@ const REPAIR_PROMPT_PLAN_ACTIONS_MUST_BE_EMPTY: &str = r#" Верни объект с "actions": [] и "summary" (диагноз + план шагов). "#; -/// Компилирует JSON Schema для локальной валидации (один раз). +/// Компилирует JSON Schema для локальной валидации (v1 или v2 по protocol_version). 
fn compiled_response_schema() -> Option<JSONSchema> { - let schema: serde_json::Value = serde_json::from_str(include_str!("../../config/llm_response_schema.json")).ok()?; + let raw = if protocol_version() == 2 { + SCHEMA_V2_RAW + } else { + SCHEMA_RAW + }; + let schema: serde_json::Value = serde_json::from_str(raw).ok()?; JSONSchema::options().compile(&schema).ok() } @@ -778,7 +806,7 @@ pub async fn plan( } } let system_prompt = get_system_prompt_for_mode(); - let system_content = format!("{}{}\n\nLLM_PLAN_SCHEMA_VERSION={}", system_prompt, memory_block, LLM_PLAN_SCHEMA_VERSION); + let system_content = format!("{}{}\n\nLLM_PLAN_SCHEMA_VERSION={}", system_prompt, memory_block, current_schema_version()); let project_root = Path::new(path); let base_context = context::gather_base_context(project_root, &mem); @@ -843,9 +871,14 @@ pub async fn plan( .next() .unwrap_or("unknown"); + let schema_version = current_schema_version(); let response_format = if use_strict_json { - let schema_json: serde_json::Value = serde_json::from_str(include_str!("../../config/llm_response_schema.json")) - .unwrap_or_else(|_| serde_json::json!({})); + let raw = if schema_version == 2 { + SCHEMA_V2_RAW + } else { + SCHEMA_RAW + }; + let schema_json: serde_json::Value = serde_json::from_str(raw).unwrap_or_else(|_| serde_json::json!({})); Some(ResponseFormatJsonSchema { ty: "json_schema".to_string(), json_schema: ResponseFormatJsonSchemaInner { @@ -895,7 +928,7 @@ pub async fn plan( "LLM_REQUEST_SENT", &[ ("model", model.trim().to_string()), - ("schema_version", LLM_PLAN_SCHEMA_VERSION.to_string()), + ("schema_version", schema_version.to_string()), ("strict_json", (!skip_response_format && use_strict_json).to_string()), ("provider", provider.to_string()), ("token_budget", max_tokens.to_string()), @@ -1098,7 +1131,7 @@ pub async fn plan( let mut trace_val = serde_json::json!({ "trace_id": trace_id, "event": "LLM_PLAN_OK", - "schema_version": LLM_PLAN_SCHEMA_VERSION, + "schema_version": 
current_schema_version(), "model": model.trim(), "provider": provider, "actions_count": last_actions.len(), @@ -1142,8 +1175,8 @@ pub async fn plan( #[cfg(test)] mod tests { use super::{ - extract_files_read_from_plan_context, parse_actions_from_json, schema_hash, validate_actions, - validate_update_without_base, FIX_PLAN_SYSTEM_PROMPT, LLM_PLAN_SCHEMA_VERSION, + extract_files_read_from_plan_context, parse_actions_from_json, schema_hash, schema_hash_for_version, + validate_actions, validate_update_without_base, FIX_PLAN_SYSTEM_PROMPT, LLM_PLAN_SCHEMA_VERSION, }; use crate::types::{Action, ActionKind}; use std::fs; @@ -1170,6 +1203,21 @@ mod tests { assert!(system_content.contains("LLM_PLAN_SCHEMA_VERSION=1")); } + #[test] + fn test_schema_v2_compiles() { + let schema: serde_json::Value = + serde_json::from_str(super::SCHEMA_V2_RAW).expect("v2 schema valid JSON"); + let compiled = jsonschema::JSONSchema::options().compile(&schema); + assert!(compiled.is_ok(), "v2 schema must compile"); + } + + #[test] + fn test_schema_hash_non_empty_v2() { + let h = schema_hash_for_version(2); + assert!(!h.is_empty()); + assert_eq!(h.len(), 64); + } + #[test] fn test_validate_actions_empty() { assert!(validate_actions(&[]).is_ok());