From a88c34aa151d9a7fd69c842ab630345ae9ce4c55 Mon Sep 17 00:00:00 2001
From: Yuriy <yrippert@gmaile.com>
Date: Sat, 31 Jan 2026 11:58:49 +0300
Subject: [PATCH] Protocol v2 schema plumbing (Commit 2)

- llm_response_schema_v2.json: object-only, PATCH_FILE, base_sha256
- PAPAYU_PROTOCOL_VERSION=1|2 (default 1)
- schema_version and schema_hash dynamic in trace/log
- compiled_response_schema uses v1 or v2 by protocol
- response_format sends v2 schema when protocol=2
- Tests: test_schema_v2_compiles, test_schema_hash_non_empty_v2
- trace_to_golden: schema_hash_for_version by trace schema_version

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 CHANGELOG.md                                 |   1 +
 docs/PROTOCOL_V2_PLAN.md                     |   4 +-
 env.openai.example                           |   2 +
 src-tauri/config/llm_response_schema_v2.json | 152 +++++++++++++++++++
 src-tauri/src/bin/trace_to_golden.rs         |  11 +-
 src-tauri/src/commands/llm_planner.rs        |  72 +++++++--
 6 files changed, 225 insertions(+), 17 deletions(-)
 create mode 100644 src-tauri/config/llm_response_schema_v2.json

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e3de535..fa5d85b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -50,6 +50,7 @@
 - **make/npm shortcuts:** `make golden` (trace→fixture), `make test-protocol` (golden_traces_v1_validate).
 - **CI:** `.github/workflows/protocol-check.yml` — golden_traces_v1_validate на push/PR.
 - **Политика golden traces:** в docs/golden_traces/README.md — когда/как обновлять, при смене schema_hash.
+- **Protocol v2 schema (plumbing):** `llm_response_schema_v2.json` — object-only, PATCH_FILE, base_sha256. `PAPAYU_PROTOCOL_VERSION=1|2` (default 1). schema_version и schema_hash динамические в trace.
 
 ### Изменено
 
diff --git a/docs/PROTOCOL_V2_PLAN.md b/docs/PROTOCOL_V2_PLAN.md
index cf5b2c9..5778627 100644
--- a/docs/PROTOCOL_V2_PLAN.md
+++ b/docs/PROTOCOL_V2_PLAN.md
@@ -60,8 +60,8 @@
 - `schema_version=2` → допускает `PATCH_FILE` / `REPLACE_RANGE` и расширенные поля.
 
 В коде:
-- Компилировать обе схемы: `llm_response_schema_v1.json`, `llm_response_schema_v2.json`.
-- Выбор активной по env: `PAPAYU_PROTOCOL_VERSION=1|2`.
+- Компилировать обе схемы: `llm_response_schema.json` (v1), `llm_response_schema_v2.json`.
+- Выбор активной по env: `PAPAYU_PROTOCOL_VERSION=1|2` (default 1).
 - Валидация/парсер: сначала проверить schema v2 (если включена), иначе v1.
 
 ---
diff --git a/env.openai.example b/env.openai.example
index 9d2ec4e..c71359c 100644
--- a/env.openai.example
+++ b/env.openai.example
@@ -27,6 +27,8 @@ PAPAYU_LLM_MODEL=gpt-4o-mini
 # PAPAYU_TRACE=1
 # PAPAYU_TRACE_RAW=1 — сохранять raw_content (с маскировкой sk-/Bearer)
 
+# PAPAYU_PROTOCOL_VERSION=1|2 — версия схемы (default 1; v2 — PATCH_FILE, object-only)
+
 # Контекст-диета: max 8 файлов, 20k на файл, 120k total.
 # PAPAYU_CONTEXT_MAX_FILES=8
 # PAPAYU_CONTEXT_MAX_FILE_CHARS=20000
diff --git a/src-tauri/config/llm_response_schema_v2.json b/src-tauri/config/llm_response_schema_v2.json
new file mode 100644
index 0000000..cd596ce
--- /dev/null
+++ b/src-tauri/config/llm_response_schema_v2.json
@@ -0,0 +1,152 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "x_schema_version": 2,
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["actions"],
+  "properties": {
+    "actions": {
+      "type": "array",
+      "items": { "$ref": "#/$defs/action" },
+      "maxItems": 200
+    },
+    "summary": { "type": "string" },
+    "context_requests": {
+      "type": "array",
+      "items": { "$ref": "#/$defs/context_request" }
+    },
+    "memory_patch": { "$ref": "#/$defs/memory_patch" }
+  },
+  "$defs": {
+    "action": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["kind", "path"],
+      "properties": {
+        "kind": {
+          "type": "string",
+          "enum": [
+            "CREATE_FILE",
+            "CREATE_DIR",
+            "UPDATE_FILE",
+            "PATCH_FILE",
+            "DELETE_FILE",
+            "DELETE_DIR"
+          ]
+        },
+        "path": { "type": "string" },
+        "content": { "type": "string" },
+        "patch": { "type": "string" },
+        "base_sha256": {
+          "type": "string",
+          "pattern": "^[a-f0-9]{64}$"
+        }
+      },
+      "allOf": [
+        {
+          "if": { "properties": { "kind": { "const": "CREATE_DIR" } } },
+          "then": {
+            "not": {
+              "anyOf": [
+                { "required": ["content"] },
+                { "required": ["patch"] },
+                { "required": ["base_sha256"] }
+              ]
+            }
+          }
+        },
+        {
+          "if": { "properties": { "kind": { "const": "DELETE_DIR" } } },
+          "then": {
+            "not": {
+              "anyOf": [
+                { "required": ["content"] },
+                { "required": ["patch"] },
+                { "required": ["base_sha256"] }
+              ]
+            }
+          }
+        },
+        {
+          "if": { "properties": { "kind": { "const": "DELETE_FILE" } } },
+          "then": {
+            "not": {
+              "anyOf": [
+                { "required": ["content"] },
+                { "required": ["patch"] },
+                { "required": ["base_sha256"] }
+              ]
+            }
+          }
+        },
+        {
+          "if": { "properties": { "kind": { "enum": ["CREATE_FILE", "UPDATE_FILE"] } } },
+          "then": {
+            "required": ["content"],
+            "not": {
+              "anyOf": [
+                { "required": ["patch"] },
+                { "required": ["base_sha256"] }
+              ]
+            }
+          }
+        },
+        {
+          "if": { "properties": { "kind": { "const": "PATCH_FILE" } } },
+          "then": {
+            "required": ["patch", "base_sha256"],
+            "not": { "anyOf": [{ "required": ["content"] }] }
+          }
+        }
+      ]
+    },
+    "context_request": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["type"],
+      "properties": {
+        "type": { "type": "string", "enum": ["read_file", "search", "logs", "env"] },
+        "path": { "type": "string" },
+        "start_line": { "type": "integer", "minimum": 1 },
+        "end_line": { "type": "integer", "minimum": 1 },
+        "glob": { "type": "string" },
+        "query": { "type": "string" },
+        "source": { "type": "string" },
+        "last_n": { "type": "integer", "minimum": 1, "maximum": 5000 }
+      },
+      "allOf": [
+        {
+          "if": { "properties": { "type": { "const": "read_file" } } },
+          "then": { "required": ["path"] }
+        },
+        {
+          "if": { "properties": { "type": { "const": "search" } } },
+          "then": { "required": ["query"] }
+        },
+        {
+          "if": { "properties": { "type": { "const": "logs" } } },
+          "then": { "required": ["source"] }
+        }
+      ]
+    },
+    "memory_patch": {
+      "type": "object",
+      "additionalProperties": false,
+      "properties": {
+        "user.preferred_style": { "type": "string" },
+        "user.ask_budget": { "type": "integer" },
+        "user.risk_tolerance": { "type": "string" },
+        "user.default_language": { "type": "string" },
+        "user.output_format": { "type": "string" },
+        "project.default_test_command": { "type": "string" },
+        "project.default_lint_command": { "type": "string" },
+        "project.default_format_command": { "type": "string" },
+        "project.package_manager": { "type": "string" },
+        "project.build_command": { "type": "string" },
+        "project.src_roots": { "type": "array", "items": { "type": "string" } },
+        "project.test_roots": { "type": "array", "items": { "type": "string" } },
+        "project.ci_notes": { "type": "string" }
+      }
+    }
+  }
+}
diff --git a/src-tauri/src/bin/trace_to_golden.rs b/src-tauri/src/bin/trace_to_golden.rs
index ae9fafa..8225f76 100644
--- a/src-tauri/src/bin/trace_to_golden.rs
+++ b/src-tauri/src/bin/trace_to_golden.rs
@@ -9,8 +9,12 @@ use std::env;
 use std::fs;
 use std::path::Path;
 
-fn schema_hash() -> String {
-    let schema_raw = include_str!("../../config/llm_response_schema.json");
+/// SHA-256 hex digest of the embedded schema for `version` (2 selects v2, anything else v1).
+fn schema_hash_for_version(version: u32) -> String {
+    let schema_raw = match version {
+        2 => include_str!("../../config/llm_response_schema_v2.json"),
+        _ => include_str!("../../config/llm_response_schema.json"),
+    };
     let mut hasher = Sha256::new();
     hasher.update(schema_raw.as_bytes());
     format!("{:x}", hasher.finalize())
@@ -63,11 +67,12 @@ fn trace_to_golden_format(trace: &serde_json::Value) -> Result<serde_json::Value
         .or_else(|| trace.get("config_snapshot").and_then(|c| c.get("schema_version")))
         .cloned()
         .unwrap_or(serde_json::json!(1));
+    let version = schema_version.as_u64().unwrap_or(1) as u32;
     let schema_hash_val = trace
         .get("schema_hash")
         .or_else(|| trace.get("config_snapshot").and_then(|c| c.get("schema_hash")))
         .cloned()
-        .unwrap_or_else(|| serde_json::Value::String(schema_hash()));
+        .unwrap_or_else(|| serde_json::Value::String(schema_hash_for_version(version)));
 
     let validated = trace.get("validated_json").cloned();
     let validated_obj = validated
diff --git a/src-tauri/src/commands/llm_planner.rs b/src-tauri/src/commands/llm_planner.rs
index db4a2ae..354bec2 100644
--- a/src-tauri/src/commands/llm_planner.rs
+++ b/src-tauri/src/commands/llm_planner.rs
@@ -23,13 +23,35 @@ use std::time::Duration;
 use uuid::Uuid;
 
 const SCHEMA_RAW: &str = include_str!("../../config/llm_response_schema.json");
+const SCHEMA_V2_RAW: &str = include_str!("../../config/llm_response_schema_v2.json");
+
+/// Active protocol version from `PAPAYU_PROTOCOL_VERSION`: 1 (default) or 2.
+fn protocol_version() -> u32 {
+    // Unset, unparsable or unsupported values all fall back to protocol v1.
+    let requested = std::env::var("PAPAYU_PROTOCOL_VERSION").ok();
+    let parsed = requested.and_then(|raw| raw.trim().parse::<u32>().ok());
+    parsed.filter(|v| matches!(*v, 1 | 2)).unwrap_or(1)
+}
 
 pub(crate) fn schema_hash() -> String {
+    schema_hash_for_version(protocol_version())
+}
+
+/// SHA-256 hex digest of the embedded schema text: v2 for `version == 2`, v1 otherwise.
+pub(crate) fn schema_hash_for_version(version: u32) -> String {
+    let schema_text = match version {
+        2 => SCHEMA_V2_RAW,
+        _ => SCHEMA_RAW,
+    };
     let mut hasher = Sha256::new();
-    hasher.update(SCHEMA_RAW.as_bytes());
+    hasher.update(schema_text.as_bytes());
     format!("{:x}", hasher.finalize())
 }
 
+fn current_schema_version() -> u32 {
+    protocol_version()
+}
 
 #[derive(serde::Serialize)]
 struct ChatMessage {
     role: String,
@@ -135,7 +157,7 @@ fn redact_secrets(s: &str) -> String {
 fn write_trace(project_path: &str, trace_id: &str, trace: &mut serde_json::Value) {
     // Добавляем config_snapshot для воспроизводимости
     let config_snapshot = serde_json::json!({
-        "schema_version": LLM_PLAN_SCHEMA_VERSION,
+        "schema_version": current_schema_version(),
         "schema_hash": schema_hash(),
         "strict_json": std::env::var("PAPAYU_LLM_STRICT_JSON").unwrap_or_default(),
         "trace_raw": std::env::var("PAPAYU_TRACE_RAW").unwrap_or_default(),
@@ -217,7 +239,8 @@ pub const FIXIT_SYSTEM_PROMPT: &str = r#"Ты — режим Fix-it внутри
 - Не делай широкие рефакторы без запроса: исправляй минимально.
 - Если не хватает данных, можно задать 1 вопрос; иначе действуй."#;
 
-/// Формальная версия схемы ответа (для воспроизводимости и будущего v2).
+/// Formal version number of the v1 response schema (kept for tests and compatibility).
+#[allow(dead_code)] // the visible non-test call sites now use current_schema_version()
 pub const LLM_PLAN_SCHEMA_VERSION: u32 = 1;
 
 /// System prompt: режим Fix-plan (один JSON, context_requests, план → подтверждение → применение).
@@ -400,9 +423,14 @@ const REPAIR_PROMPT_PLAN_ACTIONS_MUST_BE_EMPTY: &str = r#"
 Верни объект с "actions": [] и "summary" (диагноз + план шагов).
 "#;
 
-/// Компилирует JSON Schema для локальной валидации (один раз).
+/// Compiles the response JSON Schema for local validation (v1 or v2, per protocol_version).
 fn compiled_response_schema() -> Option<JSONSchema> {
-    let schema: serde_json::Value = serde_json::from_str(include_str!("../../config/llm_response_schema.json")).ok()?;
+    // Yields None when the embedded text is not valid JSON or does not compile.
+    let raw = match protocol_version() {
+        2 => SCHEMA_V2_RAW,
+        _ => SCHEMA_RAW,
+    };
+    let schema: serde_json::Value = serde_json::from_str(raw).ok()?;
     JSONSchema::options().compile(&schema).ok()
 }
 
@@ -778,7 +806,7 @@ pub async fn plan(
         }
     }
     let system_prompt = get_system_prompt_for_mode();
-    let system_content = format!("{}{}\n\nLLM_PLAN_SCHEMA_VERSION={}", system_prompt, memory_block, LLM_PLAN_SCHEMA_VERSION);
+    let system_content = format!("{}{}\n\nLLM_PLAN_SCHEMA_VERSION={}", system_prompt, memory_block, current_schema_version());
 
     let project_root = Path::new(path);
     let base_context = context::gather_base_context(project_root, &mem);
@@ -843,9 +871,14 @@ pub async fn plan(
         .next()
         .unwrap_or("unknown");
 
+    let schema_version = current_schema_version();
     let response_format = if use_strict_json {
-        let schema_json: serde_json::Value = serde_json::from_str(include_str!("../../config/llm_response_schema.json"))
-            .unwrap_or_else(|_| serde_json::json!({}));
+        let raw = if schema_version == 2 {
+            SCHEMA_V2_RAW
+        } else {
+            SCHEMA_RAW
+        };
+        let schema_json: serde_json::Value = serde_json::from_str(raw).unwrap_or_else(|_| serde_json::json!({}));
         Some(ResponseFormatJsonSchema {
             ty: "json_schema".to_string(),
             json_schema: ResponseFormatJsonSchemaInner {
@@ -895,7 +928,7 @@ pub async fn plan(
             "LLM_REQUEST_SENT",
             &[
                 ("model", model.trim().to_string()),
-                ("schema_version", LLM_PLAN_SCHEMA_VERSION.to_string()),
+                ("schema_version", schema_version.to_string()),
                 ("strict_json", (!skip_response_format && use_strict_json).to_string()),
                 ("provider", provider.to_string()),
                 ("token_budget", max_tokens.to_string()),
@@ -1098,7 +1131,7 @@ pub async fn plan(
     let mut trace_val = serde_json::json!({
         "trace_id": trace_id,
         "event": "LLM_PLAN_OK",
-        "schema_version": LLM_PLAN_SCHEMA_VERSION,
+        "schema_version": current_schema_version(),
         "model": model.trim(),
         "provider": provider,
         "actions_count": last_actions.len(),
@@ -1142,8 +1175,8 @@ pub async fn plan(
 #[cfg(test)]
 mod tests {
     use super::{
-        extract_files_read_from_plan_context, parse_actions_from_json, schema_hash, validate_actions,
-        validate_update_without_base, FIX_PLAN_SYSTEM_PROMPT, LLM_PLAN_SCHEMA_VERSION,
+        extract_files_read_from_plan_context, parse_actions_from_json, schema_hash, schema_hash_for_version,
+        validate_actions, validate_update_without_base, FIX_PLAN_SYSTEM_PROMPT, LLM_PLAN_SCHEMA_VERSION,
     };
     use crate::types::{Action, ActionKind};
     use std::fs;
@@ -1170,6 +1203,21 @@ mod tests {
         assert!(system_content.contains("LLM_PLAN_SCHEMA_VERSION=1"));
     }
 
+    #[test]
+    fn test_schema_v2_compiles() {
+        let parsed = serde_json::from_str::<serde_json::Value>(super::SCHEMA_V2_RAW)
+            .expect("v2 schema valid JSON");
+        let outcome = jsonschema::JSONSchema::options().compile(&parsed);
+        assert!(outcome.is_ok(), "v2 schema must compile");
+    }
+
+    #[test]
+    fn test_schema_hash_non_empty_v2() {
+        let h = schema_hash_for_version(2);
+        assert_eq!(h.len(), 64, "SHA-256 hex digest must be 64 characters");
+        assert!(h.bytes().all(|b| b.is_ascii_hexdigit()), "digest must be hex");
+    }
+
     #[test]
     fn test_validate_actions_empty() {
         assert!(validate_actions(&[]).is_ok());