Protocol v2 schema plumbing (Commit 2)

- llm_response_schema_v2.json: object-only, PATCH_FILE, base_sha256
- PAPAYU_PROTOCOL_VERSION=1|2 (default 1)
- schema_version and schema_hash dynamic in trace/log
- compiled_response_schema uses v1 or v2 by protocol
- response_format sends v2 schema when protocol=2
- Tests: test_schema_v2_compiles, test_schema_hash_non_empty_v2
- trace_to_golden: schema_hash_for_version by trace schema_version

Co-authored-by: Cursor <cursoragent@cursor.com>
This commit is contained in:
Yuriy 2026-01-31 11:58:49 +03:00
parent f2f33e24d6
commit a88c34aa15
6 changed files with 225 additions and 17 deletions

View File

@ -50,6 +50,7 @@
- **make/npm shortcuts:** `make golden` (trace→fixture), `make test-protocol` (golden_traces_v1_validate).
- **CI:** `.github/workflows/protocol-check.yml` — golden_traces_v1_validate на push/PR.
- **Политика golden traces:** в docs/golden_traces/README.md — когда/как обновлять, при смене schema_hash.
- **Protocol v2 schema (plumbing):** `llm_response_schema_v2.json` — object-only, PATCH_FILE, base_sha256. `PAPAYU_PROTOCOL_VERSION=1|2` (default 1). schema_version и schema_hash динамические в trace.
### Изменено

View File

@ -60,8 +60,8 @@
- `schema_version=2` → допускает `PATCH_FILE` / `REPLACE_RANGE` и расширенные поля.
В коде:
- Компилировать обе схемы: `llm_response_schema_v1.json`, `llm_response_schema_v2.json`.
- Выбор активной по env: `PAPAYU_PROTOCOL_VERSION=1|2`.
- Компилировать обе схемы: `llm_response_schema.json` (v1), `llm_response_schema_v2.json`.
- Выбор активной по env: `PAPAYU_PROTOCOL_VERSION=1|2` (default 1).
- Валидация/парсер: сначала проверить schema v2 (если включена), иначе v1.
---

View File

@ -27,6 +27,8 @@ PAPAYU_LLM_MODEL=gpt-4o-mini
# PAPAYU_TRACE=1
# PAPAYU_TRACE_RAW=1 — сохранять raw_content (с маскировкой sk-/Bearer)
# PAPAYU_PROTOCOL_VERSION=1|2 — версия схемы (default 1; v2 — PATCH_FILE, object-only)
# Контекст-диета: max 8 файлов, 20k на файл, 120k total.
# PAPAYU_CONTEXT_MAX_FILES=8
# PAPAYU_CONTEXT_MAX_FILE_CHARS=20000

View File

@ -0,0 +1,152 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"x_schema_version": 2,
"type": "object",
"additionalProperties": false,
"required": ["actions"],
"properties": {
"actions": {
"type": "array",
"items": { "$ref": "#/$defs/action" },
"maxItems": 200
},
"summary": { "type": "string" },
"context_requests": {
"type": "array",
"items": { "$ref": "#/$defs/context_request" }
},
"memory_patch": { "$ref": "#/$defs/memory_patch" }
},
"$defs": {
"action": {
"type": "object",
"additionalProperties": false,
"required": ["kind", "path"],
"properties": {
"kind": {
"type": "string",
"enum": [
"CREATE_FILE",
"CREATE_DIR",
"UPDATE_FILE",
"PATCH_FILE",
"DELETE_FILE",
"DELETE_DIR"
]
},
"path": { "type": "string" },
"content": { "type": "string" },
"patch": { "type": "string" },
"base_sha256": {
"type": "string",
"pattern": "^[a-f0-9]{64}$"
}
},
"allOf": [
{
"if": { "properties": { "kind": { "const": "CREATE_DIR" } } },
"then": {
"not": {
"anyOf": [
{ "required": ["content"] },
{ "required": ["patch"] },
{ "required": ["base_sha256"] }
]
}
}
},
{
"if": { "properties": { "kind": { "const": "DELETE_DIR" } } },
"then": {
"not": {
"anyOf": [
{ "required": ["content"] },
{ "required": ["patch"] },
{ "required": ["base_sha256"] }
]
}
}
},
{
"if": { "properties": { "kind": { "const": "DELETE_FILE" } } },
"then": {
"not": {
"anyOf": [
{ "required": ["content"] },
{ "required": ["patch"] },
{ "required": ["base_sha256"] }
]
}
}
},
{
"if": { "properties": { "kind": { "enum": ["CREATE_FILE", "UPDATE_FILE"] } } },
"then": {
"required": ["content"],
"not": {
"anyOf": [
{ "required": ["patch"] },
{ "required": ["base_sha256"] }
]
}
}
},
{
"if": { "properties": { "kind": { "const": "PATCH_FILE" } } },
"then": {
"required": ["patch", "base_sha256"],
"not": { "anyOf": [{ "required": ["content"] }] }
}
}
]
},
"context_request": {
"type": "object",
"additionalProperties": false,
"required": ["type"],
"properties": {
"type": { "type": "string", "enum": ["read_file", "search", "logs", "env"] },
"path": { "type": "string" },
"start_line": { "type": "integer", "minimum": 1 },
"end_line": { "type": "integer", "minimum": 1 },
"glob": { "type": "string" },
"query": { "type": "string" },
"source": { "type": "string" },
"last_n": { "type": "integer", "minimum": 1, "maximum": 5000 }
},
"allOf": [
{
"if": { "properties": { "type": { "const": "read_file" } } },
"then": { "required": ["path"] }
},
{
"if": { "properties": { "type": { "const": "search" } } },
"then": { "required": ["query"] }
},
{
"if": { "properties": { "type": { "const": "logs" } } },
"then": { "required": ["source"] }
}
]
},
"memory_patch": {
"type": "object",
"additionalProperties": false,
"properties": {
"user.preferred_style": { "type": "string" },
"user.ask_budget": { "type": "integer" },
"user.risk_tolerance": { "type": "string" },
"user.default_language": { "type": "string" },
"user.output_format": { "type": "string" },
"project.default_test_command": { "type": "string" },
"project.default_lint_command": { "type": "string" },
"project.default_format_command": { "type": "string" },
"project.package_manager": { "type": "string" },
"project.build_command": { "type": "string" },
"project.src_roots": { "type": "array", "items": { "type": "string" } },
"project.test_roots": { "type": "array", "items": { "type": "string" } },
"project.ci_notes": { "type": "string" }
}
}
}
}

View File

@ -9,8 +9,12 @@ use std::env;
use std::fs;
use std::path::Path;
fn schema_hash() -> String {
let schema_raw = include_str!("../../config/llm_response_schema.json");
fn schema_hash_for_version(version: u32) -> String {
let schema_raw = if version == 2 {
include_str!("../../config/llm_response_schema_v2.json")
} else {
include_str!("../../config/llm_response_schema.json")
};
let mut hasher = Sha256::new();
hasher.update(schema_raw.as_bytes());
format!("{:x}", hasher.finalize())
@ -63,11 +67,12 @@ fn trace_to_golden_format(trace: &serde_json::Value) -> Result<serde_json::Value
.or_else(|| trace.get("config_snapshot").and_then(|c| c.get("schema_version")))
.cloned()
.unwrap_or(serde_json::json!(1));
let version = schema_version.as_u64().unwrap_or(1) as u32;
let schema_hash_val = trace
.get("schema_hash")
.or_else(|| trace.get("config_snapshot").and_then(|c| c.get("schema_hash")))
.cloned()
.unwrap_or_else(|| serde_json::Value::String(schema_hash()));
.unwrap_or_else(|| serde_json::Value::String(schema_hash_for_version(version)));
let validated = trace.get("validated_json").cloned();
let validated_obj = validated

View File

@ -23,13 +23,35 @@ use std::time::Duration;
use uuid::Uuid;
const SCHEMA_RAW: &str = include_str!("../../config/llm_response_schema.json");
const SCHEMA_V2_RAW: &str = include_str!("../../config/llm_response_schema_v2.json");
/// Reads the active protocol version from the `PAPAYU_PROTOCOL_VERSION` environment variable.
///
/// The value is trimmed and parsed as `u32`. Only `1` and `2` are accepted; an unset
/// variable, an unparsable value, or any other number yields the default `1`.
fn protocol_version() -> u32 {
    match std::env::var("PAPAYU_PROTOCOL_VERSION") {
        Ok(raw) => match raw.trim().parse::<u32>() {
            Ok(version @ 1..=2) => version,
            _ => 1,
        },
        Err(_) => 1,
    }
}
/// Returns the schema hash for the protocol version reported by `protocol_version()`,
/// as computed by `schema_hash_for_version`.
pub(crate) fn schema_hash() -> String {
    let active_version = protocol_version();
    schema_hash_for_version(active_version)
}
/// Computes the SHA-256 digest of the raw schema text for `version`, formatted as
/// lowercase hex.
///
/// `version == 2` selects `SCHEMA_V2_RAW`; every other value falls back to `SCHEMA_RAW`.
pub(crate) fn schema_hash_for_version(version: u32) -> String {
    let raw = match version {
        2 => SCHEMA_V2_RAW,
        _ => SCHEMA_RAW,
    };
    // Hash only the selected schema text. Feeding SCHEMA_RAW in as well would make the
    // digest differ from a plain SHA-256 of the schema file, which is what the
    // trace-to-golden tool computes for the same version.
    let mut hasher = Sha256::new();
    hasher.update(raw.as_bytes());
    format!("{:x}", hasher.finalize())
}
/// Schema version to report in prompts, logs and traces.
///
/// This is exactly the value returned by `protocol_version()`.
fn current_schema_version() -> u32 {
    protocol_version()
}
#[derive(serde::Serialize)]
struct ChatMessage {
role: String,
@ -135,7 +157,7 @@ fn redact_secrets(s: &str) -> String {
fn write_trace(project_path: &str, trace_id: &str, trace: &mut serde_json::Value) {
// Добавляем config_snapshot для воспроизводимости
let config_snapshot = serde_json::json!({
"schema_version": LLM_PLAN_SCHEMA_VERSION,
"schema_version": current_schema_version(),
"schema_hash": schema_hash(),
"strict_json": std::env::var("PAPAYU_LLM_STRICT_JSON").unwrap_or_default(),
"trace_raw": std::env::var("PAPAYU_TRACE_RAW").unwrap_or_default(),
@ -217,7 +239,8 @@ pub const FIXIT_SYSTEM_PROMPT: &str = r#"Ты — режим Fix-it внутри
- Не делай широкие рефакторы без запроса: исправляй минимально.
- Если не хватает данных, можно задать 1 вопрос; иначе действуй."#;
/// Formal version number of the v1 response schema (kept for tests and compatibility).
// NOTE(review): `dead_code` is allowed presumably because non-test code now reports the
// version via `current_schema_version()` instead of this constant — confirm.
#[allow(dead_code)]
pub const LLM_PLAN_SCHEMA_VERSION: u32 = 1;
/// System prompt: режим Fix-plan (один JSON, context_requests, план → подтверждение → применение).
@ -400,9 +423,14 @@ const REPAIR_PROMPT_PLAN_ACTIONS_MUST_BE_EMPTY: &str = r#"
Верни объект с "actions": [] и "summary" (диагноз + план шагов).
"#;
/// Compiles the JSON Schema used for local validation.
///
/// The schema text is chosen by `protocol_version()`: `SCHEMA_V2_RAW` when it returns `2`,
/// `SCHEMA_RAW` otherwise. The schema is parsed and compiled on every call.
///
/// Returns `None` if the selected text is not valid JSON or does not compile as a schema.
fn compiled_response_schema() -> Option<JSONSchema> {
    let raw = match protocol_version() {
        2 => SCHEMA_V2_RAW,
        _ => SCHEMA_RAW,
    };
    // Parse only the selected schema: an extra unconditional parse of the v1 file would
    // be discarded immediately and could return `None` for the wrong schema.
    let schema: serde_json::Value = serde_json::from_str(raw).ok()?;
    JSONSchema::options().compile(&schema).ok()
}
@ -778,7 +806,7 @@ pub async fn plan(
}
}
let system_prompt = get_system_prompt_for_mode();
let system_content = format!("{}{}\n\nLLM_PLAN_SCHEMA_VERSION={}", system_prompt, memory_block, LLM_PLAN_SCHEMA_VERSION);
let system_content = format!("{}{}\n\nLLM_PLAN_SCHEMA_VERSION={}", system_prompt, memory_block, current_schema_version());
let project_root = Path::new(path);
let base_context = context::gather_base_context(project_root, &mem);
@ -843,9 +871,14 @@ pub async fn plan(
.next()
.unwrap_or("unknown");
let schema_version = current_schema_version();
let response_format = if use_strict_json {
let schema_json: serde_json::Value = serde_json::from_str(include_str!("../../config/llm_response_schema.json"))
.unwrap_or_else(|_| serde_json::json!({}));
let raw = if schema_version == 2 {
SCHEMA_V2_RAW
} else {
SCHEMA_RAW
};
let schema_json: serde_json::Value = serde_json::from_str(raw).unwrap_or_else(|_| serde_json::json!({}));
Some(ResponseFormatJsonSchema {
ty: "json_schema".to_string(),
json_schema: ResponseFormatJsonSchemaInner {
@ -895,7 +928,7 @@ pub async fn plan(
"LLM_REQUEST_SENT",
&[
("model", model.trim().to_string()),
("schema_version", LLM_PLAN_SCHEMA_VERSION.to_string()),
("schema_version", schema_version.to_string()),
("strict_json", (!skip_response_format && use_strict_json).to_string()),
("provider", provider.to_string()),
("token_budget", max_tokens.to_string()),
@ -1098,7 +1131,7 @@ pub async fn plan(
let mut trace_val = serde_json::json!({
"trace_id": trace_id,
"event": "LLM_PLAN_OK",
"schema_version": LLM_PLAN_SCHEMA_VERSION,
"schema_version": current_schema_version(),
"model": model.trim(),
"provider": provider,
"actions_count": last_actions.len(),
@ -1142,8 +1175,8 @@ pub async fn plan(
#[cfg(test)]
mod tests {
use super::{
extract_files_read_from_plan_context, parse_actions_from_json, schema_hash, validate_actions,
validate_update_without_base, FIX_PLAN_SYSTEM_PROMPT, LLM_PLAN_SCHEMA_VERSION,
extract_files_read_from_plan_context, parse_actions_from_json, schema_hash, schema_hash_for_version,
validate_actions, validate_update_without_base, FIX_PLAN_SYSTEM_PROMPT, LLM_PLAN_SCHEMA_VERSION,
};
use crate::types::{Action, ActionKind};
use std::fs;
@ -1170,6 +1203,21 @@ mod tests {
assert!(system_content.contains("LLM_PLAN_SCHEMA_VERSION=1"));
}
/// The bundled v2 schema text must parse as JSON and compile as a JSON Schema.
#[test]
fn test_schema_v2_compiles() {
    let parsed = serde_json::from_str::<serde_json::Value>(super::SCHEMA_V2_RAW)
        .expect("v2 schema valid JSON");
    assert!(
        jsonschema::JSONSchema::options().compile(&parsed).is_ok(),
        "v2 schema must compile"
    );
}
/// The hash computed for schema version 2 is non-empty and exactly 64 bytes long.
#[test]
fn test_schema_hash_non_empty_v2() {
    let digest = schema_hash_for_version(2);
    let digest_len = digest.len();
    assert!(!digest.is_empty());
    assert_eq!(digest_len, 64);
}
#[test]
fn test_validate_actions_empty() {
assert!(validate_actions(&[]).is_ok());