ml-knowledge-platform/knowledge_platform/tools/builtin/diff.py
2026-02-16 04:50:51 -08:00

143 lines
4.8 KiB
Python

"""DiffTool — compare two files or show changes between strings.
Produces unified diff output for reviewing changes before or after edits.
"""
from __future__ import annotations
import difflib
from pathlib import Path
from typing import Any, ClassVar
from ..base import Tool, ToolParameter, ToolResult
class DiffTool(Tool):
    """Compare a file against another file or a string as a unified diff.

    The "from" side is always the file at ``file_a``.  The "to" side is
    either a second file (``file_b``) or an in-memory string
    (``new_content``); exactly one of the two must be supplied.
    """

    name: ClassVar[str] = "diff"
    description: ClassVar[str] = (
        "Compare two files or a file against new content. "
        "Produces unified diff output showing additions and removals."
    )
    parameters: ClassVar[list[ToolParameter]] = [
        ToolParameter(
            name="file_a",
            type="string",
            description="Absolute path to the first file (or the original file)",
        ),
        ToolParameter(
            name="file_b",
            type="string",
            description=(
                "Absolute path to the second file to compare against. "
                "Mutually exclusive with new_content."
            ),
            required=False,
        ),
        ToolParameter(
            name="new_content",
            type="string",
            description=(
                "New content string to compare against file_a. "
                "Mutually exclusive with file_b."
            ),
            required=False,
        ),
        ToolParameter(
            name="context_lines",
            type="integer",
            description="Number of context lines around changes (default 3)",
            required=False,
            default=3,
        ),
    ]

    @staticmethod
    def _read_diff_input(path: Path) -> tuple[str, str | None]:
        """Read *path* as UTF-8 text for diffing.

        Returns:
            ``(content, None)`` on success, or ``("", message)`` where
            *message* describes why the file could not be used.
        """
        if not path.exists():
            return "", f"File not found: {path}"
        if not path.is_file():
            return "", f"Not a file: {path}"
        try:
            return path.read_text(encoding="utf-8"), None
        except UnicodeDecodeError:
            return "", f"Cannot read binary file: {path}"
        except PermissionError:
            return "", f"Permission denied: {path}"
        except OSError as exc:
            # E.g. the file disappeared between the checks above and the
            # read; report it instead of letting the exception escape.
            return "", f"Cannot read file: {path} ({exc})"

    async def execute(self, **kwargs: Any) -> ToolResult:
        """Build a unified diff of ``file_a`` against ``file_b`` or ``new_content``.

        Keyword Args:
            file_a: Absolute path of the original file (required).
            file_b: Absolute path of the file to compare against.
            new_content: String to compare against the content of ``file_a``.
            context_lines: Non-negative number of context lines per hunk;
                missing or ``None`` means 3.

        Returns:
            ``ToolResult.fail(...)`` with a message when the arguments are
            invalid or a file cannot be read.  Otherwise
            ``ToolResult.success(...)`` carrying either the text
            ``"Files are identical."`` with ``changes=0``, or the unified
            diff text with ``additions``, ``removals`` and ``changes``
            (their sum).

        Raises:
            KeyError: If ``file_a`` is not supplied.
        """
        file_a = Path(kwargs["file_a"])
        file_b_str: str | None = kwargs.get("file_b")
        new_content: str | None = kwargs.get("new_content")
        context_lines = kwargs.get("context_lines")
        if context_lines is None:
            # An explicitly passed None means "use the default".
            context_lines = 3

        if not file_a.is_absolute():
            return ToolResult.fail(f"file_a must be absolute: {file_a}")
        if file_b_str is not None and new_content is not None:
            return ToolResult.fail(
                "Provide either file_b or new_content, not both."
            )
        if file_b_str is None and new_content is None:
            return ToolResult.fail(
                "Provide either file_b (path to second file) or "
                "new_content (string to compare against)."
            )
        if not isinstance(context_lines, int) or context_lines < 0:
            # difflib raises or emits malformed hunks for such values.
            return ToolResult.fail(
                "context_lines must be a non-negative integer: "
                f"{context_lines!r}"
            )

        content_a, error = self._read_diff_input(file_a)
        if error is not None:
            return ToolResult.fail(error)

        label_a = str(file_a)
        if file_b_str is not None:
            file_b = Path(file_b_str)
            if not file_b.is_absolute():
                return ToolResult.fail(f"file_b must be absolute: {file_b}")
            content_b, error = self._read_diff_input(file_b)
            if error is not None:
                return ToolResult.fail(error)
            label_b = str(file_b)
        else:
            # Exactly one of file_b / new_content is set (validated above),
            # so new_content is a string on this branch.
            content_b = new_content  # type: ignore[assignment]
            label_b = f"{file_a} (proposed)"

        diff_lines = list(difflib.unified_diff(
            content_a.splitlines(keepends=True),
            content_b.splitlines(keepends=True),
            fromfile=label_a,
            tofile=label_b,
            n=context_lines,
        ))
        if not diff_lines:
            return ToolResult.success(
                "Files are identical.",
                changes=0,
            )

        # The first two entries are the "---" / "+++" file headers.  Skip
        # them by position rather than by prefix: an added line whose text
        # starts with "++" (or a removed one starting with "--") also
        # begins with "+++" / "---" and must still be counted.
        hunk_lines = diff_lines[2:]
        additions = sum(1 for line in hunk_lines if line.startswith("+"))
        removals = sum(1 for line in hunk_lines if line.startswith("-"))

        # A final input line without a line terminator would otherwise be
        # glued to the next diff line when joined.  ``splitlines`` returns
        # the line unchanged only when it carries no terminator at all, so
        # this test matches exactly the lines produced by the split above.
        no_newline_marker = "\\ No newline at end of file\n"
        output = "".join(
            f"{line}\n{no_newline_marker}" if line.splitlines() == [line] else line
            for line in diff_lines
        )
        return ToolResult.success(
            output,
            additions=additions,
            removals=removals,
            changes=additions + removals,
        )