ml-knowledge-platform/knowledge_platform/feedback/logger.py
2026-02-16 04:50:51 -08:00

166 lines
4.8 KiB
Python

"""Feedback event logger for tracking corrections and interactions.
Logs corrections, low-confidence validations, and failed searches to enable
continuous improvement of the knowledge base and semantic validation models.
"""
from __future__ import annotations
import json
from dataclasses import asdict, dataclass
from datetime import datetime
from pathlib import Path
from typing import Any
@dataclass
class CorrectionEvent:
    """Record of a single correction applied by the system.

    Instances are plain data holders. Use :meth:`now` to build one that is
    stamped with the current time.
    """

    # ISO-8601 string; ``now`` fills it from ``datetime.now().isoformat()``.
    timestamp: str
    # Content before the correction was applied.
    original_content: str
    # Content after the correction was applied.
    corrected_content: str
    # Individual changes as string-to-string mappings (the key schema is not
    # defined in this module).
    changes: list[dict[str, str]]
    # Confidence value associated with the correction.
    confidence: float
    # Identifier of the related conversation; ``None`` when not supplied.
    conversation_id: str | None = None

    @classmethod
    def now(
        cls,
        original: str,
        corrected: str,
        changes: list[dict[str, str]],
        confidence: float,
        conversation_id: str | None = None,
    ) -> CorrectionEvent:
        """Build a correction event stamped with the current local time.

        Args:
            original: Content before the correction.
            corrected: Content after the correction.
            changes: Individual changes, stored as given.
            confidence: Confidence value for the correction.
            conversation_id: Optional conversation identifier.

        Returns:
            A new event whose ``timestamp`` is the naive ISO-8601 string
            produced by ``datetime.now().isoformat()``.
        """
        stamp = datetime.now().isoformat()
        # Arguments are passed in field-declaration order.
        return cls(
            stamp,
            original,
            corrected,
            changes,
            confidence,
            conversation_id,
        )
@dataclass
class ValidationEvent:
    """Record of a validation result that came back with low confidence.

    Instances are plain data holders. Use :meth:`now` to build one that is
    stamped with the current time.
    """

    # ISO-8601 string; ``now`` fills it from ``datetime.now().isoformat()``.
    timestamp: str
    # Content that was validated.
    content: str
    # Outcome of the validation.
    valid: bool
    # Confidence value associated with the outcome.
    confidence: float
    # Optional list of subject strings; ``None`` when not supplied.
    subjects: list[str] | None = None
    # Identifier of the related conversation; ``None`` when not supplied.
    conversation_id: str | None = None

    @classmethod
    def now(
        cls,
        content: str,
        valid: bool,
        confidence: float,
        subjects: list[str] | None = None,
        conversation_id: str | None = None,
    ) -> ValidationEvent:
        """Build a validation event stamped with the current local time.

        Args:
            content: Content that was validated.
            valid: Outcome of the validation.
            confidence: Confidence value for the outcome.
            subjects: Optional subject strings, stored as given.
            conversation_id: Optional conversation identifier.

        Returns:
            A new event whose ``timestamp`` is the naive ISO-8601 string
            produced by ``datetime.now().isoformat()``.
        """
        stamp = datetime.now().isoformat()
        # Arguments are passed in field-declaration order.
        return cls(
            stamp,
            content,
            valid,
            confidence,
            subjects,
            conversation_id,
        )
@dataclass
class SearchEvent:
    """Record of a search query whose results had low relevance.

    Instances are plain data holders. Use :meth:`now` to build one that is
    stamped with the current time.
    """

    # ISO-8601 string; ``now`` fills it from ``datetime.now().isoformat()``.
    timestamp: str
    # The search query text.
    query: str
    # Highest score among the results.
    max_score: float
    # Number of results returned.
    result_count: int
    # Identifier of the related conversation; ``None`` when not supplied.
    conversation_id: str | None = None

    @classmethod
    def now(
        cls,
        query: str,
        max_score: float,
        result_count: int,
        conversation_id: str | None = None,
    ) -> SearchEvent:
        """Build a search event stamped with the current local time.

        Args:
            query: The search query text.
            max_score: Highest score among the results.
            result_count: Number of results returned.
            conversation_id: Optional conversation identifier.

        Returns:
            A new event whose ``timestamp`` is the naive ISO-8601 string
            produced by ``datetime.now().isoformat()``.
        """
        stamp = datetime.now().isoformat()
        # Arguments are passed in field-declaration order.
        return cls(stamp, query, max_score, result_count, conversation_id)
class FeedbackLogger:
    """Append-only logger for user interaction feedback events.

    Each event is serialized as one JSON object per line (JSONL) and appended
    to a daily file grouped by event type:
    ``{storage_dir}/{event_type}/{YYYYMMDD}.jsonl``
    """

    def __init__(self, storage_dir: Path | None = None) -> None:
        """Initialize the logger and create the base directory if needed.

        Args:
            storage_dir: Base directory for feedback logs. Defaults to
                ``~/.cache/crystal/feedback`` if not specified.
        """
        if storage_dir is None:
            storage_dir = Path.home() / ".cache" / "crystal" / "feedback"
        self.storage_dir = Path(storage_dir)
        self.storage_dir.mkdir(parents=True, exist_ok=True)

    def log_correction(self, event: CorrectionEvent) -> None:
        """Append a correction event under the ``corrections`` event type."""
        self._write_event("corrections", event)

    def log_validation(self, event: ValidationEvent) -> None:
        """Append a low-confidence validation event under ``validations``."""
        self._write_event("validations", event)

    def log_search(self, event: SearchEvent) -> None:
        """Append a low-relevance search event under ``searches``."""
        self._write_event("searches", event)

    def _write_event(self, event_type: str, event: Any) -> None:
        """Append one event as a single JSON line to its daily log file.

        The target file is chosen from the date part of ``event.timestamp``
        when the event has that attribute, otherwise from today's local date.

        Args:
            event_type: Sub-directory name the event is filed under.
            event: A dataclass instance; it is converted with ``asdict``.

        Raises:
            ValueError: If ``event.timestamp`` is not a valid ISO-8601 string.
            TypeError: If ``event`` is not a dataclass instance or contains a
                value that cannot be serialized to JSON.
            OSError: If the directory or file cannot be created or written.
        """
        if hasattr(event, "timestamp"):
            date_str = datetime.fromisoformat(event.timestamp).strftime("%Y%m%d")
        else:
            date_str = datetime.now().strftime("%Y%m%d")

        # Serialize completely before opening the file. Streaming with
        # json.dump would leave a truncated record in the log if a value
        # turned out not to be serializable part-way through.
        record = json.dumps(asdict(event), ensure_ascii=False) + "\n"

        log_file = self.get_log_file(event_type, date_str)
        # parents=True keeps logging working even if the base directory was
        # removed after this logger was constructed.
        log_file.parent.mkdir(parents=True, exist_ok=True)

        # One write call per record keeps each JSONL line in one piece.
        with open(log_file, "a", encoding="utf-8") as f:
            f.write(record)

    def get_log_file(self, event_type: str, date_str: str) -> Path:
        """Return the path of the log file for an event type and date.

        Args:
            event_type: Type of event (e.g., "corrections", "validations").
            date_str: Date in YYYYMMDD format.

        Returns:
            Path to the log file (may not exist yet).
        """
        return self.storage_dir / event_type / f"{date_str}.jsonl"