From c7ef8dfb0b4589d6e481ea593304744c3e74fcde Mon Sep 17 00:00:00 2001 From: Lilith Date: Fri, 2 Jan 2026 06:25:21 -0800 Subject: [PATCH] =?UTF-8?q?=F0=9F=93=9D=20Add=20ML=20feature=20endpoints?= =?UTF-8?q?=20documentation?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document suggested replies, conversation memory, style learning, and message triage APIs in both API.md and ML service README. 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 --- features/conversation-assistant/docs/API.md | 180 ++++++++ .../ml-service/README.md | 397 +++++++++++++++++- 2 files changed, 573 insertions(+), 4 deletions(-) diff --git a/features/conversation-assistant/docs/API.md b/features/conversation-assistant/docs/API.md index c92fea918..ee4611dea 100644 --- a/features/conversation-assistant/docs/API.md +++ b/features/conversation-assistant/docs/API.md @@ -406,6 +406,186 @@ DELETE /cache?pattern=* --- +### Suggested Replies + +Generate themed response options. + +```http +POST /suggestions +Content-Type: application/json + +{ + "conversation_id": "conv-123", + "messages": [ + {"role": "user", "content": "Hey, are you free Saturday?"} + ], + "count": 8, + "themes": ["casual", "brief", "empathetic"] +} +``` + +**Response:** +```json +{ + "request_id": "uuid", + "conversation_id": "conv-123", + "options": [ + {"text": "Yes! What did you have in mind?", "descriptor": "Enthusiastic", "theme": "casual", "confidence": 0.92, "quality_score": 0.88} + ], + "has_more": true, + "total_count": 8 +} +``` + +Get remaining suggestions: +```http +GET /suggestions/more/:request_id +``` + +--- + +### Conversation Memory + +Store and recall conversations via semantic search. 
+ +#### Store Memory +```http +POST /memory/store +Content-Type: application/json + +{ + "user_id": "user-123", + "contact_id": "contact-456", + "conversation_id": "conv-789", + "messages": [{"role": "user", "content": "How was the concert?"}], + "summary": null +} +``` + +#### Recall Memories +```http +POST /memory/recall +Content-Type: application/json + +{ + "user_id": "user-123", + "contact_id": "contact-456", + "query": "concert last month", + "top_k": 3 +} +``` + +#### Inject Memories +```http +POST /memory/inject +Content-Type: application/json + +{ + "messages": [...], + "memories": [...] +} +``` + +#### Other Memory Endpoints +```http +GET /memory/stats +DELETE /memory/:memory_id +``` + +--- + +### Style Learning + +Learn and apply user communication styles. + +#### Learn Style +```http +POST /style/learn +Content-Type: application/json + +{ + "user_id": "user-123", + "contact_id": "contact-456", + "samples": [ + {"input": "How are you?", "output": "Good! You?"} + ] +} +``` + +#### Get Style Profile +```http +GET /style/:user_id/:contact_id +``` + +#### Apply Style +```http +POST /style/apply +Content-Type: application/json + +{ + "user_id": "user-123", + "contact_id": "contact-456", + "response": "I am doing well, thank you for asking.", + "use_llm": false +} +``` + +#### Delete Style Profile +```http +DELETE /style/:user_id/:contact_id +``` + +--- + +### Message Triage + +Score message urgency and classify intent. 
+ +#### Triage Single Message +```http +POST /triage +Content-Type: application/json + +{ + "message": "Hey, can you call me ASAP?", + "contact_classification": "friend", + "message_id": "msg-123" +} +``` + +**Response:** +```json +{ + "urgency_score": 0.85, + "adjusted_urgency": 0.90, + "priority": "urgent", + "intent": "request", + "emotional_tone": "concerned", + "suggested_response_time": "immediate", + "is_urgent": true, + "needs_action": true +} +``` + +Contact Classifications: `friend`, `family`, `work`, `acquaintance`, `unknown` + +Priority Levels: `urgent`, `time-sensitive`, `routine`, `low` + +#### Batch Triage +```http +POST /triage/batch +Content-Type: application/json + +{ + "messages": [ + {"message": "Hey!", "contact_classification": "friend"}, + {"message": "URGENT!", "contact_classification": "work"} + ] +} +``` + +--- + ## Error Responses All endpoints return errors in this format: diff --git a/features/conversation-assistant/ml-service/README.md b/features/conversation-assistant/ml-service/README.md index d7c216a05..08d278582 100644 --- a/features/conversation-assistant/ml-service/README.md +++ b/features/conversation-assistant/ml-service/README.md @@ -1,6 +1,6 @@ # Conversation Assistant ML Service -FastAPI-based ML inference service with LoRA fine-tuning, Redis caching, and model hot-swapping. +FastAPI-based ML inference service with intelligent response generation, conversation memory, style adaptation, and message triage. 
## Architecture @@ -8,18 +8,25 @@ FastAPI-based ML inference service with LoRA fine-tuning, Redis caching, and mod ┌─────────────────────────────────────────────────────────────┐ │ ML Service (Port 8100) │ ├─────────────────────────────────────────────────────────────┤ -│ FastAPI Application │ +│ Core Endpoints │ │ ├── /generate - Sync text generation │ │ ├── /generate/async - Async job queue │ │ ├── /training/start - Start LoRA fine-tuning │ -│ ├── /training/status - Training progress │ │ ├── /model/deploy - Hot-swap trained model │ │ └── /health - Health status │ ├─────────────────────────────────────────────────────────────┤ +│ ML Feature Endpoints │ +│ ├── /suggestions - Multi-option response generation │ +│ ├── /memory/* - Conversation memory (RAG) │ +│ ├── /style/* - Style learning & adaptation │ +│ └── /triage - Message urgency scoring │ +├─────────────────────────────────────────────────────────────┤ │ Components │ │ ├── LLM Manager - GGUF model loading (llama-cpp) │ │ ├── LoRA Trainer - QLoRA fine-tuning (peft/trl) │ -│ ├── GGUF Converter - HuggingFace → GGUF │ +│ ├── Memory Store - Redis VSS + nomic-embed │ +│ ├── Style Adapter - Per-contact style profiles │ +│ ├── Intent Classifier - Message understanding │ │ └── Redis Client - Caching + job queuing │ └─────────────────────────────────────────────────────────────┘ ``` @@ -155,6 +162,388 @@ GET /generate/status/{job_id} } ``` +--- + +## ML Feature Endpoints + +### Suggested Replies + +Generate themed response options for conversations. + +#### Generate Suggestions + +``` +POST /suggestions +``` + +Generate multiple suggested response options with themes. 
+ +**Request:** +```json +{ + "conversation_id": "conv-123", + "messages": [ + {"role": "user", "content": "Hey, are you free Saturday?", "timestamp": "2024-12-28T10:00:00Z"} + ], + "count": 8, + "themes": ["casual", "brief", "empathetic"] +} +``` + +**Response:** +```json +{ + "request_id": "req-uuid", + "conversation_id": "conv-123", + "options": [ + { + "text": "Yes! What did you have in mind?", + "descriptor": "Enthusiastic", + "theme": "casual", + "confidence": 0.92, + "quality_score": 0.88 + } + ], + "has_more": true, + "total_count": 8 +} +``` + +#### Get More Suggestions + +``` +GET /suggestions/more/{request_id} +``` + +Retrieve remaining suggestions from a previous generation. + +**Response:** +```json +{ + "options": [ + { + "text": "Let me check my calendar", + "descriptor": "Noncommittal", + "theme": "brief", + "confidence": 0.85, + "quality_score": 0.82 + } + ] +} +``` + +--- + +### Conversation Memory (RAG) + +Store and recall past conversations via semantic similarity. + +#### Store Memory + +``` +POST /memory/store +``` + +Store a conversation segment with auto-summarization. + +**Request:** +```json +{ + "user_id": "user-123", + "contact_id": "contact-456", + "conversation_id": "conv-789", + "messages": [ + {"role": "user", "content": "How was the concert?"}, + {"role": "assistant", "content": "It was amazing! The opening act was great."} + ], + "summary": null, + "metadata": {"event": "concert-discussion"} +} +``` + +**Response:** +```json +{ + "memory_id": "mem-uuid", + "summary": "Discussion about a concert, positive feedback about the opening act.", + "stored_at": "2024-12-28T10:00:00Z" +} +``` + +#### Recall Memories + +``` +POST /memory/recall +``` + +Retrieve relevant past conversations via semantic search. 
+ +**Request:** +```json +{ + "user_id": "user-123", + "contact_id": "contact-456", + "query": "concert last month", + "top_k": 3 +} +``` + +**Response:** +```json +{ + "memories": [ + { + "memory_id": "mem-uuid", + "user_id": "user-123", + "contact_id": "contact-456", + "summary": "Discussion about a concert...", + "similarity_score": 0.87, + "stored_at": "2024-12-28T10:00:00Z", + "messages": [...], + "metadata": {} + } + ], + "query": "concert last month", + "total_found": 1, + "search_time_ms": 42.5 +} +``` + +#### Inject Memories + +``` +POST /memory/inject +``` + +Inject recalled memories into conversation context. + +**Request:** +```json +{ + "messages": [ + {"role": "user", "content": "Remember that concert?"} + ], + "memories": [...] +} +``` + +**Response:** +```json +{ + "messages": [ + {"role": "system", "content": "# Relevant Past Conversations..."}, + {"role": "user", "content": "Remember that concert?"} + ], + "injected_count": 2 +} +``` + +#### Get Memory Stats + +``` +GET /memory/stats +``` + +Get memory store statistics. + +**Response:** +```json +{ + "total_memories": 150, + "unique_users": 3, + "unique_contacts": 12, + "index_size_bytes": 1048576, + "oldest_memory": "2024-01-01T00:00:00Z", + "newest_memory": "2024-12-28T10:00:00Z" +} +``` + +#### Delete Memory + +``` +DELETE /memory/{memory_id} +``` + +Delete a specific memory. + +**Response:** +```json +{ + "deleted": true +} +``` + +--- + +### Style Learning & Adaptation + +Learn and apply user communication styles. + +#### Learn Style + +``` +POST /style/learn +``` + +Learn style from training samples. + +**Request:** +```json +{ + "user_id": "user-123", + "contact_id": "contact-456", + "samples": [ + {"input": "How are you?", "output": "Good! 
You?"}, + {"input": "Meeting tomorrow?", "output": "yep, see you there"} + ] +} +``` + +**Response:** +```json +{ + "formality": 0.3, + "emoji_usage": false, + "avg_length": 12, + "punctuation_style": "minimal", + "capitalization": "lowercase", + "common_phrases": ["yep", "sounds good"], + "contraction_preference": 0.8, + "response_brevity": 0.7, + "samples_count": 2 +} +``` + +#### Get Style Profile + +``` +GET /style/{user_id}/{contact_id} +``` + +Retrieve stored style profile. + +**Response:** Same as Learn Style response. + +#### Apply Style + +``` +POST /style/apply +``` + +Apply learned style to a response. + +**Request:** +```json +{ + "user_id": "user-123", + "contact_id": "contact-456", + "response": "I am doing well, thank you for asking.", + "use_llm": false +} +``` + +**Response:** +```json +{ + "styled_response": "good! you?", + "original_response": "I am doing well, thank you for asking.", + "profile_used": {...} +} +``` + +#### Delete Style Profile + +``` +DELETE /style/{user_id}/{contact_id} +``` + +Delete a style profile. + +**Response:** +```json +{ + "deleted": true +} +``` + +--- + +### Message Triage + +Score message urgency and classify intent. + +#### Triage Single Message + +``` +POST /triage +``` + +**Request:** +```json +{ + "message": "Hey, can you call me ASAP? It's urgent!", + "contact_classification": "friend", + "message_id": "msg-123" +} +``` + +**Response:** +```json +{ + "urgency_score": 0.85, + "adjusted_urgency": 0.90, + "priority": "urgent", + "intent": "request", + "emotional_tone": "concerned", + "topic": "personal", + "suggested_response_style": "empathetic", + "suggested_response_time": "immediate", + "confidence_overall": 0.88, + "raw_message": "Hey, can you call me ASAP? 
It's urgent!", + "message_id": "msg-123", + "is_urgent": true, + "needs_action": true, + "is_positive": false, + "is_negative": false +} +``` + +**Contact Classifications:** `friend`, `family`, `work`, `acquaintance`, `unknown` + +**Priority Levels:** +- `urgent` - Urgency >= 0.8, respond immediately +- `time-sensitive` - Urgency >= 0.6, respond within an hour +- `routine` - Urgency >= 0.3, respond today +- `low` - Urgency < 0.3, respond whenever + +#### Batch Triage + +``` +POST /triage/batch +``` + +Triage multiple messages; results are returned sorted by urgency. + +**Request:** +```json +{ + "messages": [ + {"message": "Hey!", "contact_classification": "friend"}, + {"message": "URGENT: Server is down!", "contact_classification": "work"} + ] +} +``` + +**Response:** +```json +{ + "results": [...], + "total": 2 +} +``` + +--- + ## LoRA Fine-Tuning ### Training Pipeline