#!/usr/bin/env python3
"""
RAG Query - Ask questions about your emails
Retrieves relevant emails + sends to LLM for natural language answer

Usage:
    python3 rag_query.py "What supplier delivery issues should I worry about?"
"""

import os
import sys
import warnings
# Suppress every Python warning for the whole process so that library
# warnings do not clutter the CLI output. Note this is a blanket filter:
# it hides deprecation and runtime warnings from all modules.
warnings.filterwarnings('ignore')

# Config
# Storage directory passed to chromadb.PersistentClient in rag_query().
CHROMA_PATH = "/output/chroma"
# Base URL of the Ollama server; rag_query() appends "/api/chat" to it.
OLLAMA_HOST = "http://localhost:11434"
# Model name sent in the "model" field of the Ollama chat request.
MODEL = "qwen3.5:9b-q4_K_M"  # Use a good text model for reasoning

def _format_context(docs, metas):
    """Render retrieved emails as the plain-text context section of the prompt.

    Each email contributes a header line, four metadata fields (falling back
    to 'N/A' when a key is missing) and at most the first 800 characters of
    its content.
    """
    sections = []
    for idx, (doc, meta) in enumerate(zip(docs, metas), start=1):
        # A result entry may come back without metadata or text (None);
        # normalise so the .get() calls and the slice below cannot raise.
        meta = meta or {}
        body = doc or ""
        sections.append(
            f"\n--- Email {idx} ---\n"
            f"From: {meta.get('sender', 'N/A')}\n"
            f"Subject: {meta.get('subject', 'N/A')}\n"
            f"Date: {meta.get('date', 'N/A')}\n"
            f"Category: {meta.get('category', 'N/A')}\n"
            f"Content:\n{body[:800]}\n"
        )
    return "".join(sections)


def _print_raw_results(docs, metas):
    """Print a short preview of each retrieved email when no LLM answer exists."""
    print("\nFalling back to raw results...\n")
    for idx, (doc, meta) in enumerate(zip(docs, metas), start=1):
        meta = meta or {}
        body = doc or ""
        print(f"\n[{idx}] {meta.get('subject', 'N/A')}")
        print(f"    From: {meta.get('sender', 'N/A')}")
        print(f"    Preview: {body[:200]}...")


def rag_query(question: str, limit: int = 10):
    """Answer a question about the indexed emails using retrieval + an LLM.

    Embeds the question with the BAAI/bge-small-en-v1.5 model, fetches the
    closest entries from the "emails" collection of the Chroma store at
    CHROMA_PATH, and sends them together with the question to the Ollama
    chat endpoint. Progress and the final answer are printed to stdout.

    Args:
        question: Natural-language question to answer.
        limit: Maximum number of emails to retrieve as context.

    Returns:
        The model's answer as a string, or None when the LLM call fails
        (request error, non-200 status, or an unexpected response body).
        In the failure case a preview of the retrieved emails is printed
        instead.
    """
    # Must be set before the heavy imports below: native threading runtimes
    # typically read this variable once, when the library is first loaded.
    os.environ.setdefault('OMP_NUM_THREADS', '4')

    # Imported lazily so that merely importing this module stays cheap.
    from sentence_transformers import SentenceTransformer
    import chromadb
    import httpx

    print(f"Question: {question}\n")

    # Load embedding model
    print("[1/4] Loading embedding model...")
    model = SentenceTransformer("BAAI/bge-small-en-v1.5", device='cpu')

    # Load ChromaDB
    print("[2/4] Searching knowledge base...")
    client = chromadb.PersistentClient(path=CHROMA_PATH)
    collection = client.get_collection("emails")

    # Embed the question and run the similarity search
    q_emb = model.encode([question])
    results = collection.query(
        query_embeddings=q_emb.tolist(),
        n_results=limit
    )

    # A single query was issued, so take the first (only) result list.
    docs = results['documents'][0]
    metas = results['metadatas'][0]

    print(f"[3/4] Found {len(docs)} relevant emails")

    context = _format_context(docs, metas)

    # Send to LLM
    print(f"[4/4] Asking Ollama ({MODEL})...")

    prompt = f"""You are a helpful assistant analyzing a user's business emails.

Based on the following retrieved emails, answer the user's question.

Include specific details from the emails (names, dates, amounts, issues) when relevant.
If the emails don't contain enough information to answer, say so.

USER QUESTION: {question}

RETRIEVED EMAILS:
{context}

Provide a clear, concise answer based on the emails above."""

    answer = None
    try:
        response = httpx.post(
            f"{OLLAMA_HOST}/api/chat",
            json={
                "model": MODEL,
                "messages": [{"role": "user", "content": prompt}],
                "stream": False,
                "options": {"temperature": 0.3, "num_ctx": 8192}
            },
            timeout=120
        )

        if response.status_code == 200:
            answer = response.json()['message']['content']
        else:
            print(f"Error: {response.status_code}")
    except Exception as e:
        # Deliberately broad: any failure talking to the LLM (connection,
        # timeout, malformed JSON, missing keys) degrades to raw results.
        print(f"LLM Error: {e}")

    if answer is None:
        # Covers both the exception path and a non-200 response, so the user
        # always sees the retrieved emails when no answer could be produced.
        _print_raw_results(docs, metas)
        return None

    print("\n" + "=" * 70)
    print("ANSWER")
    print("=" * 70)
    print(answer)
    return answer

def main():
    """Command-line entry point.

    Joins all command-line arguments into one question and passes it to
    rag_query(). When no argument is given, prints usage help and exits
    with status 1.
    """
    words = sys.argv[1:]

    # Guard clause: nothing to ask, so show how to call the script and bail.
    if not words:
        print('Usage: python3 rag_query.py "Your question here"')
        print('Example: python3 rag_query.py "What supplier delivery issues should I worry about?"')
        sys.exit(1)

    # Unquoted multi-word questions arrive as several argv entries.
    rag_query(" ".join(words))

# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()
