import asyncio
import json
import aiohttp
import redis.asyncio as redis
import re
import os
import time
import chromadb

# --- CONFIGURATION ---
# Upper bound on simultaneous chat requests sent to Ollama. It sizes the
# module-level semaphore that process_message() holds around its /api/chat call.
MAX_CONCURRENT_OLLAMA_REQUESTS = 3 
# When False, process_message() removes *asterisk-wrapped* spans from the
# incoming player message before the message is added to the chat history.
ALLOW_TEXT_EMOTES = False

# =====================================================================
# 1. INITIALIZE VECTOR DATABASES (Lore & Episodic Memory)
# =====================================================================
print("Initializing ChromaDB Vector Databases...")
chroma_client = chromadb.PersistentClient(path="./asl_vectordb")

# The Lore Database: static world knowledge, seeded once from the lore file.
lore_collection = chroma_client.get_or_create_collection(name="world_lore")

# The Episodic Memory Database: conversation summaries written by
# memory_summarizer_worker() and queried per session by process_message().
memory_collection = chroma_client.get_or_create_collection(name="episodic_memories")
# Jobs pushed by process_message() and consumed by memory_summarizer_worker().
# NOTE(review): on Python < 3.10 an asyncio.Queue binds to the event loop that
# is current at construction time; confirm the target interpreter is 3.10+ or
# create this inside main().
memory_queue = asyncio.Queue()

# The existence check and the open() call previously used different file
# names (.md vs .txt), so the lore was either never loaded or the import
# crashed. Resolve the path once and use it everywhere; both historical
# names are accepted, the .txt name taking precedence.
LORE_FILE_CANDIDATES = ("asl_lore.txt", "asl_lore.md")
lore_path = next((p for p in LORE_FILE_CANDIDATES if os.path.exists(p)), None)

if lore_path is None:
    print(f"[WARNING] No lore file found (looked for: {', '.join(LORE_FILE_CANDIDATES)}).")
elif lore_collection.count() == 0:
    # Only seed an empty collection so restarts do not duplicate chunks.
    print(f"[VECTOR DB] Reading {lore_path} and vectorizing chunks...")
    with open(lore_path, "r", encoding="utf-8") as f:
        raw_lore = f.read()

    # One chunk per blank-line-separated paragraph; drop empty paragraphs.
    lore_chunks = [chunk.strip() for chunk in raw_lore.split('\n\n') if chunk.strip()]

    if lore_chunks:
        chunk_ids = [f"lore_{i}" for i in range(len(lore_chunks))]
        lore_collection.add(documents=lore_chunks, ids=chunk_ids)
        print(f"[VECTOR DB] Successfully stored {len(lore_chunks)} lore chunks!")

# Caps the number of concurrent /api/chat requests; process_message() acquires
# it around the Ollama call.
semaphore = asyncio.Semaphore(MAX_CONCURRENT_OLLAMA_REQUESTS)
# In-process chat history keyed by "<player_name>_<npc_tag>". Each value is a
# list of {"role", "content"} dicts whose element 0 is the system prompt.
# It is never persisted, so it is lost when the process exits.
chat_memory = {}

# =====================================================================
# ---  BACKGROUND MEMORY SUMMARIZER (The "Dream State") ---
# =====================================================================
async def memory_summarizer_worker(session):
    """Consume summarization jobs from ``memory_queue`` forever.

    Each job is a dict with the keys ``session_id``, ``player_name``,
    ``npc_tag`` and ``chat_log``. The chat log is sent to Ollama's
    ``/api/generate`` endpoint for a two-sentence summary, which is then
    stored in ``memory_collection`` tagged with the job's ``session_id``.

    Args:
        session: The aiohttp client session used for the Ollama request.

    Any failure while handling a job is printed and the job is dropped; the
    worker itself keeps running.
    """
    print("[BACKGROUND] Memory Summarizer Worker is active.")
    while True:
        job = await memory_queue.get()
        try:
            # Key lookups live inside the try so a malformed job cannot kill
            # the worker loop.
            session_id = job['session_id']
            player_name = job['player_name']
            npc_tag = job['npc_tag']
            chat_log = job['chat_log']

            prompt = f"Summarize the key events, facts, and the emotional tone of this conversation snippet between {player_name} and {npc_tag}. Keep it to 2 brief sentences in the past tense.\nConversation Log:\n{chat_log}"

            print(f"[MEMORY DB] Generating background memory for {player_name} and {npc_tag}...")
            # We use /api/generate here because we just want a raw text summary, not a JSON macro
            # NOTE(review): this model tag differs from the one used by
            # process_message(); confirm it is available in Ollama.
            async with session.post('http://localhost:11434/api/generate', json={
                "model": "gemma4",
                "prompt": prompt,
                "stream": False,
                "options": {
                    "temperature": 0.1
                }
            }) as response:
                # Surface HTTP errors directly instead of as a KeyError on
                # the missing 'response' field below.
                response.raise_for_status()
                result = await response.json()

            summary = result['response'].strip()

            if summary:
                # Nanosecond resolution avoids ID collisions when one session
                # produces two summaries within the same second.
                doc_id = f"{session_id}_{time.time_ns()}"
                # We store the session_id as metadata so NPCs only recall their OWN memories with this specific player
                memory_collection.add(
                    documents=[summary],
                    metadatas=[{"session_id": session_id}],
                    ids=[doc_id]
                )
                print(f"[MEMORY DB] Memory Saved: {summary}")

        except Exception as e:
            print(f"[MEMORY ERROR] Failed to summarize memory: {e}")
        finally:
            # Always balance the get() above, even if the job was cancelled.
            memory_queue.task_done()
# =====================================================================


# Fallback value for every key the game engine expects in an NPC reply.
_REPLY_DEFAULTS = {
    "thought": "",
    "speech": "",
    "emotion": "NEUTRAL",
    "action": "GUARD",
    "action_target": "",
    "target_speech": "",
}

# Number of most recent chat messages kept for live generation once a
# session's history is trimmed.
_LIVE_WINDOW = 5


def _normalize_agent_reply(raw_reply_text):
    """Parse the model's reply and return it as a JSON string with all expected keys.

    Keys are lower-cased; any expected key that is missing or not a string is
    replaced by its default, empty speech becomes a grunt, and '?' / '.' are
    stripped from ``action_target``.

    Raises:
        ValueError: If the text is not valid JSON (``json.JSONDecodeError`` is
            a ``ValueError``) or is valid JSON but not an object.
    """
    agent_brain = json.loads(raw_reply_text)
    if not isinstance(agent_brain, dict):
        raise ValueError("model reply is not a JSON object")

    agent_brain = {k.lower(): v for k, v in agent_brain.items()}
    for key, default in _REPLY_DEFAULTS.items():
        # A null/number/list here used to crash on .strip()/.replace() and
        # the player silently received no reply at all.
        if not isinstance(agent_brain.get(key), str):
            agent_brain[key] = default

    if not agent_brain["speech"].strip():
        agent_brain["speech"] = "*grunts quietly*"

    agent_brain["action_target"] = (
        agent_brain["action_target"].replace("?", "").replace(".", "").strip()
    )
    return json.dumps(agent_brain)


async def process_message(r, session, message_data):
    """Handle one game message: build the prompt, query the LLM, push the reply.

    Args:
        r: Async Redis client; the reply is RPUSHed onto ``llm_to_nwn``.
        session: aiohttp client session used for the Ollama ``/api/chat`` call.
        message_data: JSON text describing the player, the NPC and the message.

    Returns:
        None. Every exception is caught and printed, so a failing message
        never propagates out of its task; in that case no reply is pushed.
    """
    try:
        data = json.loads(message_data)
        player_name = data.get('player', data.get('target_player', 'Unknown'))
        npc_tag = data.get('npc_tag', 'UnknownNPC')
        # A JSON null (or any non-string) would make re.sub raise TypeError.
        message = data.get('message', '')
        if not isinstance(message, str):
            message = '' if message is None else str(message)

        if not ALLOW_TEXT_EMOTES:
            message = re.sub(r'\*.*?\*', '', message).strip()

        player_race = data.get('player_race', 'Unknown')
        player_alignment = data.get('player_alignment', 'Unknown')
        nearby_players = data.get('nearby_players', '')

        npc_persona = data.get('persona', 'You are a generic citizen.')
        npc_profession = data.get('profession', 'Commoner')
        npc_mood = data.get('mood', 'Neutral')
        npc_secret = data.get('secret', '')

        npc_alignment = data.get('npc_alignment', 'True Neutral')
        npc_gender = data.get('npc_gender', 'Unknown')
        npc_race = data.get('npc_race', 'Creature')
        npc_routine = data.get('npc_routine', '')

        player_state = data.get('player_state', 'Relaxed and unarmed.')
        world_state = data.get('world_state', 'Nothing of note is happening.')
        npc_health = data.get('npc_health', 'Healthy and uninjured.')
        relationship = data.get('relationship', 'Neutral or Friendly.')
        location_context = data.get('location_context', 'You are in a generic area.')

        group_context = f"Be aware that these other players are listening nearby: {nearby_players}." if nearby_players else ""
        secret_context = f"YOUR SECRET (Reveal only if players are persuasive): {npc_secret}" if npc_secret else ""
        routine_context = f"YOUR REQUIRED ROUTINE: {npc_routine}" if npc_routine else ""

        # One chat history and one memory namespace per (player, NPC) pair.
        session_id = f"{player_name}_{npc_tag}"

        # =====================================================================
        # 2. THE DUAL RAG QUERY (Lore + Memories)
        # =====================================================================
        # NOTE: the Chroma calls below are made synchronously (no await), so
        # they run on the event-loop thread.
        search_query = f"{location_context} {message}"
        retrieved_lore = "No specific local lore currently relevant."
        past_memories = ""

        # Fetch Lore
        if lore_collection.count() > 0:
            results = lore_collection.query(query_texts=[search_query], n_results=1)
            if results['documents'] and results['documents'][0]:
                retrieved_lore = f"- {results['documents'][0][0]}"

        # Fetch Episodic Memories (ONLY memories between this specific NPC and this specific Player)
        if memory_collection.count() > 0:
            mem_results = memory_collection.query(
                query_texts=[search_query],
                n_results=2,
                where={"session_id": session_id}
            )
            if mem_results['documents'] and mem_results['documents'][0]:
                formatted_mems = "\n- ".join(mem_results['documents'][0])
                past_memories = f"\nPAST MEMORIES OF {player_name}:\n- {formatted_mems}"
        # =====================================================================

        # The prompt previously listed the ACTION RULE twice with conflicting
        # action lists and demanded "FIVE keys" while the template has six;
        # it now states a single action list and all six keys.
        dynamic_system_prompt = f"""

        {npc_persona}

        CURRENT STATUS & TRAITS:
        - Race & Gender: {npc_gender} {npc_race}
        - Profession: {npc_profession}
        - Alignment: {npc_alignment}
        - Conversational Charisma: Low/Gruff unless otherwise specified.
        - Current Mood: {npc_mood}
        - Current Physical State: {npc_health}
        {secret_context}
        {routine_context}

        CURRENT LOCATION: {location_context}

        RELEVANT WORLD KNOWLEDGE:
        {retrieved_lore}
        {past_memories}

        CURRENT WORLD RUMORS/EVENTS:
        {world_state}

        CURRENT TARGET: You are speaking to {player_name}, who is a {player_alignment} {player_race}. 
        Their physical state: {player_state}
        Relationship to you: {relationship}
        {group_context}
        React appropriately based on your personality, alignment, and mood.

        CRITICAL ENGINE RULES:
        Respond ONLY in valid JSON. You MUST use exactly these SIX keys: "thought", "speech", "emotion", "action", "action_target", and "target_speech".

        ACTION RULE:
        Your "action" key MUST be exactly one of the following words:
        [WANDER, PATROL, FOLLOW, GUARD, GO_TO, INTERACT, USE_OBJECT, RETURN_TO_POST, ATTACK, REST, STEALTH, SEARCH, UNSTEALTH, PEACE, COMMAND, CONVERSE]

        - Use CONVERSE if you want to initiate a back-and-forth dialogue with a standard, unintelligent NPC. You will write their response for them.

        YOUR RESPONSE MUST BE A SINGLE, VALID JSON OBJECT. YOU MUST USE THIS EXACT TEMPLATE:
        {{
            "thought": "Your internal reasoning here.",
            "speech": "What YOU say out loud.",
            "emotion": "MACRO WORD",
            "action": "MACRO WORD",
            "action_target": "Target name",
            "target_speech": "If action is CONVERSE, write what the target NPC replies back to you here. Otherwise, leave blank."
        }}
        """

        # The system prompt is rebuilt on every message (mood, location and
        # retrieved context change), so slot 0 is always overwritten.
        if session_id not in chat_memory:
            chat_memory[session_id] = [{"role": "system", "content": dynamic_system_prompt}]
        else:
            chat_memory[session_id][0] = {"role": "system", "content": dynamic_system_prompt}

        chat_memory[session_id].append({"role": "user", "content": f"{player_name} says: {message}"})

        # =====================================================================
        # --- THE MEMORY EXTRACTION TRIGGER ---
        # =====================================================================
        history = chat_memory[session_id]
        if len(history) > 10:
            # Summarize EVERYTHING that is about to leave the live window
            # (skipping the system prompt at [0]). The old fixed [1:6] slice
            # silently lost the message(s) between index 6 and the kept tail.
            evicted = history[1:-_LIVE_WINDOW]
            chat_log_str = "\n".join(m['content'] for m in evicted)

            # Slide the window to keep live generation fast
            chat_memory[session_id] = [history[0]] + history[-_LIVE_WINDOW:]

            # Fire and forget: push it to the background queue!
            await memory_queue.put({
                'session_id': session_id,
                'player_name': player_name,
                'npc_tag': npc_tag,
                'chat_log': chat_log_str
            })
        # =====================================================================

        async with semaphore:
            print(f"[THINKING] Processing reply for {player_name}...")
            async with session.post('http://localhost:11434/api/chat', json={
                "model": "llama3",
                "messages": chat_memory[session_id],
                "format": "json",
                "stream": False,
                "options": {
                    "temperature": 0.2
                }
            # A bare number for `timeout` is deprecated in aiohttp; pass the
            # explicit ClientTimeout structure instead.
            }, timeout=aiohttp.ClientTimeout(total=45)) as response:

                response.raise_for_status()
                result = await response.json()
                raw_reply_text = result['message']['content']

        try:
            clean_reply_text = _normalize_agent_reply(raw_reply_text)
        except (ValueError, TypeError):
            # Invalid JSON, a non-object reply, or a non-text payload: answer
            # with a safe default rather than leaving the player with nothing.
            print(f"[WARNING] AI Hallucinated! Overriding with safe defaults.")
            clean_reply_text = json.dumps({
                **_REPLY_DEFAULTS,
                "thought": "I lost my train of thought.",
                "speech": "*grunts quietly*",
                "action": "WANDER",
            })

        print(f"[REPLY] from {npc_tag} to {player_name}: {clean_reply_text}")
        chat_memory[session_id].append({"role": "assistant", "content": clean_reply_text})

        reply_payload = {
            "npc_tag": npc_tag,
            "target_player": player_name,
            "reply": clean_reply_text
        }
        await r.rpush('llm_to_nwn', json.dumps(reply_payload))

    except Exception as e:
        # Task boundary: log and drop so one bad message cannot crash the bridge.
        print(f"[ERROR] Failed to process message: {e}")

async def main():
    """Connect to Redis, start the memory worker and dispatch game messages.

    Pings Redis first and returns early if it is unreachable. Otherwise it
    blocks on the ``nwn_to_llm`` list forever, spawning one
    ``process_message`` task per incoming message. Errors in the receive loop
    are printed and retried after a one-second pause.
    """
    print("Initializing Async Redis Bridge...")
    r = redis.Redis(host='127.0.0.1', port=6380, decode_responses=True)

    try:
        await r.ping()
        print("SUCCESS: Connected to the Docker Redis database!")
    except Exception as e:
        print(f"CRITICAL ERROR: Could not connect to Redis. {e}")
        return

    print(f"Ready! Listening for game messages. Max GPU concurrency: {MAX_CONCURRENT_OLLAMA_REQUESTS}")

    # The event loop only keeps weak references to tasks, so a task whose
    # result is discarded can be garbage-collected before it finishes.
    # Hold a strong reference until each task completes.
    background_tasks = set()

    def spawn(coro):
        task = asyncio.create_task(coro)
        background_tasks.add(task)
        task.add_done_callback(background_tasks.discard)
        return task

    async with aiohttp.ClientSession() as session:
        # Start the background memory worker
        spawn(memory_summarizer_worker(session))

        while True:
            try:
                result = await r.blpop('nwn_to_llm')
                if result:
                    # blpop returns (list name, value); only the value is used.
                    _queue_name, message_data = result
                    spawn(process_message(r, session, message_data))
            except Exception as e:
                print(f"[LOOP ERROR] {e}")
                # Back off briefly so a persistent failure does not spin.
                await asyncio.sleep(1)

# Script entry point: asyncio.run() creates the event loop and runs main()
# until it returns (main() only returns when the initial Redis ping fails).
if __name__ == "__main__":
    asyncio.run(main())