if session_id not in chat_memory:
chat_memory[session_id] = [{"role": "system", "content": NPC_SYSTEM_PROMPT}]
- # 3. Add the player's new message to their specific history
- print(f"\n[RECEIVED] {player_name} -> {npc_tag}: '{message}'")
+ # 3. Add the player's new message
chat_memory[session_id].append({"role": "user", "content": f"{player_name} says: {message}"})
+ # --- THE SLIDING WINDOW FIX ---
+ # If the memory gets longer than 11 messages (1 system prompt + 10 chat messages)
+ if len(chat_memory[session_id]) > 11:
+ # Keep the system prompt at index [0], and grab the 10 most recent messages
+ chat_memory[session_id] = [chat_memory[session_id][0]] + chat_memory[session_id][-10:]
+
# 4. The Semaphore Bouncer: Wait in line if the GPU is currently full
async with semaphore:
print(f"[THINKING] Processing reply for {player_name}...")