Agent Forgets Tool Call Results After Context Compression

Symptom

Agent read a config file at turn 5; at turn 40 it asks to read it again
Database query result referenced correctly early in session, forgotten later
Agent says “I don’t have that information” about data it retrieved 20 turns ago
Context compression summarized “the agent read file X and found Y” → detail Y is lost
Agent re-runs expensive tool calls unnecessarily

Root Cause

Context compression strategies (sliding window, summarization) prioritize recent turns and condense older ones. Tool results — even critical data retrieved from files or APIs — get compressed into vague summaries that lose the actual data values. The agent then lacks the specific values it previously retrieved.

Fix

Option 1: Maintain a persistent tool result cache alongside history

from datetime import datetime
from typing import Any

class AgentWithToolMemory:
    def __init__(self):
        self.history = []
        self.tool_cache = {}  # key -> {result, retrieved_at, tool_name}

    def cache_tool_result(self, key: str, result: Any, tool_name: str):
        self.tool_cache[key] = {
            "result": result,
            "tool_name": tool_name,
            "retrieved_at": datetime.utcnow().isoformat()
        }

    def get_cached_result(self, key: str) -> Any | None:
        entry = self.tool_cache.get(key)
        return entry["result"] if entry else None

    def build_tool_summary_for_context(self) -> str:
        """Inject cached results into every API call as a stable section"""
        if not self.tool_cache:
            return ""
        lines = ["## Retrieved Data (always available, not affected by context limits):"]
        for key, entry in self.tool_cache.items():
            lines.append(f"### {key} (from {entry['tool_name']} at {entry['retrieved_at']})")
            lines.append(str(entry["result"])[:2000])  # Limit per entry
        return "\n".join(lines)

    async def complete(self, user_message: str, agent) -> str:
        tool_summary = self.build_tool_summary_for_context()
        system_injection = f"\n\n{tool_summary}" if tool_summary else ""

        response = await agent.complete(
            system=BASE_SYSTEM + system_injection,
            messages=self.compress_history() + [{"role": "user", "content": user_message}]
        )
        return response

Option 2: Pin critical tool results as permanent context

class PinnedContextManager:
    def __init__(self):
        self.pinned = []  # Critical data that survives compression
        self.history = []

    def pin(self, label: str, content: str):
        """Pin important tool result — never compressed away"""
        self.pinned.append({"label": label, "content": content})

    def build_messages(self, new_user_message: str) -> list:
        """Always include pinned content before history"""
        pinned_section = ""
        if self.pinned:
            pinned_section = "## Pinned Data (always in context):\n"
            for item in self.pinned:
                pinned_section += f"\n### {item['label']}\n{item['content']}\n"

        # Compress history but keep pinned data
        compressed_history = self.compress_old_history(self.history)

        return [
            {
                "role": "user",
                "content": pinned_section + "\n---\n" + compressed_history[0]["content"]
                if compressed_history else pinned_section
            },
            *compressed_history[1:],
            {"role": "user", "content": new_user_message}
        ]

# Usage
ctx = PinnedContextManager()

# After reading config file
config_content = read_file_tool("config.yaml")
ctx.pin("config.yaml contents", config_content)  # Never compressed

Option 3: Structured workspace notes file

from pathlib import Path
import json

WORKSPACE_NOTES = Path(".agent_workspace/session_notes.json")

def save_tool_result_to_workspace(key: str, data: Any, description: str):
    """Persist tool results outside the context window"""
    WORKSPACE_NOTES.parent.mkdir(exist_ok=True)
    notes = json.loads(WORKSPACE_NOTES.read_text()) if WORKSPACE_NOTES.exists() else {}
    notes[key] = {
        "data": data,
        "description": description,
        "saved_at": datetime.utcnow().isoformat()
    }
    WORKSPACE_NOTES.write_text(json.dumps(notes, indent=2))

def get_workspace_summary() -> str:
    """Include in system prompt so agent always knows what's been retrieved"""
    if not WORKSPACE_NOTES.exists():
        return ""
    notes = json.loads(WORKSPACE_NOTES.read_text())
    lines = ["Previously retrieved data (use this, don't re-fetch):"]
    for key, entry in notes.items():
        lines.append(f"- {key}: {entry['description']}")
    return "\n".join(lines)

# System prompt includes workspace summary
system = f"{BASE_SYSTEM}\n\n{get_workspace_summary()}"

Option 4: Detect when agent tries to re-fetch cached data

RE_FETCH_PATTERNS = [
    "let me read", "I'll check the file", "let me look at",
    "I need to fetch", "let me retrieve", "let me query",
    "can you share", "I don't have access to"
]

def detect_unnecessary_refetch(agent_response: str, tool_cache: dict) -> bool:
    """Detect if agent is about to re-fetch something already in cache"""
    response_lower = agent_response.lower()
    if not any(p in response_lower for p in RE_FETCH_PATTERNS):
        return False

    # Check if any cached item is referenced
    for key in tool_cache:
        if key.lower() in response_lower:
            return True

    return False

async def run_with_refetch_prevention(user_message: str, agent, cache: dict) -> str:
    response = await agent.complete(user_message)

    if detect_unnecessary_refetch(response, cache):
        # Inject cached data and remind agent
        cached_summary = format_cache_for_agent(cache)
        reminder = f"You already have this data in your context:\n{cached_summary}\nPlease use the cached data rather than re-fetching."
        response = await agent.complete(reminder)

    return response

Option 5: Tool call deduplication

import hashlib, json

class DeduplicatingToolWrapper:
    def __init__(self):
        self._cache = {}

    def _cache_key(self, tool_name: str, tool_input: dict) -> str:
        content = json.dumps({"tool": tool_name, "input": tool_input}, sort_keys=True)
        return hashlib.md5(content.encode()).hexdigest()

    async def call(self, tool_name: str, tool_input: dict) -> dict:
        key = self._cache_key(tool_name, tool_input)

        if key in self._cache:
            print(f"Cache hit for {tool_name}({tool_input}) — skipping re-fetch")
            return self._cache[key]

        result = await execute_tool(tool_name, tool_input)
        self._cache[key] = result
        return result

tools = DeduplicatingToolWrapper()
# Second call to read same file returns cached result instantly

What Gets Lost in Context Compression

Content	Survives compression?	Fix
Recent conversation turns	Yes	No action needed
Old file contents (verbatim)	No — summarized	Pin or cache separately
Database query results	No — summarized	Cache in workspace notes
Error messages from old turns	Partially	Pin if critical
User preferences set early	No — lost	Save to preferences file
API response data	No — summarized	Cache in tool memory

Expected Token Savings

Re-running expensive tool calls: ~5,000 tokens per re-fetch Tool result cache prevents redundant calls: 0 extra tokens

Environment

Long-running agents with many tool calls; most critical for file/DB-heavy workflows
Source: direct experience with agents losing context of retrieved data

Wasting tokens on this error?

Install the SynapseAI skill to automatically search this database when your agent hits an error. Average savings: $2–5 per error incident.

clawhub install synapse-ai

Solved an error that's not here?

Share it and earn MoltCoin rewards.

Contribute a solution →