Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 15 additions & 2 deletions strands-command/scripts/python/agent_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from strands.agent.conversation_manager import SlidingWindowConversationManager
from strands.session import S3SessionManager
from strands.models import BedrockModel, CacheConfig
from strands.vended_plugins.context_offloader import ContextOffloader, S3Storage
from botocore.config import Config

from strands_tools import http_request, shell
Expand Down Expand Up @@ -229,17 +230,29 @@ def run_agent(query: str):
else:
raise ValueError("Both SESSION_ID and S3_SESSION_BUCKET must be set")

# Offload oversized tool results (large PR diffs, file reads, shell output)
# to storage instead of letting them crowd out the context window. The
# plugin registers a retrieval tool so the agent can fetch the full content
# on demand. Use S3 storage rather than in-memory: the offloaded result is
# replaced in the conversation with a storage reference that the session
# manager persists, so a resumed session must still be able to resolve that
# reference in a later process — which in-memory storage could not.
context_offloader = ContextOffloader(
storage=S3Storage(bucket=s3_bucket, prefix=f"{s3_prefix}/offload"),
)

# Create agent with optional trace attributes for Langfuse
agent_kwargs = {
"model": model,
"system_prompt": system_prompt,
"tools": tools,
"session_manager": session_manager,
"plugins": [context_offloader],
}

if trace_attributes:
agent_kwargs["trace_attributes"] = trace_attributes

agent = Agent(**agent_kwargs)

print("Processing user query...")
Expand Down
92 changes: 72 additions & 20 deletions strands-command/scripts/python/github_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,61 @@ def _github_request(
return f"Error {e!s}"


# Max pages to follow when fetching a paginated list endpoint, as a safety bound
# (100 items/page x 30 pages = 3000 items) against an unbounded follow loop.
_MAX_LIST_PAGES = 30


def _github_get_all_pages(endpoint: str, repo: str | None = None) -> list[dict[str, Any]] | str:
"""GET a paginated list endpoint, following Link rel="next" until exhausted.

GitHub list endpoints (e.g. a PR's files) return at most 100 items per page,
so reading a single response silently drops the rest. This follows the
pagination links and concatenates every page.

Args:
endpoint: API endpoint path (e.g., "pulls/123/files")
repo: Repository in "owner/repo" format

Returns:
The combined list of items, or an error string on failure.
"""
if repo is None:
repo = os.environ.get("GITHUB_REPOSITORY")
if not repo:
return "Error: GITHUB_REPOSITORY environment variable not found"

token = os.environ.get("GITHUB_TOKEN", "")
if not token:
return "Error: GITHUB_TOKEN environment variable not found"

headers = {
"Authorization": f"Bearer {token}",
"Accept": "application/vnd.github.v3+json",
}
url: str | None = f"https://api.github.com/repos/{repo}/{endpoint}"
params: dict | None = {"per_page": 100}
items: list[dict[str, Any]] = []

try:
for _ in range(_MAX_LIST_PAGES):
response = requests.get(url, headers=headers, params=params, timeout=30)
response.raise_for_status()
page = response.json()
if not isinstance(page, list):
return f"Error: expected a list from {endpoint}, got {type(page).__name__}"
items.extend(page)
# The "next" link already carries the page/per_page query params.
url = response.links.get("next", {}).get("url")
params = None
if not url:
return items
# Hit the page cap; return what we have rather than looping unbounded.
return items
except Exception as e:
return f"Error {e!s}"


def check_should_call_write_api_or_record(func):
"""Decorator that checks if a write api should be called, or if the tool should record to JSONL."""
@wraps(func)
Expand Down Expand Up @@ -522,38 +577,35 @@ def get_pr_files(pr_number: int, repo: str | None = None) -> str:
Returns:
Formatted list of changed files with their diffs
"""
result = _github_request("GET", f"pulls/{pr_number}/files", repo)
result = _github_get_all_pages(f"pulls/{pr_number}/files", repo)
if isinstance(result, str):
console.print(Panel(escape(result), title="[bold red]Error", border_style="red"))
return result

output = f"Files changed in PR #{pr_number}:\n\n"


# Return each file's full patch. Large results are managed centrally by the
# ContextOffloader plugin (configured on the agent), which offloads oversized
# tool results to storage and gives the agent a tool to retrieve them on
# demand -- so this function does not truncate diffs itself.
for file in result:
filename = file['filename']
status = file['status']
additions = file['additions']
deletions = file['deletions']
changes = file['changes']

output += f"📄 **{filename}** ({status})\n"
output += f" +{additions} -{deletions} (~{changes} changes)\n"

if file.get('patch'):
output += f" Diff:\n"
# Limit diff size to avoid overwhelming output
patch_lines = file['patch'].split('\n')
if len(patch_lines) > 50:
output += '\n'.join(patch_lines[:50])
output += f"\n ... (truncated, {len(patch_lines) - 50} more lines)\n"
else:
output += file['patch']
output += "\n"

header = (
f"📄 **{filename}** ({status})\n"
f" +{additions} -{deletions} (~{changes} changes)\n"
)
patch = file.get('patch')

if patch:
output += header + " Diff:\n" + patch + "\n\n"
else:
output += " (Binary file or no diff available)\n"

output += "\n"

output += header + " (Binary file or no diff available)\n\n"

console.print(Panel(escape(output), title=f"[bold green]PR #{pr_number} Files", border_style="blue"))
return output

Expand Down
126 changes: 126 additions & 0 deletions strands-command/scripts/python/tests/test_github_tools.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
"""Tests for github_tools PR helpers."""
from unittest.mock import patch

import requests

from .. import github_tools


def test_get_pr_files_returns_full_diff_untruncated():
"""The full patch is returned, with no per-file line cap.

Guards the original bug: get_pr_files used to truncate each file's patch to
50 lines, hiding the tail. Oversized results are now managed by the agent's
ContextOffloader plugin rather than truncated here.
"""
big_patch = "\n".join(f"+line {i}" for i in range(120))
files = [{
"filename": "a.py", "status": "modified",
"additions": 120, "deletions": 0, "changes": 120, "patch": big_patch,
}]

with patch.object(github_tools, "_github_get_all_pages", lambda *a, **k: files):
out = github_tools.get_pr_files(1, repo="o/r")

assert "truncated" not in out
assert "line 0" in out
assert "line 119" in out # tail is present, not cut at 50


def test_get_pr_files_inlines_every_changed_file():
"""All changed files are inlined in full, in API order."""
files = [
{"filename": "small.py", "status": "modified", "additions": 5,
"deletions": 0, "changes": 5, "patch": "\n".join(f"+s{i}" for i in range(5))},
{"filename": "big.py", "status": "modified", "additions": 5000,
"deletions": 0, "changes": 5000, "patch": "\n".join(f"+b{i}" for i in range(5000))},
]

with patch.object(github_tools, "_github_get_all_pages", lambda *a, **k: files):
out = github_tools.get_pr_files(1, repo="o/r")

assert "small.py" in out and "+s4" in out
assert "big.py" in out and "+b4999" in out # full tail of the large file present
assert "omitted" not in out.lower()


def test_get_pr_files_handles_binary_file():
"""Files without a patch (binary) are reported, not crashed on."""
files = [{
"filename": "logo.png", "status": "added",
"additions": 0, "deletions": 0, "changes": 0,
}]

with patch.object(github_tools, "_github_get_all_pages", lambda *a, **k: files):
out = github_tools.get_pr_files(1, repo="o/r")

assert "logo.png" in out
assert "Binary file or no diff available" in out


def test_get_pr_files_returns_error_string_on_request_failure():
"""A request error is surfaced as-is rather than formatted as a file list."""
with patch.object(github_tools, "_github_get_all_pages", lambda *a, **k: "Error: boom"):
out = github_tools.get_pr_files(1, repo="o/r")

assert out == "Error: boom"


class _FakeResponse:
"""Minimal requests.Response stand-in for pagination tests."""

def __init__(self, payload, next_url=None):
self._payload = payload
self.links = {"next": {"url": next_url}} if next_url else {}

def raise_for_status(self):
pass

def json(self):
return self._payload


def test_get_pr_files_follows_pagination_across_pages(monkeypatch):
"""All pages of a PR's files are fetched, not just the first.

Guards against silently reviewing only the first ~30/100 changed files on
large PRs. The first page advertises a Link rel="next"; the helper must
follow it and concatenate every page.
"""
monkeypatch.setenv("GITHUB_TOKEN", "t")
page1 = [{"filename": f"p1_{i}.py", "status": "modified", "additions": 1,
"deletions": 0, "changes": 1, "patch": f"+a{i}"} for i in range(100)]
page2 = [{"filename": "p2_final.py", "status": "modified", "additions": 1,
"deletions": 0, "changes": 1, "patch": "+final"}]
responses = [
_FakeResponse(page1, next_url="https://api.github.com/next-page"),
_FakeResponse(page2),
]

def fake_get(url, **kwargs):
return responses.pop(0)

monkeypatch.setattr(requests, "get", fake_get)
out = github_tools.get_pr_files(1, repo="o/r")

assert "p1_0.py" in out
assert "p1_99.py" in out
assert "p2_final.py" in out # file from the second page is present
assert not responses # both pages were consumed


def test_get_pr_files_caps_pagination(monkeypatch):
"""An endpoint that always advertises a next page is bounded, not infinite."""
monkeypatch.setenv("GITHUB_TOKEN", "t")

def fake_get(url, **kwargs):
return _FakeResponse(
[{"filename": "f.py", "status": "modified", "additions": 1,
"deletions": 0, "changes": 1, "patch": "+x"}],
next_url="https://api.github.com/always-next",
)

monkeypatch.setattr(requests, "get", fake_get)
# Should terminate (at the page cap) rather than hang.
out = github_tools.get_pr_files(1, repo="o/r")
assert "f.py" in out