Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ build/
venv/
env/

# IDE configurations
.vscode/
.idea/
*.sublime-project
*.sublime-workspace

# Test / tooling caches
.pytest_cache/
.mypy_cache/
Expand Down
6 changes: 4 additions & 2 deletions interface/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,10 @@ class ExperimentConfig:
"""Selects one implementation along each experimental axis."""

prompting: Literal["minimal", "standard", "verbose"] = "standard"
observation: Literal["text_only", "image_text", "image_only"] = "image_text"
context_window: Literal["current", "last3"] = "last3"
observation: Literal["text_only", "image_text", "image_only"] = "image_only"
include_current_observation_description: bool = False
observation_text_includes_facing: bool = False
context_window: Literal["current", "last3"] = "current"
querying: Literal["step_by_step", "subgoal", "full_trajectory"] = "step_by_step"
chat_history: Literal["stateless", "rolling", "full"] = "stateless"
chat_turns_max: int = 3
Expand Down
30 changes: 22 additions & 8 deletions interface/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from gridworld.backends.base import GridState
from gridworld.task_spec import Position, TaskSpecification
from prompting_experiments.prompt_templates import observation as observation_templates

FACING_ORDER = ["NORTH", "EAST", "SOUTH", "WEST"]

Expand Down Expand Up @@ -126,29 +127,42 @@ def describe_cell(
cols: int,
) -> str:
if row < 1 or row > rows or col < 1 or col > cols:
return "out of bounds"
return observation_templates.CELL_OUT_OF_BOUNDS
if (row, col) in walls:
return "wall"
return observation_templates.CELL_WALL
if (row, col) == goal:
return f"GOAL ({row},{col})"
return observation_templates.CELL_GOAL.format(row=row, col=col)

key_color = key_at_cell(task_spec, state, row, col)
if key_color:
return f"{key_color} key ({row},{col})"
return observation_templates.CELL_KEY.format(
key_color=key_color,
row=row,
col=col,
)

for door in task_spec.mechanisms.doors:
if to_row_col(door.position) == (row, col):
status = "open" if door.id in state.open_doors else door.initial_state
return f"{status} {door.requires_key} door ({row},{col})"
return observation_templates.CELL_DOOR.format(
status=status,
requires_key=door.requires_key,
row=row,
col=col,
)

for gate in task_spec.mechanisms.gates:
if to_row_col(gate.position) == (row, col):
cur = "open" if gate.id in state.open_gates else gate.initial_state
return f"{cur} gate ({row},{col})"
return observation_templates.CELL_GATE.format(state=cur, row=row, col=col)

for switch in task_spec.mechanisms.switches:
if to_row_col(switch.position) == (row, col):
on_off = "on" if switch.id in state.active_switches else switch.initial_state
return f"switch ({on_off}) ({row},{col})"
return observation_templates.CELL_SWITCH.format(
state=on_off,
row=row,
col=col,
)

return f"open ({row},{col})"
return observation_templates.CELL_OPEN.format(row=row, col=col)
90 changes: 53 additions & 37 deletions interface/feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
switch_at_cell,
switches_controlling_gate,
)
from prompting_experiments.prompt_templates import feedback as feedback_templates


def infer_step_outcome(
Expand All @@ -35,13 +36,17 @@ def infer_step_outcome(
door = next((d for d in task_spec.mechanisms.doors if d.id == door_id), None)
color = door.requires_key if door else "matching"
if action == "MOVE_FORWARD" and prev_pos != curr_pos:
return "OPENED", f"Opened {color} door {door_id} and moved to {curr_pos}."
return "OPENED", f"Opened {color} door {door_id}."
return "OPENED", feedback_templates.OPENED_AND_MOVED.format(
color=color,
door_id=door_id,
position=curr_pos,
)
return "OPENED", feedback_templates.OPENED_DOOR.format(color=color, door_id=door_id)

if action in ("TURN_LEFT", "TURN_RIGHT"):
if prev.agent_direction != curr.agent_direction:
return "TURNED", f"Now facing {agent_facing(curr)}."
return "NOTHING", f"{action} had no effect."
return "TURNED", feedback_templates.NOW_FACING.format(facing=agent_facing(curr))
return "NOTHING", feedback_templates.ACTION_NO_EFFECT.format(action=action)

if action == "MOVE_FORWARD":
if prev_pos == curr_pos:
Expand All @@ -50,9 +55,10 @@ def infer_step_outcome(
if key_color:
return (
"BLOCKED",
f"MOVE_FORWARD blocked by a {key_color} key at {fwd}. "
"Keys occupy their cell; you cannot walk onto them. "
"Face the key and use PICKUP from your current cell.",
feedback_templates.MOVE_BLOCKED_BY_KEY.format(
key_color=key_color,
position=fwd,
),
)
gate = gate_at_cell(task_spec, prev, fwd[0], fwd[1])
if gate and not gate["open"]:
Expand All @@ -61,33 +67,39 @@ def infer_step_outcome(
switch_list = ", ".join(controllers)
return (
"BLOCKED",
f"MOVE_FORWARD blocked by closed gate {gate['id']} at {fwd}. "
f"Activate switch(es) {switch_list} to open it.",
feedback_templates.MOVE_BLOCKED_BY_GATE_WITH_SWITCHES.format(
gate_id=gate["id"],
position=fwd,
switches=switch_list,
),
)
return (
"BLOCKED",
f"MOVE_FORWARD blocked by closed gate {gate['id']} at {fwd}.",
feedback_templates.MOVE_BLOCKED_BY_GATE.format(
gate_id=gate["id"],
position=fwd,
),
)
return "BLOCKED", "MOVE_FORWARD blocked by wall or closed door/gate."
return "BLOCKED", feedback_templates.MOVE_BLOCKED_GENERIC
if terminated and reward > 0 and curr_pos == goal:
return "DONE", f"Reached goal at {goal}."
return "MOVED", f"Moved to {curr_pos}."
return "DONE", feedback_templates.REACHED_GOAL.format(goal=goal)
return "MOVED", feedback_templates.MOVED_TO.format(position=curr_pos)

if action == "PICKUP":
if (
prev.agent_carrying != curr.agent_carrying
or len(curr.collected_keys) > len(prev.collected_keys)
):
carried = curr.agent_carrying or "a"
return "PICKUP", f"Picked up {carried} key."
return "NOTHING", "Nothing to pick up here."
return "PICKUP", feedback_templates.PICKED_UP_KEY.format(key_color=carried)
return "NOTHING", feedback_templates.NOTHING_TO_PICK_UP

if action == "TOGGLE":
if (
prev.active_switches != curr.active_switches
or prev.open_gates != curr.open_gates
):
return "TOGGLED", "Toggled switch or gate state changed."
return "TOGGLED", feedback_templates.TOGGLED_STATE_CHANGED
fwd = forward_cell(prev)
switch_ahead = switch_at_cell(task_spec, fwd[0], fwd[1])
switch_here = switch_at_cell(task_spec, prev_pos[0], prev_pos[1])
Expand All @@ -96,34 +108,34 @@ def infer_step_outcome(
if switch_ahead["switch_type"] == "hold":
return (
"NOTHING",
f"TOGGLE had no effect. MOVE_FORWARD onto the switch at {fwd} "
"(hold switches activate while you stand on them).",
feedback_templates.TOGGLE_HOLD_SWITCH_HINT.format(position=fwd),
)
return (
"NOTHING",
f"TOGGLE had no effect. MOVE_FORWARD onto the switch at {fwd}, then TOGGLE.",
feedback_templates.TOGGLE_SWITCH_HINT.format(position=fwd),
)
if gate_ahead and not gate_ahead["open"]:
controllers = switches_controlling_gate(task_spec, str(gate_ahead["id"]))
if controllers:
switch_list = ", ".join(controllers)
return (
"NOTHING",
"Gates cannot be toggled directly. "
f"Activate switch(es) {switch_list} instead.",
feedback_templates.GATE_TOGGLE_WITH_SWITCHES.format(
switches=switch_list,
),
)
return "NOTHING", "Gates cannot be toggled directly. Activate a linked switch instead."
return "NOTHING", feedback_templates.GATE_TOGGLE_GENERIC
return (
"NOTHING",
"TOGGLE had no effect. Stand on a switch and TOGGLE, or use PICKUP/keys for doors.",
feedback_templates.TOGGLE_NO_EFFECT,
)

if action == "DONE":
if terminated and reward > 0 and curr_pos == goal:
return "DONE", f"Task complete at {goal}."
return "WRONG_DONE", f"DONE called but not at goal {goal}."
return "DONE", feedback_templates.TASK_COMPLETE.format(goal=goal)
return "WRONG_DONE", feedback_templates.WRONG_DONE.format(goal=goal)

return "INVALID", f"Unknown or unsupported action {action}."
return "INVALID", feedback_templates.UNKNOWN_ACTION.format(action=action)


def format_step_feedback(
Expand All @@ -139,23 +151,27 @@ def format_step_feedback(
)
prev_pos = agent_row_col(prev)
if event_type == "BLOCKED":
return f"BLOCKED — {action}: {event_message} You remain at {prev_pos}.", event_type
return feedback_templates.BLOCKED_FEEDBACK.format(action=action, message=event_message, position=prev_pos), event_type
if event_type == "TURNED":
return f"TURNED — {action}: {event_message}", event_type
return feedback_templates.TURNED_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "MOVED":
return f"MOVED — {action}: {event_message}", event_type
return feedback_templates.MOVED_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "DONE":
return f"SUCCESS — {action}: {event_message}", event_type
return feedback_templates.SUCCESS_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "PICKUP":
return f"PICKUP — {action}: {event_message}", event_type
return feedback_templates.PICKUP_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "NOTHING":
return f"NOTHING — {action}: {event_message} You remain at {prev_pos}.", event_type
return feedback_templates.NOTHING_FEEDBACK.format(action=action, message=event_message, position=prev_pos), event_type
if event_type == "OPENED":
return f"OPENED — {action}: {event_message}", event_type
return feedback_templates.OPENED_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "TOGGLED":
return f"TOGGLED — {action}: {event_message}", event_type
return feedback_templates.TOGGLED_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "WRONG_DONE":
return f"WRONG DONE — {action}: {event_message} You remain at {prev_pos}.", event_type
return feedback_templates.WRONG_DONE_FEEDBACK.format(action=action, message=event_message, position=prev_pos), event_type
if event_type == "INVALID":
return f"INVALID — {action}: {event_message} You remain at {prev_pos}.", event_type
return f"{event_type} — {action}: {event_message}", event_type
return feedback_templates.INVALID_FEEDBACK.format(action=action, message=event_message, position=prev_pos), event_type
return feedback_templates.DEFAULT_FEEDBACK.format(
event_type=event_type,
action=action,
message=event_message,
), event_type
59 changes: 49 additions & 10 deletions interface/observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
History when ``context_window == "last3"`` (last 3 executed steps, oldest first):

* **text_only** — full text history only (position, facing, action, feedback).
* **image_only** — prior decision-frame PNGs + ``Action: …`` labels (no text history).
* **image_only** — prior decision-frame PNGs + inventory/action labels (no text history).
* **image_text** — full text history **and** prior decision-frame PNGs.

History is derived from enriched ``transcript`` step records.
Expand All @@ -18,7 +18,12 @@
from gridworld.backends.base import GridState
from gridworld.task_spec import TaskSpecification

from interface.renderer import render_user_observation_text, rgb_to_image_block
from interface.renderer import (
render_current_inventory_text,
render_user_observation_text,
rgb_to_image_block,
)
from prompting_experiments.prompt_templates import observation as observation_templates

ObservationMode = Literal["text_only", "image_text", "image_only"]
ContextWindow = Literal["current", "last3"]
Expand Down Expand Up @@ -51,11 +56,17 @@ def history_text(
if not recs:
return ""

lines = ["Recent history (last 3 steps, oldest first):"]
lines = [observation_templates.RECENT_HISTORY_HEADER]
for rec in recs:
row, col = rec["position_after"]
lines.append(
f" ({int(row)}, {int(col)}) facing {rec['facing_after']} -> {rec['action']} -> {rec['prompt_feedback']}"
observation_templates.RECENT_HISTORY_STEP.format(
row=int(row),
col=int(col),
facing=rec["facing_after"],
action=rec["action"],
feedback=rec["prompt_feedback"],
)
)
return "\n".join(lines)

Expand All @@ -77,17 +88,24 @@ def history_content_blocks(
if rgb is None:
continue
blocks.append(rgb_to_image_block(rgb))
if observation == "image_only":
blocks.append({"type": "text", "text": f"Action: {rec['action']}\n\n"})
inventory = _history_record_inventory(rec)
text = (
observation_templates.IMAGE_HISTORY_INVENTORY_ACTION.format(
inventory=inventory,
action=rec["action"],
)
if observation == "image_only"
else observation_templates.IMAGE_HISTORY_INVENTORY.format(inventory=inventory)
)
blocks.append({"type": "text", "text": text})

if not blocks:
return []

intro = (
"Recent steps (oldest first). Each image is the maze view from which the "
"following action was chosen; infer pose and environment state from the image.\n\n"
observation_templates.IMAGE_ONLY_HISTORY_INTRO
if observation == "image_only"
else "Recent step views (oldest first):\n\n"
else observation_templates.IMAGE_TEXT_HISTORY_INTRO
)
return [{"type": "text", "text": intro}] + blocks

Expand All @@ -96,13 +114,34 @@ def current_observation_text(
observation: ObservationMode,
task_spec: TaskSpecification,
state: GridState,
*,
include_description: bool = False,
include_facing: bool = False,
) -> str:
if observation == "image_only":
return render_current_inventory_text(state)
if not include_description:
return ""
return render_user_observation_text(task_spec, state)
return render_user_observation_text(task_spec, state, include_facing=include_facing)


def current_image_blocks(observation: ObservationMode, rgb: np.ndarray | None) -> list[dict]:
    """Build the image content blocks for the current observation.

    Returns an empty list when ``observation`` is ``"text_only"`` or when no
    frame (``rgb is None``) is available; otherwise returns a one-element list
    holding the block produced by ``rgb_to_image_block`` for ``rgb``.
    """
    # An image is emitted only when the mode allows images AND a frame exists.
    wants_image = observation != "text_only" and rgb is not None
    return [rgb_to_image_block(rgb)] if wants_image else []


def _history_record_inventory(rec: dict[str, Any]) -> str:
    """Return the inventory stored in a history record as display text.

    ``rec["state_before"]`` is consulted first and ``rec["state_after"]``
    second. The first of those that is a dict holding a list under
    ``"inventory"`` decides the result: the items are stringified and joined
    with ``", "``, or ``"empty"`` is returned when the joined text is empty.
    ``"unknown"`` is returned when neither snapshot supplies a usable list.
    """
    for snapshot_key in ("state_before", "state_after"):
        snapshot = rec.get(snapshot_key)
        if not isinstance(snapshot, dict):
            continue
        items = snapshot.get("inventory")
        if not isinstance(items, list):
            continue
        # The first usable snapshot wins, even when its inventory is empty.
        rendered = ", ".join(map(str, items))
        return rendered if rendered else "empty"
    return "unknown"
Loading