Skip to content
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,12 @@ build/
venv/
env/

# IDE configurations
.vscode/
.idea/
*.sublime-project
*.sublime-workspace

# Test / tooling caches
.pytest_cache/
.mypy_cache/
Expand Down
2 changes: 1 addition & 1 deletion interface/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ class ExperimentConfig:

prompting: Literal["minimal", "standard", "verbose"] = "standard"
observation: Literal["text_only", "image_text", "image_only"] = "image_text"
context_window: Literal["current", "last3"] = "last3"
context_window: Literal["current", "last3"] = "current"
querying: Literal["step_by_step", "subgoal", "full_trajectory"] = "step_by_step"
chat_history: Literal["stateless", "rolling", "full"] = "stateless"
chat_turns_max: int = 3
Expand Down
30 changes: 22 additions & 8 deletions interface/coords.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from gridworld.backends.base import GridState
from gridworld.task_spec import Position, TaskSpecification
from prompting_experiments.prompt_templates import observation as observation_templates

FACING_ORDER = ["NORTH", "EAST", "SOUTH", "WEST"]

Expand Down Expand Up @@ -126,29 +127,42 @@ def describe_cell(
cols: int,
) -> str:
if row < 1 or row > rows or col < 1 or col > cols:
return "out of bounds"
return observation_templates.CELL_OUT_OF_BOUNDS
if (row, col) in walls:
return "wall"
return observation_templates.CELL_WALL
if (row, col) == goal:
return f"GOAL ({row},{col})"
return observation_templates.CELL_GOAL.format(row=row, col=col)

key_color = key_at_cell(task_spec, state, row, col)
if key_color:
return f"{key_color} key ({row},{col})"
return observation_templates.CELL_KEY.format(
key_color=key_color,
row=row,
col=col,
)

for door in task_spec.mechanisms.doors:
if to_row_col(door.position) == (row, col):
status = "open" if door.id in state.open_doors else door.initial_state
return f"{status} {door.requires_key} door ({row},{col})"
return observation_templates.CELL_DOOR.format(
status=status,
requires_key=door.requires_key,
row=row,
col=col,
)

for gate in task_spec.mechanisms.gates:
if to_row_col(gate.position) == (row, col):
cur = "open" if gate.id in state.open_gates else gate.initial_state
return f"{cur} gate ({row},{col})"
return observation_templates.CELL_GATE.format(state=cur, row=row, col=col)

for switch in task_spec.mechanisms.switches:
if to_row_col(switch.position) == (row, col):
on_off = "on" if switch.id in state.active_switches else switch.initial_state
return f"switch ({on_off}) ({row},{col})"
return observation_templates.CELL_SWITCH.format(
state=on_off,
row=row,
col=col,
)

return f"open ({row},{col})"
return observation_templates.CELL_OPEN.format(row=row, col=col)
90 changes: 53 additions & 37 deletions interface/feedback.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
switch_at_cell,
switches_controlling_gate,
)
from prompting_experiments.prompt_templates import feedback as feedback_templates


def infer_step_outcome(
Expand All @@ -35,13 +36,17 @@ def infer_step_outcome(
door = next((d for d in task_spec.mechanisms.doors if d.id == door_id), None)
color = door.requires_key if door else "matching"
if action == "MOVE_FORWARD" and prev_pos != curr_pos:
return "OPENED", f"Opened {color} door {door_id} and moved to {curr_pos}."
return "OPENED", f"Opened {color} door {door_id}."
return "OPENED", feedback_templates.OPENED_AND_MOVED.format(
color=color,
door_id=door_id,
position=curr_pos,
)
return "OPENED", feedback_templates.OPENED_DOOR.format(color=color, door_id=door_id)

if action in ("TURN_LEFT", "TURN_RIGHT"):
if prev.agent_direction != curr.agent_direction:
return "TURNED", f"Now facing {agent_facing(curr)}."
return "NOTHING", f"{action} had no effect."
return "TURNED", feedback_templates.NOW_FACING.format(facing=agent_facing(curr))
return "NOTHING", feedback_templates.ACTION_NO_EFFECT.format(action=action)

if action == "MOVE_FORWARD":
if prev_pos == curr_pos:
Expand All @@ -50,9 +55,10 @@ def infer_step_outcome(
if key_color:
return (
"BLOCKED",
f"MOVE_FORWARD blocked by a {key_color} key at {fwd}. "
"Keys occupy their cell; you cannot walk onto them. "
"Face the key and use PICKUP from your current cell.",
feedback_templates.MOVE_BLOCKED_BY_KEY.format(
key_color=key_color,
position=fwd,
),
)
gate = gate_at_cell(task_spec, prev, fwd[0], fwd[1])
if gate and not gate["open"]:
Expand All @@ -61,33 +67,39 @@ def infer_step_outcome(
switch_list = ", ".join(controllers)
return (
"BLOCKED",
f"MOVE_FORWARD blocked by closed gate {gate['id']} at {fwd}. "
f"Activate switch(es) {switch_list} to open it.",
feedback_templates.MOVE_BLOCKED_BY_GATE_WITH_SWITCHES.format(
gate_id=gate["id"],
position=fwd,
switches=switch_list,
),
)
return (
"BLOCKED",
f"MOVE_FORWARD blocked by closed gate {gate['id']} at {fwd}.",
feedback_templates.MOVE_BLOCKED_BY_GATE.format(
gate_id=gate["id"],
position=fwd,
),
)
return "BLOCKED", "MOVE_FORWARD blocked by wall or closed door/gate."
return "BLOCKED", feedback_templates.MOVE_BLOCKED_GENERIC
if terminated and reward > 0 and curr_pos == goal:
return "DONE", f"Reached goal at {goal}."
return "MOVED", f"Moved to {curr_pos}."
return "DONE", feedback_templates.REACHED_GOAL.format(goal=goal)
return "MOVED", feedback_templates.MOVED_TO.format(position=curr_pos)

if action == "PICKUP":
if (
prev.agent_carrying != curr.agent_carrying
or len(curr.collected_keys) > len(prev.collected_keys)
):
carried = curr.agent_carrying or "a"
return "PICKUP", f"Picked up {carried} key."
return "NOTHING", "Nothing to pick up here."
return "PICKUP", feedback_templates.PICKED_UP_KEY.format(key_color=carried)
return "NOTHING", feedback_templates.NOTHING_TO_PICK_UP

if action == "TOGGLE":
if (
prev.active_switches != curr.active_switches
or prev.open_gates != curr.open_gates
):
return "TOGGLED", "Toggled switch or gate state changed."
return "TOGGLED", feedback_templates.TOGGLED_STATE_CHANGED
fwd = forward_cell(prev)
switch_ahead = switch_at_cell(task_spec, fwd[0], fwd[1])
switch_here = switch_at_cell(task_spec, prev_pos[0], prev_pos[1])
Expand All @@ -96,34 +108,34 @@ def infer_step_outcome(
if switch_ahead["switch_type"] == "hold":
return (
"NOTHING",
f"TOGGLE had no effect. MOVE_FORWARD onto the switch at {fwd} "
"(hold switches activate while you stand on them).",
feedback_templates.TOGGLE_HOLD_SWITCH_HINT.format(position=fwd),
)
return (
"NOTHING",
f"TOGGLE had no effect. MOVE_FORWARD onto the switch at {fwd}, then TOGGLE.",
feedback_templates.TOGGLE_SWITCH_HINT.format(position=fwd),
)
if gate_ahead and not gate_ahead["open"]:
controllers = switches_controlling_gate(task_spec, str(gate_ahead["id"]))
if controllers:
switch_list = ", ".join(controllers)
return (
"NOTHING",
"Gates cannot be toggled directly. "
f"Activate switch(es) {switch_list} instead.",
feedback_templates.GATE_TOGGLE_WITH_SWITCHES.format(
switches=switch_list,
),
)
return "NOTHING", "Gates cannot be toggled directly. Activate a linked switch instead."
return "NOTHING", feedback_templates.GATE_TOGGLE_GENERIC
return (
"NOTHING",
"TOGGLE had no effect. Stand on a switch and TOGGLE, or use PICKUP/keys for doors.",
feedback_templates.TOGGLE_NO_EFFECT,
)

if action == "DONE":
if terminated and reward > 0 and curr_pos == goal:
return "DONE", f"Task complete at {goal}."
return "WRONG_DONE", f"DONE called but not at goal {goal}."
return "DONE", feedback_templates.TASK_COMPLETE.format(goal=goal)
return "WRONG_DONE", feedback_templates.WRONG_DONE.format(goal=goal)

return "INVALID", f"Unknown or unsupported action {action}."
return "INVALID", feedback_templates.UNKNOWN_ACTION.format(action=action)


def format_step_feedback(
Expand All @@ -139,23 +151,27 @@ def format_step_feedback(
)
prev_pos = agent_row_col(prev)
if event_type == "BLOCKED":
return f"BLOCKED — {action}: {event_message} You remain at {prev_pos}.", event_type
return feedback_templates.BLOCKED_FEEDBACK.format(action=action, message=event_message, position=prev_pos), event_type
if event_type == "TURNED":
return f"TURNED — {action}: {event_message}", event_type
return feedback_templates.TURNED_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "MOVED":
return f"MOVED — {action}: {event_message}", event_type
return feedback_templates.MOVED_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "DONE":
return f"SUCCESS — {action}: {event_message}", event_type
return feedback_templates.SUCCESS_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "PICKUP":
return f"PICKUP — {action}: {event_message}", event_type
return feedback_templates.PICKUP_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "NOTHING":
return f"NOTHING — {action}: {event_message} You remain at {prev_pos}.", event_type
return feedback_templates.NOTHING_FEEDBACK.format(action=action, message=event_message, position=prev_pos), event_type
if event_type == "OPENED":
return f"OPENED — {action}: {event_message}", event_type
return feedback_templates.OPENED_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "TOGGLED":
return f"TOGGLED — {action}: {event_message}", event_type
return feedback_templates.TOGGLED_FEEDBACK.format(action=action, message=event_message), event_type
if event_type == "WRONG_DONE":
return f"WRONG DONE — {action}: {event_message} You remain at {prev_pos}.", event_type
return feedback_templates.WRONG_DONE_FEEDBACK.format(action=action, message=event_message, position=prev_pos), event_type
if event_type == "INVALID":
return f"INVALID — {action}: {event_message} You remain at {prev_pos}.", event_type
return f"{event_type} — {action}: {event_message}", event_type
return feedback_templates.INVALID_FEEDBACK.format(action=action, message=event_message, position=prev_pos), event_type
return feedback_templates.DEFAULT_FEEDBACK.format(
event_type=event_type,
action=action,
message=event_message,
), event_type
25 changes: 19 additions & 6 deletions interface/observation.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from gridworld.task_spec import TaskSpecification

from interface.renderer import render_user_observation_text, rgb_to_image_block
from prompting_experiments.prompt_templates import observation as observation_templates

ObservationMode = Literal["text_only", "image_text", "image_only"]
ContextWindow = Literal["current", "last3"]
Expand Down Expand Up @@ -51,11 +52,17 @@ def history_text(
if not recs:
return ""

lines = ["Recent history (last 3 steps, oldest first):"]
lines = [observation_templates.RECENT_HISTORY_HEADER]
for rec in recs:
row, col = rec["position_after"]
lines.append(
f" ({int(row)}, {int(col)}) facing {rec['facing_after']} -> {rec['action']} -> {rec['prompt_feedback']}"
observation_templates.RECENT_HISTORY_STEP.format(
row=int(row),
col=int(col),
facing=rec["facing_after"],
action=rec["action"],
feedback=rec["prompt_feedback"],
)
)
return "\n".join(lines)

Expand All @@ -78,16 +85,22 @@ def history_content_blocks(
continue
blocks.append(rgb_to_image_block(rgb))
if observation == "image_only":
blocks.append({"type": "text", "text": f"Action: {rec['action']}\n\n"})
blocks.append(
{
"type": "text",
"text": observation_templates.IMAGE_HISTORY_ACTION.format(
action=rec["action"]
),
}
)

if not blocks:
return []

intro = (
"Recent steps (oldest first). Each image is the maze view from which the "
"following action was chosen; infer pose and environment state from the image.\n\n"
observation_templates.IMAGE_ONLY_HISTORY_INTRO
if observation == "image_only"
else "Recent step views (oldest first):\n\n"
else observation_templates.IMAGE_TEXT_HISTORY_INTRO
)
return [{"type": "text", "text": intro}] + blocks

Expand Down
Loading