From e62a13ab723f503eba90b7d54abf0d9ac7684281 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Mon, 13 Apr 2026 16:37:36 -0700
Subject: [PATCH 01/10] feat: add tool calling support to m serve

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 cli/serve/app.py                              |  43 ++-
 cli/serve/models.py                           |  26 ++
 docs/examples/m_serve/client_tool_calling.py  | 208 +++++++++++++
 .../m_serve/m_serve_example_tool_calling.py   | 177 +++++++++++
 test/cli/test_serve.py                        |  16 +-
 test/cli/test_serve_tool_calling.py           | 290 ++++++++++++++++++
 6 files changed, 750 insertions(+), 10 deletions(-)
 create mode 100644 docs/examples/m_serve/client_tool_calling.py
 create mode 100644 docs/examples/m_serve/m_serve_example_tool_calling.py
 create mode 100644 test/cli/test_serve_tool_calling.py

diff --git a/cli/serve/app.py b/cli/serve/app.py
index 583b28c01..3307978c5 100644
--- a/cli/serve/app.py
+++ b/cli/serve/app.py
@@ -3,10 +3,12 @@
 import asyncio
 import importlib.util
 import inspect
+import json
 import os
 import sys
 import time
 import uuid
+from typing import Literal
 
 try:
     import typer
@@ -26,10 +28,12 @@
 from .models import (
     ChatCompletion,
     ChatCompletionMessage,
+    ChatCompletionMessageToolCall,
     ChatCompletionRequest,
     Choice,
     OpenAIError,
     OpenAIErrorResponse,
+    ToolCallFunction,
 )
 from .streaming import stream_chat_completion_chunks
 
@@ -111,14 +115,14 @@ def _build_model_options(request: ChatCompletionRequest) -> dict:
         "response_format",  # Response format (json_object) - not yet implemented
         "functions",  # Legacy function calling - not yet implemented
         "function_call",  # Legacy function calling - not yet implemented
-        "tools",  # Tool calling - not yet implemented
-        "tool_choice",  # Tool choice - not yet implemented
+        # Tool choice is passed through as-is (not a ModelOption sentinel)
     }
     openai_to_model_option = {
         "temperature": ModelOption.TEMPERATURE,
         "max_tokens": ModelOption.MAX_NEW_TOKENS,
         "seed": ModelOption.SEED,
         "stream": ModelOption.STREAM,
+        "tools": ModelOption.TOOLS,
     }
 
     # Get all non-None fields
@@ -171,6 +175,35 @@ async def endpoint(request: ChatCompletionRequest):
                     model_options=model_options,
                 )
 
+            # Extract tool calls from the ModelOutputThunk if available
+            tool_calls = None
+            finish_reason: Literal[
+                "stop", "length", "content_filter", "tool_calls", "function_call"
+            ] = "stop"
+            if (
+                hasattr(output, "tool_calls")
+                and output.tool_calls is not None
+                and isinstance(output.tool_calls, dict)
+            ):
+                tool_calls = []
+                for tool_name, model_tool_call in output.tool_calls.items():
+                    # Generate a unique ID for this tool call
+                    tool_call_id = f"call_{uuid.uuid4().hex[:24]}"
+
+                    # Serialize the arguments to JSON string
+                    args_json = json.dumps(model_tool_call.args)
+
+                    tool_calls.append(
+                        ChatCompletionMessageToolCall(
+                            id=tool_call_id,
+                            type="function",
+                            function=ToolCallFunction(
+                                name=model_tool_call.name, arguments=args_json
+                            ),
+                        )
+                    )
+                finish_reason = "tool_calls"
+
             # system_fingerprint represents backend config hash, not model name
             # The model name is already in response.model (line 73)
             # Leave as None since we don't track backend config fingerprints yet
@@ -198,9 +231,11 @@ async def endpoint(request: ChatCompletionRequest):
                     Choice(
                         index=0,
                         message=ChatCompletionMessage(
-                            content=output.value, role="assistant"
+                            content=output.value,
+                            role="assistant",
+                            tool_calls=tool_calls,
                         ),
-                        finish_reason="stop",
+                        finish_reason=finish_reason,
                     )
                 ],
                 object="chat.completion",  # type: ignore
diff --git a/cli/serve/models.py b/cli/serve/models.py
index 7e738730e..ba0bd8cca 100644
--- a/cli/serve/models.py
+++ b/cli/serve/models.py
@@ -80,6 +80,29 @@ class ChatCompletionRequest(BaseModel):
     extra: dict[str, Any] = Field(default_factory=dict)
 
 
+class ToolCallFunction(BaseModel):
+    """Function details for a tool call."""
+
+    name: str
+    """The name of the function to call."""
+
+    arguments: str
+    """The arguments to call the function with, as a JSON string."""
+
+
+class ChatCompletionMessageToolCall(BaseModel):
+    """A tool call generated by the model."""
+
+    id: str
+    """The ID of the tool call."""
+
+    type: Literal["function"]
+    """The type of the tool. Currently, only 'function' is supported."""
+
+    function: ToolCallFunction
+    """The function that the model called."""
+
+
 # Taking this from OpenAI types https://github.com/openai/openai-python/blob/main/src/openai/types/chat/chat_completion.py,
 class ChatCompletionMessage(BaseModel):
     content: str | None = None
@@ -91,6 +114,9 @@ class ChatCompletionMessage(BaseModel):
     role: Literal["assistant"]
     """The role of the author of this message."""
 
+    tool_calls: list[ChatCompletionMessageToolCall] | None = None
+    """The tool calls generated by the model, such as function calls."""
+
 
 class Choice(BaseModel):
     index: int
diff --git a/docs/examples/m_serve/client_tool_calling.py b/docs/examples/m_serve/client_tool_calling.py
new file mode 100644
index 000000000..fca522c76
--- /dev/null
+++ b/docs/examples/m_serve/client_tool_calling.py
@@ -0,0 +1,208 @@
+"""Client example for testing tool calling with m serve.
+
+This script demonstrates how to interact with an m serve server
+that supports tool calling using the OpenAI-compatible API.
+
+Usage:
+    1. Start the server:
+       uv run m serve docs/examples/m_serve/m_serve_example_tool_calling.py
+
+    2. Run this client:
+       uv run python docs/examples/m_serve/client_tool_calling.py
+"""
+
+import json
+
+import requests
+
+# Server configuration
+BASE_URL = "http://localhost:8080"
+ENDPOINT = f"{BASE_URL}/v1/chat/completions"
+
+# Define tools in OpenAI format
+tools = [
+    {
+        "type": "function",
+        "function": {
+            "name": "get_weather",
+            "description": "Get the current weather in a given location",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "location": {
+                        "type": "string",
+                        "description": "The city name, e.g. San Francisco",
+                    },
+                    "units": {
+                        "type": "string",
+                        "enum": ["celsius", "fahrenheit"],
+                        "description": "Temperature units",
+                    },
+                },
+                "required": ["location"],
+            },
+        },
+    },
+    {
+        "type": "function",
+        "function": {
+            "name": "calculator",
+            "description": "Evaluate a mathematical expression",
+            "parameters": {
+                "type": "object",
+                "properties": {
+                    "expression": {
+                        "type": "string",
+                        "description": "The mathematical expression to evaluate",
+                    }
+                },
+                "required": ["expression"],
+            },
+        },
+    },
+]
+
+
+def make_request(messages: list[dict], tools: list[dict] | None = None) -> dict:
+    """Make a request to the m serve API.
+
+    Args:
+        messages: List of message dictionaries
+        tools: Optional list of tool definitions
+
+    Returns:
+        Response dictionary from the API
+    """
+    payload = {
+        "model": "gpt-3.5-turbo",  # Model name (not used by m serve)
+        "messages": messages,
+        "temperature": 0.7,
+    }
+
+    if tools:
+        payload["tools"] = tools
+        payload["tool_choice"] = "auto"
+
+    response = requests.post(ENDPOINT, json=payload, timeout=30)
+    response.raise_for_status()
+    return response.json()
+
+
+def main():
+    """Run example tool calling interactions."""
+    print("=" * 60)
+    print("Tool Calling Example with m serve")
+    print("=" * 60)
+
+    # Example 1: Request that should trigger weather tool
+    print("\n1. Weather Query")
+    print("-" * 60)
+    messages = [{"role": "user", "content": "What's the weather like in Tokyo?"}]
+
+    print(f"User: {messages[0]['content']}")
+    response = make_request(messages, tools=tools)
+
+    choice = response["choices"][0]
+    print(f"\nFinish Reason: {choice['finish_reason']}")
+
+    if choice.get("message", {}).get("tool_calls"):
+        print("\nTool Calls:")
+        for tool_call in choice["message"]["tool_calls"]:
+            func = tool_call["function"]
+            args = json.loads(func["arguments"])
+            print(f"  - {func['name']}({json.dumps(args)})")
+    else:
+        print(f"Assistant: {choice['message']['content']}")
+
+    # Example 2: Request that should trigger calculator tool
+    print("\n\n2. Math Query")
+    print("-" * 60)
+    messages = [{"role": "user", "content": "What is 15 * 23 + 7?"}]
+
+    print(f"User: {messages[0]['content']}")
+    response = make_request(messages, tools=tools)
+
+    choice = response["choices"][0]
+    print(f"\nFinish Reason: {choice['finish_reason']}")
+
+    if choice.get("message", {}).get("tool_calls"):
+        print("\nTool Calls:")
+        for tool_call in choice["message"]["tool_calls"]:
+            func = tool_call["function"]
+            args = json.loads(func["arguments"])
+            print(f"  - {func['name']}({json.dumps(args)})")
+    else:
+        print(f"Assistant: {choice['message']['content']}")
+
+    # Example 3: Request without tools (normal chat)
+    print("\n\n3. Normal Chat (No Tools)")
+    print("-" * 60)
+    messages = [{"role": "user", "content": "Hello! How are you?"}]
+
+    print(f"User: {messages[0]['content']}")
+    response = make_request(messages, tools=None)
+
+    choice = response["choices"][0]
+    print(f"\nFinish Reason: {choice['finish_reason']}")
+    print(f"Assistant: {choice['message']['content']}")
+
+    # Example 4: Multi-turn conversation with tool use
+    print("\n\n4. Multi-turn Conversation")
+    print("-" * 60)
+    messages = [{"role": "user", "content": "What's the weather in Paris?"}]
+
+    print(f"User: {messages[0]['content']}")
+    response = make_request(messages, tools=tools)
+
+    choice = response["choices"][0]
+    assistant_message = choice["message"]
+
+    if assistant_message.get("tool_calls"):
+        print("\nAssistant requested tool calls:")
+        # Add the assistant turn once; each tool result below must follow it
+        messages.append(
+            {
+                "role": "assistant",
+                "content": assistant_message.get("content"),
+                "tool_calls": assistant_message["tool_calls"],
+            }
+        )
+        for tool_call in assistant_message["tool_calls"]:
+            func = tool_call["function"]
+            args = json.loads(func["arguments"])
+            print(f"  - {func['name']}({json.dumps(args)})")
+
+            # Simulate tool execution
+            if func["name"] == "get_weather":
+                tool_result = f"The weather in {args['location']} is sunny and 22°C"
+            else:
+                tool_result = "Tool result"
+
+            messages.append(
+                {
+                    "role": "tool",
+                    "tool_call_id": tool_call["id"],
+                    "content": tool_result,
+                }
+            )
+
+        # Get final response after tool execution
+        print("\nGetting final response after tool execution...")
+        response = make_request(messages, tools=tools)
+        choice = response["choices"][0]
+        print(f"Assistant: {choice['message']['content']}")
+
+    print("\n" + "=" * 60)
+    print("Examples completed!")
+    print("=" * 60)
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except requests.exceptions.ConnectionError:
+        print("Error: Could not connect to server.")
+        print("Make sure the server is running:")
+        print("  uv run m serve docs/examples/m_serve/m_serve_example_tool_calling.py")
+    except Exception as e:
+        print(f"Error: {e}")
diff --git a/docs/examples/m_serve/m_serve_example_tool_calling.py b/docs/examples/m_serve/m_serve_example_tool_calling.py
new file mode 100644
index 000000000..e7dbedd29
--- /dev/null
+++ b/docs/examples/m_serve/m_serve_example_tool_calling.py
@@ -0,0 +1,177 @@
+# pytest: ollama, e2e
+
+"""Example demonstrating tool calling with m serve.
+
+This example shows how to use the OpenAI-compatible tool calling API
+with m serve. The server will accept tool definitions and return tool
+calls in the response when the model decides to use them.
+"""
+
+from typing import Any
+
+import mellea
+from cli.serve.models import ChatMessage
+from mellea.core import ModelOutputThunk, Requirement
+from mellea.core.base import AbstractMelleaTool
+from mellea.stdlib.context import ChatContext
+
+session = mellea.start_session(ctx=ChatContext())
+
+
+class GetWeatherTool(AbstractMelleaTool):
+    """Tool for getting weather information."""
+
+    name = "get_weather"
+
+    def run(self, location: str, units: str = "celsius") -> str:
+        """Get the current weather for a location.
+
+        Args:
+            location: The city name
+            units: Temperature units (celsius or fahrenheit)
+
+        Returns:
+            Weather information as a string
+        """
+        # In a real implementation, this would call a weather API
+        return f"The weather in {location} is sunny and 22°{units[0].upper()}"
+
+    @property
+    def as_json_tool(self) -> dict[str, Any]:
+        """Return JSON schema for this tool."""
+        return {
+            "type": "function",
+            "function": {
+                "name": self.name,
+                "description": "Get the current weather in a given location",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city name, e.g. San Francisco",
+                        },
+                        "units": {
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
+                            "description": "Temperature units",
+                        },
+                    },
+                    "required": ["location"],
+                },
+            },
+        }
+
+
+class CalculatorTool(AbstractMelleaTool):
+    """Tool for performing calculations."""
+
+    name = "calculator"
+
+    def run(self, expression: str) -> str:
+        """Evaluate a mathematical expression.
+
+        Args:
+            expression: A mathematical expression to evaluate
+
+        Returns:
+            The result of the calculation
+        """
+        try:
+            # SECURITY: eval() executes arbitrary Python; replace with a safe parser
+            result = eval(expression)  # noqa: S307
+            return f"The result is {result}"
+        except Exception as e:
+            return f"Error evaluating expression: {e}"
+
+    @property
+    def as_json_tool(self) -> dict[str, Any]:
+        """Return JSON schema for this tool."""
+        return {
+            "type": "function",
+            "function": {
+                "name": self.name,
+                "description": "Evaluate a mathematical expression",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "expression": {
+                            "type": "string",
+                            "description": "The mathematical expression to evaluate, e.g. '2 + 2'",
+                        }
+                    },
+                    "required": ["expression"],
+                },
+            },
+        }
+
+
+# Create tool instances
+weather_tool = GetWeatherTool()
+calculator_tool = CalculatorTool()
+
+# Map tool names to instances for easy lookup
+TOOLS = {weather_tool.name: weather_tool, calculator_tool.name: calculator_tool}
+
+
+def serve(
+    input: list[ChatMessage],
+    requirements: list[str] | None = None,
+    model_options: None | dict = None,
+) -> ModelOutputThunk:
+    """Serve function that handles tool calling.
+
+    This function demonstrates how to use tools with m serve. The tools
+    are passed via model_options and the model can request to call them.
+
+    Args:
+        input: List of chat messages
+        requirements: Optional list of requirement strings
+        model_options: Model options including tools and tool_choice (not mutated)
+
+    Returns:
+        ModelOutputThunk with potential tool calls
+    """
+    requirements = requirements if requirements else []
+    message = input[-1].content
+
+    # Extract tools from model_options if provided
+    tools = None
+    if model_options and "@@@tools@@@" in model_options:
+        # Convert OpenAI tool format to Mellea tool format
+        openai_tools = model_options["@@@tools@@@"]
+        tools = {}
+        for tool_def in openai_tools:
+            tool_name = tool_def["function"]["name"]
+            if tool_name in TOOLS:
+                tools[tool_name] = TOOLS[tool_name]
+
+    # Copy before editing so the caller's model_options dict is never mutated
+    final_model_options = dict(model_options or {})
+    if tools:
+        final_model_options["@@@tools@@@"] = tools
+
+    # Use instruct to generate response with potential tool calls
+    result = session.instruct(
+        description=message,  # type: ignore
+        requirements=[Requirement(req) for req in requirements],  # type: ignore
+        model_options=final_model_options,
+    )
+
+    return result
+
+
+if __name__ == "__main__":
+    # Example usage (for testing purposes)
+    test_messages = [ChatMessage(role="user", content="What's the weather in Paris?")]
+
+    # Simulate tool definitions being passed
+    test_model_options = {
+        "@@@tools@@@": [weather_tool.as_json_tool, calculator_tool.as_json_tool]
+    }
+
+    response = serve(input=test_messages, model_options=test_model_options)
+
+    print(f"Response: {response.value}")
+    if response.tool_calls:
+        print(f"Tool calls requested: {list(response.tool_calls.keys())}")
diff --git a/test/cli/test_serve.py b/test/cli/test_serve.py
index 515cc82f2..9b2b35dba 100644
--- a/test/cli/test_serve.py
+++ b/test/cli/test_serve.py
@@ -455,18 +455,19 @@ async def test_unsupported_params_excluded_from_model_options(self, mock_module)
         assert "logit_bias" not in model_options
 
     @pytest.mark.asyncio
-    async def test_tool_params_excluded_from_model_options(self, mock_module):
-        """Test that tool-related parameters are excluded from model_options."""
+    async def test_tool_params_passed_to_model_options(self, mock_module):
+        """Test that tool-related parameters are passed to model_options."""
         from cli.serve.models import (
             FunctionDefinition,
             FunctionParameters,
             ToolFunction,
         )
+        from mellea.backends.model_options import ModelOption
 
         request = ChatCompletionRequest(
             model="test-model",
             messages=[ChatMessage(role="user", content="Hello")],
-            # Tool-related parameters that should be excluded
+            # Tool-related parameters
             tools=[
                 ToolFunction(
                     type="function",
@@ -502,9 +503,12 @@ async def test_tool_params_excluded_from_model_options(self, mock_module):
         assert call_args is not None
         model_options = call_args.kwargs["model_options"]
 
-        # Tool-related parameters should NOT be in model_options
-        assert "tools" not in model_options
-        assert "tool_choice" not in model_options
+        # Tools should be passed with ModelOption.TOOLS key
+        assert ModelOption.TOOLS in model_options
+        # tool_choice should be passed through as-is
+        assert "tool_choice" in model_options
+        assert model_options["tool_choice"] == "auto"
+        # Legacy function calling parameters should still be excluded
         assert "functions" not in model_options
         assert "function_call" not in model_options
 
diff --git a/test/cli/test_serve_tool_calling.py b/test/cli/test_serve_tool_calling.py
new file mode 100644
index 000000000..ffdb8f3e3
--- /dev/null
+++ b/test/cli/test_serve_tool_calling.py
@@ -0,0 +1,290 @@
+"""Tests for tool calling support in m serve OpenAI-compatible API server."""
+
+import json
+from typing import Any
+from unittest.mock import Mock
+
+import pytest
+
+from cli.serve.app import make_chat_endpoint
+from cli.serve.models import (
+    ChatCompletion,
+    ChatCompletionRequest,
+    ChatMessage,
+    FunctionDefinition,
+    FunctionParameters,
+    ToolFunction,
+)
+from mellea.core.base import AbstractMelleaTool, ModelOutputThunk, ModelToolCall
+
+
+class MockTool(AbstractMelleaTool):
+    """Mock tool for testing."""
+
+    name = "get_weather"
+
+    def run(self, location: str) -> str:
+        """Mock run method."""
+        return f"Weather in {location} is sunny"
+
+    @property
+    def as_json_tool(self) -> dict[str, Any]:
+        """Return JSON schema for this tool."""
+        return {
+            "type": "function",
+            "function": {
+                "name": "get_weather",
+                "description": "Get the current weather",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"location": {"type": "string"}},
+                    "required": ["location"],
+                },
+            },
+        }
+
+
+@pytest.fixture
+def mock_module():
+    """Create a mock module with a serve function."""
+    module = Mock()
+    module.__name__ = "test_module"
+    return module
+
+
+@pytest.fixture
+def sample_tool_request():
+    """Create a sample ChatCompletionRequest with tools."""
+    return ChatCompletionRequest(
+        model="test-model",
+        messages=[ChatMessage(role="user", content="What's the weather in Paris?")],
+        tools=[
+            ToolFunction(
+                type="function",
+                function=FunctionDefinition(
+                    name="get_weather",
+                    description="Get the current weather in a location",
+                    parameters=FunctionParameters(
+                        RootModel={
+                            "type": "object",
+                            "properties": {
+                                "location": {
+                                    "type": "string",
+                                    "description": "The city name",
+                                }
+                            },
+                            "required": ["location"],
+                        }
+                    ),
+                ),
+            )
+        ],
+        tool_choice="auto",
+    )
+
+
+class TestToolCalling:
+    """Tests for tool calling functionality."""
+
+    @pytest.mark.asyncio
+    async def test_tool_calls_in_response(self, mock_module, sample_tool_request):
+        """Test that tool calls are properly formatted in the response."""
+        # Setup mock output with tool calls
+        mock_output = ModelOutputThunk("I'll check the weather for you.")
+        mock_tool = MockTool()
+        mock_output.tool_calls = {
+            "get_weather": ModelToolCall(
+                name="get_weather", func=mock_tool, args={"location": "Paris"}
+            )
+        }
+        mock_module.serve.return_value = mock_output
+
+        # Create endpoint and call it
+        endpoint = make_chat_endpoint(mock_module)
+        response = await endpoint(sample_tool_request)
+
+        # Verify response structure
+        assert isinstance(response, ChatCompletion)
+        assert response.choices[0].finish_reason == "tool_calls"
+        assert response.choices[0].message.tool_calls is not None
+        assert len(response.choices[0].message.tool_calls) == 1
+
+        # Verify tool call details
+        tool_call = response.choices[0].message.tool_calls[0]
+        assert tool_call.type == "function"
+        assert tool_call.function.name == "get_weather"
+
+        # Parse and verify arguments
+        args = json.loads(tool_call.function.arguments)
+        assert args == {"location": "Paris"}
+
+        # Verify tool call ID format
+        assert tool_call.id.startswith("call_")
+        assert len(tool_call.id) > len("call_")
+
+    @pytest.mark.asyncio
+    async def test_multiple_tool_calls(self, mock_module, sample_tool_request):
+        """Test handling multiple tool calls in a single response."""
+        mock_output = ModelOutputThunk("I'll check multiple locations.")
+        mock_tool = MockTool()
+        mock_output.tool_calls = {
+            "get_weather_paris": ModelToolCall(
+                name="get_weather", func=mock_tool, args={"location": "Paris"}
+            ),
+            "get_weather_london": ModelToolCall(
+                name="get_weather", func=mock_tool, args={"location": "London"}
+            ),
+        }
+        mock_module.serve.return_value = mock_output
+
+        endpoint = make_chat_endpoint(mock_module)
+        response = await endpoint(sample_tool_request)
+
+        # Verify multiple tool calls
+        assert response.choices[0].finish_reason == "tool_calls"
+        assert len(response.choices[0].message.tool_calls) == 2
+
+        # Verify each tool call has unique ID
+        ids = [tc.id for tc in response.choices[0].message.tool_calls]
+        assert len(ids) == len(set(ids)), "Tool call IDs should be unique"
+
+    @pytest.mark.asyncio
+    async def test_no_tool_calls_finish_reason_stop(
+        self, mock_module, sample_tool_request
+    ):
+        """Test that finish_reason is 'stop' when no tool calls are made."""
+        mock_output = ModelOutputThunk("The weather is sunny.")
+        # No tool_calls set
+        mock_module.serve.return_value = mock_output
+
+        endpoint = make_chat_endpoint(mock_module)
+        response = await endpoint(sample_tool_request)
+
+        assert response.choices[0].finish_reason == "stop"
+        assert response.choices[0].message.tool_calls is None
+
+    @pytest.mark.asyncio
+    async def test_tools_passed_to_model_options(
+        self, mock_module, sample_tool_request
+    ):
+        """Test that tools are passed to serve function in model_options."""
+        from mellea.backends.model_options import ModelOption
+
+        mock_output = ModelOutputThunk("Test response")
+        mock_module.serve.return_value = mock_output
+
+        endpoint = make_chat_endpoint(mock_module)
+        await endpoint(sample_tool_request)
+
+        # Verify serve was called with tools in model_options
+        call_args = mock_module.serve.call_args
+        assert call_args is not None
+        model_options = call_args.kwargs["model_options"]
+
+        # Tools should be in model_options with the ModelOption.TOOLS key
+        assert ModelOption.TOOLS in model_options
+        assert model_options[ModelOption.TOOLS] is not None
+
+    @pytest.mark.asyncio
+    async def test_tool_choice_passed_to_model_options(
+        self, mock_module, sample_tool_request
+    ):
+        """Test that tool_choice is passed to serve function in model_options."""
+        mock_output = ModelOutputThunk("Test response")
+        mock_module.serve.return_value = mock_output
+
+        endpoint = make_chat_endpoint(mock_module)
+        await endpoint(sample_tool_request)
+
+        # Verify serve was called with tool_choice in model_options
+        call_args = mock_module.serve.call_args
+        assert call_args is not None
+        model_options = call_args.kwargs["model_options"]
+
+        # tool_choice should be passed through as-is
+        assert "tool_choice" in model_options
+        assert model_options["tool_choice"] == "auto"
+
+    @pytest.mark.asyncio
+    async def test_tool_calls_with_complex_arguments(
+        self, mock_module, sample_tool_request
+    ):
+        """Test tool calls with complex nested arguments."""
+        mock_output = ModelOutputThunk("Processing complex request.")
+        mock_tool = MockTool()
+        mock_output.tool_calls = {
+            "complex_tool": ModelToolCall(
+                name="complex_function",
+                func=mock_tool,
+                args={
+                    "location": "Paris",
+                    "options": {
+                        "units": "celsius",
+                        "include_forecast": True,
+                        "days": 5,
+                    },
+                    "tags": ["weather", "forecast"],
+                },
+            )
+        }
+        mock_module.serve.return_value = mock_output
+
+        endpoint = make_chat_endpoint(mock_module)
+        response = await endpoint(sample_tool_request)
+
+        # Verify complex arguments are properly serialized
+        tool_call = response.choices[0].message.tool_calls[0]
+        args = json.loads(tool_call.function.arguments)
+
+        assert args["location"] == "Paris"
+        assert args["options"]["units"] == "celsius"
+        assert args["options"]["include_forecast"] is True
+        assert args["options"]["days"] == 5
+        assert args["tags"] == ["weather", "forecast"]
+
+    @pytest.mark.asyncio
+    async def test_tool_calls_with_usage_info(self, mock_module, sample_tool_request):
+        """Test that usage info is included alongside tool calls."""
+        mock_output = ModelOutputThunk("Calling tool.")
+        mock_tool = MockTool()
+        mock_output.tool_calls = {
+            "get_weather": ModelToolCall(
+                name="get_weather", func=mock_tool, args={"location": "Paris"}
+            )
+        }
+        mock_output.usage = {
+            "prompt_tokens": 50,
+            "completion_tokens": 20,
+            "total_tokens": 70,
+        }
+        mock_module.serve.return_value = mock_output
+
+        endpoint = make_chat_endpoint(mock_module)
+        response = await endpoint(sample_tool_request)
+
+        # Verify both tool calls and usage are present
+        assert response.choices[0].finish_reason == "tool_calls"
+        assert response.choices[0].message.tool_calls is not None
+        assert response.usage is not None
+        assert response.usage.total_tokens == 70
+
+    @pytest.mark.asyncio
+    async def test_request_without_tools(self, mock_module):
+        """Test that requests without tools still work normally."""
+        request = ChatCompletionRequest(
+            model="test-model",
+            messages=[ChatMessage(role="user", content="Hello")],
+            # No tools specified
+        )
+
+        mock_output = ModelOutputThunk("Hello! How can I help?")
+        mock_module.serve.return_value = mock_output
+
+        endpoint = make_chat_endpoint(mock_module)
+        response = await endpoint(request)
+
+        # Should work normally without tool-related fields
+        assert isinstance(response, ChatCompletion)
+        assert response.choices[0].finish_reason == "stop"
+        assert response.choices[0].message.tool_calls is None
+        assert response.choices[0].message.content == "Hello! How can I help?"

From d6840600fe73805839a91cdf864b27dcf0303685 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Tue, 14 Apr 2026 14:08:54 -0700
Subject: [PATCH 02/10] fix: m serve returned finish_reason=tool_calls
 for an empty tool_calls dict

Fixed the bug where an empty tool_calls dict ({}) incorrectly produced finish_reason="tool_calls" with an empty array instead of finish_reason="stop" with tool_calls=None.

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 cli/serve/app.py                    |  1 +
 test/cli/test_serve_tool_calling.py | 22 ++++++++++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/cli/serve/app.py b/cli/serve/app.py
index 3307978c5..20ddad051 100644
--- a/cli/serve/app.py
+++ b/cli/serve/app.py
@@ -184,6 +184,7 @@ async def endpoint(request: ChatCompletionRequest):
                 hasattr(output, "tool_calls")
                 and output.tool_calls is not None
                 and isinstance(output.tool_calls, dict)
+                and output.tool_calls  # Check dict is not empty
             ):
                 tool_calls = []
                 for tool_name, model_tool_call in output.tool_calls.items():
diff --git a/test/cli/test_serve_tool_calling.py b/test/cli/test_serve_tool_calling.py
index ffdb8f3e3..0b31b7837 100644
--- a/test/cli/test_serve_tool_calling.py
+++ b/test/cli/test_serve_tool_calling.py
@@ -163,6 +163,28 @@ async def test_no_tool_calls_finish_reason_stop(
         assert response.choices[0].finish_reason == "stop"
         assert response.choices[0].message.tool_calls is None
 
+    @pytest.mark.asyncio
+    async def test_empty_tool_calls_dict_finish_reason_stop(
+        self, mock_module, sample_tool_request
+    ):
+        """Test that finish_reason is 'stop' when tool_calls is an empty dict.
+
+        Regression test for bug where empty tool_calls dict {} produces
+        finish_reason='tool_calls' with an empty array instead of
+        finish_reason='stop' with tool_calls=None.
+        """
+        mock_output = ModelOutputThunk("Hello! How can I help?")
+        # Set tool_calls to empty dict (the bug case)
+        mock_output.tool_calls = {}
+        mock_module.serve.return_value = mock_output
+
+        endpoint = make_chat_endpoint(mock_module)
+        response = await endpoint(sample_tool_request)
+
+        # Should behave like no tool calls at all
+        assert response.choices[0].finish_reason == "stop"
+        assert response.choices[0].message.tool_calls is None
+
     @pytest.mark.asyncio
     async def test_tools_passed_to_model_options(
         self, mock_module, sample_tool_request

From 7254c9ba788a647c0c5165147310ae3833bd7396 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Tue, 14 Apr 2026 14:12:15 -0700
Subject: [PATCH 03/10] fix: move message add to outside the loop in
 client_tool_calling.py example

Issue: The assistant message was being added inside the loop for each tool call, causing duplication when multiple tool calls were present.
Fix: Moved the assistant message append outside the loop (before processing tool calls), so it's only added once. Now the loop only adds tool responses.

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 docs/examples/m_serve/client_tool_calling.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/docs/examples/m_serve/client_tool_calling.py b/docs/examples/m_serve/client_tool_calling.py
index fca522c76..6ffc3cc24 100644
--- a/docs/examples/m_serve/client_tool_calling.py
+++ b/docs/examples/m_serve/client_tool_calling.py
@@ -159,6 +159,17 @@ def main():
 
     if assistant_message.get("tool_calls"):
         print("\nAssistant requested tool calls:")
+
+        # Add assistant message once before processing tool calls
+        messages.append(
+            {
+                "role": "assistant",
+                "content": assistant_message.get("content"),
+                "tool_calls": assistant_message["tool_calls"],
+            }
+        )
+
+        # Process each tool call and add tool responses
         for tool_call in assistant_message["tool_calls"]:
             func = tool_call["function"]
             args = json.loads(func["arguments"])
@@ -171,13 +182,6 @@ def main():
                 tool_result = "Tool result"
 
             # Add tool response to conversation
-            messages.append(
-                {
-                    "role": "assistant",
-                    "content": assistant_message.get("content"),
-                    "tool_calls": assistant_message["tool_calls"],
-                }
-            )
             messages.append(
                 {
                     "role": "tool",

From 62b57b9ba775077a23266fb273c4fe8fd8fb07bb Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Tue, 14 Apr 2026 14:18:51 -0700
Subject: [PATCH 04/10] fix: cli app.py loop variable tool_name is never used
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The dict key tool_name is never used — the function name comes from model_tool_call.name. Iterate over .values() instead of .items().

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 cli/serve/app.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/cli/serve/app.py b/cli/serve/app.py
index 20ddad051..cb37d93ce 100644
--- a/cli/serve/app.py
+++ b/cli/serve/app.py
@@ -187,7 +187,7 @@ async def endpoint(request: ChatCompletionRequest):
                 and output.tool_calls  # Check dict is not empty
             ):
                 tool_calls = []
-                for tool_name, model_tool_call in output.tool_calls.items():
+                for model_tool_call in output.tool_calls.values():
                     # Generate a unique ID for this tool call
                     tool_call_id = f"call_{uuid.uuid4().hex[:24]}"
 

From 566eeedc9572b67f51219a930adf9150928d37a6 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Tue, 14 Apr 2026 14:34:29 -0700
Subject: [PATCH 05/10] fix: fix test_mot_init_typing() hasattr was always true

Replaced hasattr() with direct __dict__ membership tests to correctly distinguish:

1. Typed instances (ModelOutputThunk[float](...)) - have __orig_class__ in their instance dict
2. Untyped instances (ModelOutputThunk(...)) - do NOT have __orig_class__ in their instance dict

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 test/core/test_component_typing.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/core/test_component_typing.py b/test/core/test_component_typing.py
index bbc3de9ef..5003fbe34 100644
--- a/test/core/test_component_typing.py
+++ b/test/core/test_component_typing.py
@@ -78,16 +78,16 @@ def session(backend) -> MelleaSession:
 
 def test_mot_init_typing():
     mot = ModelOutputThunk[float](value="1")
-    assert hasattr(mot, "__orig_class__"), (
-        "mots are generics and should have this field"
+    assert "__orig_class__" in mot.__dict__, (
+        "mots are generics and should have this field in instance dict"
     )
     assert get_args(mot.__orig_class__)[0] is float, (  # type: ignore
         f"expected float, got {get_args(mot.__orig_class__)[0]} as mot type"  # type: ignore
     )  # type: ignore
 
     unknown_mot = ModelOutputThunk(value="2")
-    assert not hasattr(unknown_mot, "__orig_class__"), (
-        "unknown mots / mots with no type defined at instantiate don't have this attribute"
+    assert "__orig_class__" not in unknown_mot.__dict__, (
+        "unknown mots / mots with no type defined at instantiate don't have this attribute in instance dict"
     )
 
 

From 5f5639698984e4899a50f59dd3ffd99335ceea98 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Tue, 14 Apr 2026 14:42:14 -0700
Subject: [PATCH 06/10] fix: update m_serve_example_tool_calling.py to use
 safer example tool
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Security issue resolved in `m_serve_example_tool_calling.py`:

**Changes made:**
- Replaced `CalculatorTool` (which used unsafe `eval()` with `# noqa: S307`) with `GetStockPriceTool`
- New tool demonstrates API-calling pattern with mock stock prices (AAPL, GOOGL, MSFT, TSLA)
- Updated all references: `calculator_tool` → `stock_price_tool`
- Maintains the same tool calling demonstration with two tools (weather + stock price)

**Why this is better:**
- Eliminates security risk entirely (no `eval()` or suppressed lints)
- Still demonstrates multiple tools effectively
- Uses safe, realistic API-calling pattern that users can copy
- No dangerous code that could be copy-pasted into production

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 .../m_serve/m_serve_example_tool_calling.py   | 43 ++++++++++---------
 1 file changed, 23 insertions(+), 20 deletions(-)

diff --git a/docs/examples/m_serve/m_serve_example_tool_calling.py b/docs/examples/m_serve/m_serve_example_tool_calling.py
index e7dbedd29..1aa041eee 100644
--- a/docs/examples/m_serve/m_serve_example_tool_calling.py
+++ b/docs/examples/m_serve/m_serve_example_tool_calling.py
@@ -63,26 +63,29 @@ def as_json_tool(self) -> dict[str, Any]:
         }
 
 
-class CalculatorTool(AbstractMelleaTool):
-    """Tool for performing calculations."""
+class GetStockPriceTool(AbstractMelleaTool):
+    """Tool for getting stock price information."""
 
-    name = "calculator"
+    name = "get_stock_price"
 
-    def run(self, expression: str) -> str:
-        """Evaluate a mathematical expression.
+    def run(self, symbol: str) -> str:
+        """Get the current stock price for a symbol.
 
         Args:
-            expression: A mathematical expression to evaluate
+            symbol: The stock ticker symbol (e.g., AAPL, GOOGL)
 
         Returns:
-            The result of the calculation
+            Stock price information as a string
         """
-        try:
-            # In a real implementation, use a safe expression evaluator
-            result = eval(expression)  # noqa: S307
-            return f"The result is {result}"
-        except Exception as e:
-            return f"Error evaluating expression: {e}"
+        # In a real implementation, this would call a stock market API
+        mock_prices = {
+            "AAPL": "$175.43",
+            "GOOGL": "$142.87",
+            "MSFT": "$378.91",
+            "TSLA": "$242.15",
+        }
+        price = mock_prices.get(symbol.upper(), "$100.00")
+        return f"The current price of {symbol.upper()} is {price}"
 
     @property
     def as_json_tool(self) -> dict[str, Any]:
@@ -91,16 +94,16 @@ def as_json_tool(self) -> dict[str, Any]:
             "type": "function",
             "function": {
                 "name": self.name,
-                "description": "Evaluate a mathematical expression",
+                "description": "Get the current stock price for a given ticker symbol",
                 "parameters": {
                     "type": "object",
                     "properties": {
-                        "expression": {
+                        "symbol": {
                             "type": "string",
-                            "description": "The mathematical expression to evaluate, e.g. '2 + 2'",
+                            "description": "The stock ticker symbol, e.g. AAPL, GOOGL",
                         }
                     },
-                    "required": ["expression"],
+                    "required": ["symbol"],
                 },
             },
         }
@@ -108,10 +111,10 @@ def as_json_tool(self) -> dict[str, Any]:
 
 # Create tool instances
 weather_tool = GetWeatherTool()
-calculator_tool = CalculatorTool()
+stock_price_tool = GetStockPriceTool()
 
 # Map tool names to instances for easy lookup
-TOOLS = {weather_tool.name: weather_tool, calculator_tool.name: calculator_tool}
+TOOLS = {weather_tool.name: weather_tool, stock_price_tool.name: stock_price_tool}
 
 
 def serve(
@@ -167,7 +170,7 @@ def serve(
 
     # Simulate tool definitions being passed
     test_model_options = {
-        "@@@tools@@@": [weather_tool.as_json_tool, calculator_tool.as_json_tool]
+        "@@@tools@@@": [weather_tool.as_json_tool, stock_price_tool.as_json_tool]
     }
 
     response = serve(input=test_messages, model_options=test_model_options)

From d2bf9c918513e8a21fd3d1691b74b746ea67f4d1 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Thu, 16 Apr 2026 19:25:55 -0700
Subject: [PATCH 07/10] fix: replace repeated hard-coded string with constant

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 docs/examples/m_serve/m_serve_example_tool_calling.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/docs/examples/m_serve/m_serve_example_tool_calling.py b/docs/examples/m_serve/m_serve_example_tool_calling.py
index 1aa041eee..30f6ee12f 100644
--- a/docs/examples/m_serve/m_serve_example_tool_calling.py
+++ b/docs/examples/m_serve/m_serve_example_tool_calling.py
@@ -11,6 +11,7 @@
 
 import mellea
 from cli.serve.models import ChatMessage
+from mellea.backends import ModelOption
 from mellea.core import ModelOutputThunk, Requirement
 from mellea.core.base import AbstractMelleaTool
 from mellea.stdlib.context import ChatContext
@@ -140,9 +141,9 @@ def serve(
 
     # Extract tools from model_options if provided
     tools = None
-    if model_options and "@@@tools@@@" in model_options:
+    if model_options and ModelOption.TOOLS in model_options:
         # Convert OpenAI tool format to Mellea tool format
-        openai_tools = model_options["@@@tools@@@"]
+        openai_tools = model_options[ModelOption.TOOLS]
         tools = {}
         for tool_def in openai_tools:
             tool_name = tool_def["function"]["name"]
@@ -152,7 +153,7 @@ def serve(
     # Build model options with tools
     final_model_options = model_options or {}
     if tools:
-        final_model_options["@@@tools@@@"] = tools
+        final_model_options[ModelOption.TOOLS] = tools
 
     # Use instruct to generate response with potential tool calls
     result = session.instruct(
@@ -170,7 +171,7 @@ def serve(
 
     # Simulate tool definitions being passed
     test_model_options = {
-        "@@@tools@@@": [weather_tool.as_json_tool, stock_price_tool.as_json_tool]
+        ModelOption.TOOLS: [weather_tool.as_json_tool, stock_price_tool.as_json_tool]
     }
 
     response = serve(input=test_messages, model_options=test_model_options)

From d5169fc945b2aabd89ed859de551325a3e8c3bc6 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Thu, 16 Apr 2026 19:36:12 -0700
Subject: [PATCH 08/10] fix: add TOOL_CHOICE to ModelOptions like TEMPERATURE
 not a sentinel

The pass-through behavior was not clear enough, so add TOOL_CHOICE to
ModelOption, where the important options are defined. Most of these are
sentinels that are removed (because of the @@@ markers), but this one is like
TEMPERATURE, which is passed through to the backends.

No behavior change, but this gives a handy constant and a place to look for
these. This does not address all the other possible pass-through args.

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 cli/serve/app.py                                      |  2 +-
 docs/examples/m_serve/m_serve_example_tool_calling.py | 10 ++++++----
 mellea/backends/model_options.py                      |  4 ++++
 test/cli/test_serve.py                                |  6 +++---
 test/cli/test_serve_tool_calling.py                   |  7 ++++---
 5 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/cli/serve/app.py b/cli/serve/app.py
index cb37d93ce..83676fedd 100644
--- a/cli/serve/app.py
+++ b/cli/serve/app.py
@@ -115,7 +115,6 @@ def _build_model_options(request: ChatCompletionRequest) -> dict:
         "response_format",  # Response format (json_object) - not yet implemented
         "functions",  # Legacy function calling - not yet implemented
         "function_call",  # Legacy function calling - not yet implemented
-        # Tool choice is passed through as-is (not a ModelOption sentinel)
     }
     openai_to_model_option = {
         "temperature": ModelOption.TEMPERATURE,
@@ -123,6 +122,7 @@ def _build_model_options(request: ChatCompletionRequest) -> dict:
         "seed": ModelOption.SEED,
         "stream": ModelOption.STREAM,
         "tools": ModelOption.TOOLS,
+        "tool_choice": ModelOption.TOOL_CHOICE,
     }
 
     # Get all non-None fields
diff --git a/docs/examples/m_serve/m_serve_example_tool_calling.py b/docs/examples/m_serve/m_serve_example_tool_calling.py
index 30f6ee12f..944ed56bf 100644
--- a/docs/examples/m_serve/m_serve_example_tool_calling.py
+++ b/docs/examples/m_serve/m_serve_example_tool_calling.py
@@ -126,12 +126,13 @@ def serve(
     """Serve function that handles tool calling.
 
     This function demonstrates how to use tools with m serve. The tools
-    are passed via model_options and the model can request to call them.
+    are passed via model_options using ModelOption.TOOLS, and tool_choice
+    can be specified using ModelOption.TOOL_CHOICE.
 
     Args:
         input: List of chat messages
         requirements: Optional list of requirement strings
-        model_options: Model options including tools and tool_choice
+        model_options: Model options including ModelOption.TOOLS and ModelOption.TOOL_CHOICE
 
     Returns:
         ModelOutputThunk with potential tool calls
@@ -169,9 +170,10 @@ def serve(
     # Example usage (for testing purposes)
     test_messages = [ChatMessage(role="user", content="What's the weather in Paris?")]
 
-    # Simulate tool definitions being passed
+    # Simulate tool definitions being passed with tool_choice
     test_model_options = {
-        ModelOption.TOOLS: [weather_tool.as_json_tool, stock_price_tool.as_json_tool]
+        ModelOption.TOOLS: [weather_tool.as_json_tool, stock_price_tool.as_json_tool],
+        ModelOption.TOOL_CHOICE: "auto",  # Can be "none", "auto", or specific tool
     }
 
     response = serve(input=test_messages, model_options=test_model_options)
diff --git a/mellea/backends/model_options.py b/mellea/backends/model_options.py
index f71ddfb53..682c77bc7 100644
--- a/mellea/backends/model_options.py
+++ b/mellea/backends/model_options.py
@@ -22,6 +22,7 @@ class ModelOption:
 
     Attributes:
         TOOLS (str): Sentinel key for a list or dict of tools to expose for tool calling.
+        TOOL_CHOICE (str): Key for tool choice strategy (passed through to the backend).
         MAX_NEW_TOKENS (str): Sentinel key for the maximum number of new tokens to generate.
         SYSTEM_PROMPT (str): Sentinel key for the system prompt string.
         TEMPERATURE (str): Key for the sampling temperature (passed through to the backend).
@@ -34,6 +35,9 @@ class ModelOption:
     TOOLS = "@@@tools@@@"
     """Must be a list[Callable] or a dict[str, Callable] where str is the name of the function."""
 
+    TOOL_CHOICE = "tool_choice"
+    """Controls which tool the model should use. Can be "none", "auto", or a specific tool name."""
+
     MAX_NEW_TOKENS = "@@@max_new_tokens@@@"
     SYSTEM_PROMPT = "@@@system_prompt@@@"
     TEMPERATURE = "temperature"
diff --git a/test/cli/test_serve.py b/test/cli/test_serve.py
index 9b2b35dba..688c01b21 100644
--- a/test/cli/test_serve.py
+++ b/test/cli/test_serve.py
@@ -505,9 +505,9 @@ async def test_tool_params_passed_to_model_options(self, mock_module):
 
         # Tools should be passed with ModelOption.TOOLS key
         assert ModelOption.TOOLS in model_options
-        # tool_choice should be passed through as-is
-        assert "tool_choice" in model_options
-        assert model_options["tool_choice"] == "auto"
+        # tool_choice should be passed through using ModelOption.TOOL_CHOICE
+        assert ModelOption.TOOL_CHOICE in model_options
+        assert model_options[ModelOption.TOOL_CHOICE] == "auto"
         # Legacy function calling parameters should still be excluded
         assert "functions" not in model_options
         assert "function_call" not in model_options
diff --git a/test/cli/test_serve_tool_calling.py b/test/cli/test_serve_tool_calling.py
index 0b31b7837..d1e76cd09 100644
--- a/test/cli/test_serve_tool_calling.py
+++ b/test/cli/test_serve_tool_calling.py
@@ -15,6 +15,7 @@
     FunctionParameters,
     ToolFunction,
 )
+from mellea.backends import ModelOption
 from mellea.core.base import AbstractMelleaTool, ModelOutputThunk, ModelToolCall
 
 
@@ -223,9 +224,9 @@ async def test_tool_choice_passed_to_model_options(
         assert call_args is not None
         model_options = call_args.kwargs["model_options"]
 
-        # tool_choice should be passed through as-is
-        assert "tool_choice" in model_options
-        assert model_options["tool_choice"] == "auto"
+        # tool_choice should be passed through using ModelOption.TOOL_CHOICE
+        assert ModelOption.TOOL_CHOICE in model_options
+        assert model_options[ModelOption.TOOL_CHOICE] == "auto"
 
     @pytest.mark.asyncio
     async def test_tool_calls_with_complex_arguments(

From f6a23eda69eea71838d66046628b4ca6713edf38 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Fri, 17 Apr 2026 11:41:12 -0700
Subject: [PATCH 09/10] fix: fix m serve tool-calling examples

- switch server example to OpenAIBackend
- align tool-calling example with tested Granite model setup
- narrow advertised tools when `tool_choice` selects a specific function
- enable `tool_calls=True` in the serve path
- replace calculator example with stock-price tool
- treat examples 1/2 as tool-call-only demos
- treat example 4 as the full tool execution round-trip
- improve client diagnostics for empty/no-tool responses

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
Assisted-by: IBM Bob
---
 docs/examples/m_serve/client_tool_calling.py  | 161 ++++++++++++-----
 .../m_serve/m_serve_example_tool_calling.py   | 168 ++++++++++++++----
 2 files changed, 249 insertions(+), 80 deletions(-)

diff --git a/docs/examples/m_serve/client_tool_calling.py b/docs/examples/m_serve/client_tool_calling.py
index 6ffc3cc24..d68e5d238 100644
--- a/docs/examples/m_serve/client_tool_calling.py
+++ b/docs/examples/m_serve/client_tool_calling.py
@@ -27,48 +27,55 @@
             "name": "get_weather",
             "description": "Get the current weather in a given location",
             "parameters": {
-                "type": "object",
-                "properties": {
-                    "location": {
-                        "type": "string",
-                        "description": "The city name, e.g. San Francisco",
+                "RootModel": {
+                    "type": "object",
+                    "properties": {
+                        "location": {
+                            "type": "string",
+                            "description": "The city name, e.g. San Francisco",
+                        },
+                        "units": {
+                            "type": "string",
+                            "enum": ["celsius", "fahrenheit"],
+                            "description": "Temperature units",
+                        },
                     },
-                    "units": {
-                        "type": "string",
-                        "enum": ["celsius", "fahrenheit"],
-                        "description": "Temperature units",
-                    },
-                },
-                "required": ["location"],
+                    "required": ["location"],
+                }
             },
         },
     },
     {
         "type": "function",
         "function": {
-            "name": "calculator",
-            "description": "Evaluate a mathematical expression",
+            "name": "get_stock_price",
+            "description": "Get the current stock price for a given ticker symbol",
             "parameters": {
-                "type": "object",
-                "properties": {
-                    "expression": {
-                        "type": "string",
-                        "description": "The mathematical expression to evaluate",
-                    }
-                },
-                "required": ["expression"],
+                "RootModel": {
+                    "type": "object",
+                    "properties": {
+                        "symbol": {
+                            "type": "string",
+                            "description": "The stock ticker symbol, e.g. AAPL, GOOGL",
+                        }
+                    },
+                    "required": ["symbol"],
+                }
             },
         },
     },
 ]
 
 
-def make_request(messages: list[dict], tools: list[dict] | None = None) -> dict:
+def make_request(
+    messages: list[dict], tools: list[dict] | None = None, tool_name: str | None = None
+) -> dict:
     """Make a request to the m serve API.
 
     Args:
         messages: List of message dictionaries
         tools: Optional list of tool definitions
+        tool_name: Optional tool name to request explicitly
 
     Returns:
         Response dictionary from the API
@@ -81,13 +88,53 @@ def make_request(messages: list[dict], tools: list[dict] | None = None) -> dict:
 
     if tools:
         payload["tools"] = tools
-        payload["tool_choice"] = "auto"
+        if tool_name is not None:
+            # m serve forwards tool_choice to compatible backends, but the
+            # downstream provider/model may ignore it or treat it as a weak
+            # preference rather than a guarantee. Use an explicit function
+            # selection in this client so the example demonstrates the API
+            # contract even when the model would otherwise decline to call tools.
+            payload["tool_choice"] = {
+                "type": "function",
+                "function": {"name": tool_name},
+            }
+        else:
+            payload["tool_choice"] = "auto"
 
     response = requests.post(ENDPOINT, json=payload, timeout=30)
-    response.raise_for_status()
+
+    if response.status_code >= 400:
+        try:
+            error_payload = response.json()
+        except ValueError:
+            error_payload = {"error": {"message": response.text}}
+
+        error_message = error_payload.get("error", {}).get("message", response.text)
+        raise requests.HTTPError(
+            f"{response.status_code} Server Error: {error_message}", response=response
+        )
+
     return response.json()
 
 
+def _run_local_tool(tool_name: str, args: dict) -> str:
+    """Simulate local execution of the example tools."""
+    if tool_name == "get_weather":
+        units = args.get("units") or "celsius"
+        unit_suffix = "C" if units == "celsius" else "F"
+        return f"The weather in {args['location']} is sunny and 22°{unit_suffix}"
+    if tool_name == "get_stock_price":
+        mock_prices = {
+            "AAPL": "$175.43",
+            "GOOGL": "$142.87",
+            "MSFT": "$378.91",
+            "TSLA": "$242.15",
+        }
+        symbol = args["symbol"].upper()
+        return f"The current price of {symbol} is {mock_prices.get(symbol, '$100.00')}"
+    return "Tool result"
+
+
 def main():
     """Run example tool calling interactions."""
     print("=" * 60)
@@ -100,7 +147,7 @@ def main():
     messages = [{"role": "user", "content": "What's the weather like in Tokyo?"}]
 
     print(f"User: {messages[0]['content']}")
-    response = make_request(messages, tools=tools)
+    response = make_request(messages, tools=tools, tool_name="get_weather")
 
     choice = response["choices"][0]
     print(f"\nFinish Reason: {choice['finish_reason']}")
@@ -111,16 +158,18 @@ def main():
             func = tool_call["function"]
             args = json.loads(func["arguments"])
             print(f"  - {func['name']}({json.dumps(args)})")
-    else:
+    elif choice.get("message", {}).get("content"):
         print(f"Assistant: {choice['message']['content']}")
+    else:
+        print("Assistant returned no content and no tool calls.")
 
-    # Example 2: Request that should trigger calculator tool
-    print("\n\n2. Math Query")
+    # Example 2: Request that should trigger stock price tool
+    print("\n\n2. Stock Price Query")
     print("-" * 60)
-    messages = [{"role": "user", "content": "What is 15 * 23 + 7?"}]
+    messages = [{"role": "user", "content": "What's the current stock price of AAPL?"}]
 
     print(f"User: {messages[0]['content']}")
-    response = make_request(messages, tools=tools)
+    response = make_request(messages, tools=tools, tool_name="get_stock_price")
 
     choice = response["choices"][0]
     print(f"\nFinish Reason: {choice['finish_reason']}")
@@ -131,8 +180,10 @@ def main():
             func = tool_call["function"]
             args = json.loads(func["arguments"])
             print(f"  - {func['name']}({json.dumps(args)})")
-    else:
+    elif choice.get("message", {}).get("content"):
         print(f"Assistant: {choice['message']['content']}")
+    else:
+        print("Assistant returned no content and no tool calls.")
 
     # Example 3: Request without tools (normal chat)
     print("\n\n3. Normal Chat (No Tools)")
@@ -152,7 +203,7 @@ def main():
     messages = [{"role": "user", "content": "What's the weather in Paris?"}]
 
     print(f"User: {messages[0]['content']}")
-    response = make_request(messages, tools=tools)
+    response = make_request(messages, tools=tools, tool_name="get_weather")
 
     choice = response["choices"][0]
     assistant_message = choice["message"]
@@ -169,17 +220,17 @@ def main():
             }
         )
 
+        tool_results: list[str] = []
+
         # Process each tool call and add tool responses
         for tool_call in assistant_message["tool_calls"]:
             func = tool_call["function"]
             args = json.loads(func["arguments"])
             print(f"  - {func['name']}({json.dumps(args)})")
 
-            # Simulate tool execution
-            if func["name"] == "get_weather":
-                tool_result = f"The weather in {args['location']} is sunny and 22°C"
-            else:
-                tool_result = "Tool result"
+            tool_result = _run_local_tool(func["name"], args)
+            tool_results.append(tool_result)
+            print(f"    Result: {tool_result}")
 
             # Add tool response to conversation
             messages.append(
@@ -190,11 +241,32 @@ def main():
                 }
             )
 
-        # Get final response after tool execution
+        # Get final response after tool execution.
+        # Ask for a concise answer that explicitly uses the tool result so the
+        # example output includes the actual weather/price instead of only a
+        # conversational acknowledgement.
+        messages.append(
+            {
+                "role": "user",
+                "content": (
+                    f"Original question: {messages[0]['content']}\n"
+                    f"Tool result: {'; '.join(tool_results)}\n"
+                    "Answer the original question directly using only that tool "
+                    "result. Do not mention unrelated topics or other tools."
+                ),
+            }
+        )
         print("\nGetting final response after tool execution...")
-        response = make_request(messages, tools=tools)
+        response = make_request(messages, tools=None)
         choice = response["choices"][0]
-        print(f"Assistant: {choice['message']['content']}")
+        if choice.get("message", {}).get("content"):
+            print(f"Assistant: {choice['message']['content']}")
+        else:
+            print("Assistant returned no content after tool execution.")
+    elif assistant_message.get("content"):
+        print(f"Assistant: {assistant_message['content']}")
+    else:
+        print("Assistant returned no content and no tool calls.")
 
     print("\n" + "=" * 60)
     print("Examples completed!")
@@ -208,5 +280,12 @@ def main():
         print("Error: Could not connect to server.")
         print("Make sure the server is running:")
         print("  uv run m serve docs/examples/m_serve/m_serve_example_tool_calling.py")
+    except requests.exceptions.HTTPError as e:
+        print(f"Error: {e}")
+        if e.response is not None:
+            try:
+                print("Server response:", json.dumps(e.response.json(), indent=2))
+            except ValueError:
+                print("Server response:", e.response.text)
     except Exception as e:
         print(f"Error: {e}")
diff --git a/docs/examples/m_serve/m_serve_example_tool_calling.py b/docs/examples/m_serve/m_serve_example_tool_calling.py
index 944ed56bf..aac3e68c1 100644
--- a/docs/examples/m_serve/m_serve_example_tool_calling.py
+++ b/docs/examples/m_serve/m_serve_example_tool_calling.py
@@ -2,21 +2,45 @@
 
 """Example demonstrating tool calling with m serve.
 
-This example shows how to use the OpenAI-compatible tool calling API
-with m serve. The server will accept tool definitions and return tool
-calls in the response when the model decides to use them.
+This file supports two distinct usage patterns:
+
+1. Running it directly with ``uv run python ...`` performs a local smoke test
+   using native Mellea tool calling.
+2. Serving it with ``m serve`` exposes an OpenAI-compatible endpoint that
+   accepts OpenAI-style tool definitions in the request.
+
+The direct ``__main__`` smoke test is intentionally separate from the
+OpenAI-compatible server flow because local ``session.instruct(...)`` calls
+should use ``MelleaTool`` objects directly.
 """
 
+import os
 from typing import Any
 
 import mellea
 from cli.serve.models import ChatMessage
 from mellea.backends import ModelOption
+from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO
+from mellea.backends.openai import OpenAIBackend
+from mellea.backends.tools import MelleaTool
 from mellea.core import ModelOutputThunk, Requirement
 from mellea.core.base import AbstractMelleaTool
+from mellea.formatters import TemplateFormatter
 from mellea.stdlib.context import ChatContext
+from mellea.stdlib.requirements.tool_reqs import uses_tool
+from mellea.stdlib.session import MelleaSession
+
+_ollama_host = os.environ.get("OLLAMA_HOST", "localhost:11434")
+if not _ollama_host.startswith(("http://", "https://")):
+    _ollama_host = f"http://{_ollama_host}"
 
-session = mellea.start_session(ctx=ChatContext())
+backend = OpenAIBackend(
+    model_id=IBM_GRANITE_4_HYBRID_MICRO.ollama_name,  # type: ignore[arg-type]
+    formatter=TemplateFormatter(model_id=IBM_GRANITE_4_HYBRID_MICRO.hf_model_name),  # type: ignore[arg-type]
+    base_url=f"{_ollama_host}/v1",
+    api_key="ollama",
+)
+session = MelleaSession(backend, ctx=ChatContext())
 
 
 class GetWeatherTool(AbstractMelleaTool):
@@ -24,7 +48,7 @@ class GetWeatherTool(AbstractMelleaTool):
 
     name = "get_weather"
 
-    def run(self, location: str, units: str = "celsius") -> str:
+    def run(self, location: str, units: str | None = "celsius") -> str:
         """Get the current weather for a location.
 
         Args:
@@ -34,8 +58,10 @@ def run(self, location: str, units: str = "celsius") -> str:
         Returns:
             Weather information as a string
         """
+        # Models sometimes emit optional arguments explicitly as null/None.
+        resolved_units = units or "celsius"
         # In a real implementation, this would call a weather API
-        return f"The weather in {location} is sunny and 22°{units[0].upper()}"
+        return f"The weather in {location} is sunny and 22°{resolved_units[0].upper()}"
 
     @property
     def as_json_tool(self) -> dict[str, Any]:
@@ -110,12 +136,56 @@ def as_json_tool(self) -> dict[str, Any]:
         }
 
 
-# Create tool instances
-weather_tool = GetWeatherTool()
-stock_price_tool = GetStockPriceTool()
+# Create tool instances for server-side lookup
+weather_tool_impl = GetWeatherTool()
+stock_price_tool_impl = GetStockPriceTool()
+
+# Native MelleaTool wrappers are only needed for the direct ``__main__`` path.
+# With ``model_options={ModelOption.TOOLS: [...]}``, the backend helper used by
+# local ``session.instruct(...)`` expects a list of ``MelleaTool`` instances; the
+# server path below uses the class-based implementations via ``TOOLS`` instead.
+weather_tool = MelleaTool(
+    name=weather_tool_impl.name,
+    tool_call=weather_tool_impl.run,
+    as_json_tool=weather_tool_impl.as_json_tool,
+)
+stock_price_tool = MelleaTool(
+    name=stock_price_tool_impl.name,
+    tool_call=stock_price_tool_impl.run,
+    as_json_tool=stock_price_tool_impl.as_json_tool,
+)
+
+# Map tool names to server-side tool implementations for easy lookup
+TOOLS = {
+    weather_tool_impl.name: weather_tool_impl,
+    stock_price_tool_impl.name: stock_price_tool_impl,
+}
+
+
+def _extract_mellea_tools_from_model_options(
+    model_options: dict | None,
+) -> dict[str, AbstractMelleaTool]:
+    """Normalize example tool inputs to native tool instances.
+
+    Returns a name-to-tool dict built from the two supported entry shapes:
+    - OpenAI-style JSON tool definitions (server path); unknown names are skipped
+    - native tool objects (direct ``__main__`` path); kept as provided
+    """
+    if model_options is None or ModelOption.TOOLS not in model_options:
+        return {}
+
+    provided_tools = model_options[ModelOption.TOOLS]
+    tools: dict[str, AbstractMelleaTool] = {}
+
+    for tool_def in provided_tools:
+        if isinstance(tool_def, AbstractMelleaTool):
+            tools[tool_def.name] = tool_def
+        else:
+            tool_name = tool_def["function"]["name"]
+            if tool_name in TOOLS:
+                tools[tool_name] = TOOLS[tool_name]
 
-# Map tool names to instances for easy lookup
-TOOLS = {weather_tool.name: weather_tool, stock_price_tool.name: stock_price_tool}
+    return tools
 
 
 def serve(
@@ -127,7 +197,9 @@ def serve(
 
     This function demonstrates how to use tools with m serve. The tools
     are passed via model_options using ModelOption.TOOLS, and tool_choice
-    can be specified using ModelOption.TOOL_CHOICE.
+    can be specified using ModelOption.TOOL_CHOICE. Mellea forwards that
+    setting to compatible backends, but the downstream provider/model may
+    still ignore it or treat it as a weak preference.
 
     Args:
         input: List of chat messages
@@ -141,43 +213,61 @@ def serve(
     message = input[-1].content
 
     # Extract tools from model_options if provided
-    tools = None
-    if model_options and ModelOption.TOOLS in model_options:
-        # Convert OpenAI tool format to Mellea tool format
-        openai_tools = model_options[ModelOption.TOOLS]
-        tools = {}
-        for tool_def in openai_tools:
-            tool_name = tool_def["function"]["name"]
-            if tool_name in TOOLS:
-                tools[tool_name] = TOOLS[tool_name]
-
-    # Build model options with tools
-    final_model_options = model_options or {}
+    tools = _extract_mellea_tools_from_model_options(model_options)
+
+    # Build model options with tools.
+    # If the caller explicitly selected a single function via tool_choice,
+    # narrow the advertised tool set to that one tool so the backend/model
+    # is not asked to choose among unrelated tools.
+    final_model_options = dict(model_options or {})
+    selected_tool_name: str | None = None
     if tools:
-        final_model_options[ModelOption.TOOLS] = tools
-
-    # Use instruct to generate response with potential tool calls
+        selected_tools = tools
+        if model_options is not None and ModelOption.TOOL_CHOICE in model_options:
+            tool_choice = model_options[ModelOption.TOOL_CHOICE]
+            if isinstance(tool_choice, dict):
+                selected_tool_name = tool_choice.get("function", {}).get("name")
+                if selected_tool_name in tools:
+                    selected_tools = {selected_tool_name: tools[selected_tool_name]}
+        final_model_options[ModelOption.TOOLS] = selected_tools
+
+    # Keep the serve path predictable for the client example: ``strategy=None``
+    # leaves retrying to the request level rather than to session.instruct().
+    # Enforcing uses_tool(...) inside session.instruct() caused noisy server-side
+    # failures when the model ignored the tool request on a particular sample.
     result = session.instruct(
         description=message,  # type: ignore
         requirements=[Requirement(req) for req in requirements],  # type: ignore
         model_options=final_model_options,
+        tool_calls=True,
+        strategy=None,
     )
 
     return result
 
 
 if __name__ == "__main__":
-    # Example usage (for testing purposes)
-    test_messages = [ChatMessage(role="user", content="What's the weather in Paris?")]
-
-    # Simulate tool definitions being passed with tool_choice
-    test_model_options = {
-        ModelOption.TOOLS: [weather_tool.as_json_tool, stock_price_tool.as_json_tool],
-        ModelOption.TOOL_CHOICE: "auto",  # Can be "none", "auto", or specific tool
-    }
-
-    response = serve(input=test_messages, model_options=test_model_options)
+    response = session.instruct(
+        "What's the weather in Boston?",
+        model_options={
+            ModelOption.TOOLS: [weather_tool],
+            # This direct path now uses the OpenAI backend against Ollama's
+            # OpenAI-compatible endpoint, so TOOL_CHOICE is forwarded by
+            # Mellea. Ollama and/or the selected model may still ignore it
+            # or not enforce it strictly in practice.
+            ModelOption.TOOL_CHOICE: "auto",
+            ModelOption.MAX_NEW_TOKENS: 1000,
+        },
+        strategy=None,
+        tool_calls=True,
+    )
 
     print(f"Response: {response.value}")
-    if response.tool_calls:
-        print(f"Tool calls requested: {list(response.tool_calls.keys())}")
+    print(
+        "Tool calls requested:",
+        None if response.tool_calls is None else list(response.tool_calls.keys()),
+    )
+
+    if response.tool_calls and weather_tool.name in response.tool_calls:
+        tool_result = response.tool_calls[weather_tool.name].call_func()
+        print(f"Tool result: {tool_result}")

From 313a497d0f7f9be663b4b38050a13101e6810c29 Mon Sep 17 00:00:00 2001
From: Mark Sturdevant <mark.sturdevant@ibm.com>
Date: Fri, 17 Apr 2026 13:32:39 -0700
Subject: [PATCH 10/10] fix: remove unused imports in example

Signed-off-by: Mark Sturdevant <mark.sturdevant@ibm.com>
---
 docs/examples/m_serve/m_serve_example_tool_calling.py | 2 --
 1 file changed, 2 deletions(-)

diff --git a/docs/examples/m_serve/m_serve_example_tool_calling.py b/docs/examples/m_serve/m_serve_example_tool_calling.py
index aac3e68c1..839c91b1b 100644
--- a/docs/examples/m_serve/m_serve_example_tool_calling.py
+++ b/docs/examples/m_serve/m_serve_example_tool_calling.py
@@ -17,7 +17,6 @@
 import os
 from typing import Any
 
-import mellea
 from cli.serve.models import ChatMessage
 from mellea.backends import ModelOption
 from mellea.backends.model_ids import IBM_GRANITE_4_HYBRID_MICRO
@@ -27,7 +26,6 @@
 from mellea.core.base import AbstractMelleaTool
 from mellea.formatters import TemplateFormatter
 from mellea.stdlib.context import ChatContext
-from mellea.stdlib.requirements.tool_reqs import uses_tool
 from mellea.stdlib.session import MelleaSession
 
 _ollama_host = os.environ.get("OLLAMA_HOST", "localhost:11434")