diff --git a/ddtrace/llmobs/_integrations/utils.py b/ddtrace/llmobs/_integrations/utils.py
index 11a7a408f84..3fbfb68b7c4 100644
--- a/ddtrace/llmobs/_integrations/utils.py
+++ b/ddtrace/llmobs/_integrations/utils.py
@@ -1168,9 +1168,9 @@ def openai_construct_tool_call_from_streamed_chunk(stored_tool_calls, tool_call_
 def openai_construct_message_from_streamed_chunks(streamed_chunks: list[Any]) -> dict[str, Any]:
     """Constructs a chat completion message dictionary from streamed chunks.
     The resulting message dictionary is of form:
-    {"content": "...", "role": "...", "tool_calls": [...], "finish_reason": "..."}
+    {"content": "...", "role": "...", "reasoning_content": "...", "tool_calls": [...], "finish_reason": "..."}
     """
-    message: dict[str, Any] = {"content": "", "tool_calls": []}
+    message: dict[str, Any] = {"content": "", "reasoning_content": "", "tool_calls": []}
     for chunk in streamed_chunks:
         if _get_attr(chunk, "usage", None):
             message["usage"] = chunk.usage
@@ -1182,6 +1182,9 @@ def openai_construct_message_from_streamed_chunks(streamed_chunks: list[Any]) ->
             message["role"] = chunk.delta.role
         if _get_attr(chunk, "finish_reason", None) and not message.get("finish_reason"):
             message["finish_reason"] = chunk.finish_reason
+        chunk_reasoning = _get_attr(chunk.delta, "reasoning_content", "")
+        if chunk_reasoning:  # skip None/empty deltas so only real reasoning text is appended
+            message["reasoning_content"] += chunk_reasoning
         chunk_content = _get_attr(chunk.delta, "content", "")
         if chunk_content:
             message["content"] += chunk_content
@@ -1198,6 +1201,8 @@ def openai_construct_message_from_streamed_chunks(streamed_chunks: list[Any]) ->
         message["tool_calls"].sort(key=lambda x: x.get("index", 0))
     else:
         message.pop("tool_calls", None)
+    if not message["reasoning_content"]:  # nothing accumulated, so omit the key instead of emitting ""
+        message.pop("reasoning_content", None)
     message["content"] = message["content"].strip()
     return message
 
diff --git a/releasenotes/notes/fix-llmobs-streamed-reasoning-content-0da3242ccfaa6063.yaml b/releasenotes/notes/fix-llmobs-streamed-reasoning-content-0da3242ccfaa6063.yaml
new file mode 100644
index 00000000000..c8904ca689c
--- /dev/null
+++ b/releasenotes/notes/fix-llmobs-streamed-reasoning-content-0da3242ccfaa6063.yaml
@@ -0,0 +1,6 @@
+---
+fixes:
+  - |
+    LLM Observability: Fixes an issue where ``reasoning_content`` was missing from streamed chat completions
+    in the OpenAI and LiteLLM integrations when an OpenAI-compatible reasoning provider (e.g. DeepSeek, Qwen)
+    emitted ``delta.reasoning_content`` chunks. The aggregated message now captures reasoning text in the output message, matching non-streaming behavior.
diff --git a/tests/llmobs/test_integrations_utils.py b/tests/llmobs/test_integrations_utils.py
index eefd436b787..b29e475aae8 100644
--- a/tests/llmobs/test_integrations_utils.py
+++ b/tests/llmobs/test_integrations_utils.py
@@ -1,6 +1,9 @@
+from types import SimpleNamespace
+
 from ddtrace.llmobs._integrations.utils import _extract_chat_template_from_instructions
 from ddtrace.llmobs._integrations.utils import _normalize_prompt_variables
 from ddtrace.llmobs._integrations.utils import _openai_parse_input_response_messages
+from ddtrace.llmobs._integrations.utils import openai_construct_message_from_streamed_chunks
 
 
 def test_basic_functionality():
@@ -339,3 +342,51 @@ class FakeResponseReasoningItem:
     assert len(processed) == 1
     assert processed[0]["role"] == "user"
     assert tool_call_ids == []
+
+
+def _chunk(content=None, reasoning_content=None, role=None, finish_reason=None):
+    delta = SimpleNamespace(content=content, reasoning_content=reasoning_content, role=role)
+    return SimpleNamespace(delta=delta, finish_reason=finish_reason, usage=None, index=0)
+
+
+class TestOpenAIConstructMessageFromStreamedChunks:
+    def test_reasoning_then_content_chunks_aggregate_both(self):
+        # OpenAI-compatible reasoning providers (DeepSeek, Qwen, etc.) typically emit
+        # reasoning_content chunks first, then content chunks.
+        chunks = [
+            _chunk(role="assistant"),
+            _chunk(reasoning_content="Let me "),
+            _chunk(reasoning_content="think..."),
+            _chunk(content="The answer "),
+            _chunk(content="is 391."),
+            _chunk(finish_reason="stop"),
+        ]
+        message = openai_construct_message_from_streamed_chunks(chunks)
+        assert message["reasoning_content"] == "Let me think..."  # fragments are joined in stream order
+        assert message["content"] == "The answer is 391."
+        assert message["role"] == "assistant"
+        assert message["finish_reason"] == "stop"
+
+    def test_reasoning_only_stream(self):
+        chunks = [
+            _chunk(role="assistant"),
+            _chunk(reasoning_content="hmm"),
+        ]
+        message = openai_construct_message_from_streamed_chunks(chunks)
+        assert message["reasoning_content"] == "hmm"
+        assert message["content"] == ""  # no content deltas were streamed
+
+    def test_no_reasoning_key_when_absent(self):
+        chunks = [_chunk(role="assistant"), _chunk(content="hello")]
+        message = openai_construct_message_from_streamed_chunks(chunks)
+        assert "reasoning_content" not in message  # the key is dropped, not left as ""
+        assert message["content"] == "hello"
+
+    def test_interleaved_reasoning_and_content_in_same_chunk(self):
+        chunks = [
+            _chunk(role="assistant"),
+            _chunk(reasoning_content="r", content="c"),  # both fields carried by a single delta
+        ]
+        message = openai_construct_message_from_streamed_chunks(chunks)
+        assert message["reasoning_content"] == "r"
+        assert message["content"] == "c"