diff --git a/extras/chat_template_examples/chat_template_gpt_oss.jinja b/extras/chat_template_examples/chat_template_gpt_oss.jinja
index 8549cdf1be..f86a050b7d 100644
--- a/extras/chat_template_examples/chat_template_gpt_oss.jinja
+++ b/extras/chat_template_examples/chat_template_gpt_oss.jinja
@@ -351,11 +351,28 @@
             {{- raise_exception("Message has tool role, but there was no previous assistant message with a tool call!") }}
         {%- endif %}
         {{- "<|start|>functions." + last_tool_call.name }}
-        {#- Original: {{- " to=assistant<|channel|>commentary<|message|>" + message.content|tojson + "<|end|>" }} #}
-        {#- Actual version that works, does not escape and allows non-json: #}
-        {{- " to=assistant<|channel|>commentary<|message|>" + message.content + "<|end|>" -}}
+        {#- When content is a plain string we render it directly.                                    #}
+        {#- When content is an array (OpenAI multipart format) we join the text items with newlines, #}
+        {#- matching what the model was trained on. JSON-serialising the array would add noise.      #}
+        {%- if message.content is string -%}
+            {{- " to=assistant<|channel|>commentary<|message|>" + message.content + "<|end|>" -}}
+        {%- else -%}
+            {%- set ns = namespace(parts=[]) -%}
+            {%- for item in message.content if item.type == "text" -%}
+                {%- set ns.parts = ns.parts + [item.text] -%}
+            {%- endfor -%}
+            {{- " to=assistant<|channel|>commentary<|message|>" + ns.parts | join("\n") + "<|end|>" -}}
+        {%- endif -%}
     {%- elif message.role == 'user' -%}
-        {{- "<|start|>user<|message|>" + message.content + "<|end|>" }}
+        {%- if message.content is string -%}
+            {{- "<|start|>user<|message|>" + message.content + "<|end|>" }}
+        {%- else -%}
+            {%- set ns = namespace(parts=[]) -%}
+            {%- for item in message.content if item.type == "text" -%}
+                {%- set ns.parts = ns.parts + [item.text] -%}
+            {%- endfor -%}
+            {{- "<|start|>user<|message|>" + ns.parts | join("\n") + "<|end|>" }}
+        {%- endif -%}
     {%- endif -%}
 {%- endfor -%}
 
diff --git a/src/llm/apis/openai_api_handler.hpp b/src/llm/apis/openai_api_handler.hpp
index 9071e6addc..5f9c0ba22d 100644
--- a/src/llm/apis/openai_api_handler.hpp
+++ b/src/llm/apis/openai_api_handler.hpp
@@ -146,6 +146,7 @@ class OpenAIApiHandler {
     std::optional<int> getNumReturnSequences() const;
     StreamOptions getStreamOptions() const;
     const std::string& getProcessedJson() const;
+    // Returns the flat list of image tensors, one per parsed image_url / input_image content item, in request order.
     const ImageHistory& getImageHistory() const;
     ov::genai::ChatHistory& getChatHistory();
     std::optional<int> getMaxTokens() const;
diff --git a/src/llm/apis/openai_completions.cpp b/src/llm/apis/openai_completions.cpp
index b69b682611..be98b8317e 100644
--- a/src/llm/apis/openai_completions.cpp
+++ b/src/llm/apis/openai_completions.cpp
@@ -196,13 +196,14 @@ absl::Status OpenAIChatCompletionsHandler::parseMessages(std::optional<std::stri
                 continue;
             }
             if (memberName == "content" && member->value.IsArray()) {
-                // Adjust content field format when it is passed as an array of objects (typically with images)
+                // Content passed as an array of objects (OpenAI multipart format).
+                // We preserve the array structure so chat templates can decide how to render it.
+                // image_url items are translated to {type:image} so that VLM chat templates
+                // (which use the OpenVINO GenAI MULTIPART_CONTENT convention) see them correctly.
+                // The corresponding decoded tensors are appended in document order to imageHistory.
                 if (member->value.GetArray().Size() == 0) {
                     return absl::InvalidArgumentError("Invalid message structure - content array is empty");
                 }
-                jsonChanged = true;
-                Value contentText(rapidjson::kStringType);
-                contentText.SetString("", doc.GetAllocator());
                 for (auto& v : member->value.GetArray()) {
                     if (!v.IsObject()) {
                         return absl::InvalidArgumentError("Invalid message structure - content array should contain objects");
@@ -211,14 +212,12 @@ absl::Status OpenAIChatCompletionsHandler::parseMessages(std::optional<std::stri
                     if (!entry.HasMember("type") || !entry["type"].IsString()) {
                         return absl::InvalidArgumentError("Invalid message structure - content object type missing");
                     }
-                    auto entryType = entry["type"].GetString();
-                    if (entryType == std::string("text")) {
+                    std::string entryType = entry["type"].GetString();
+                    if (entryType == "text") {
                         if (!entry.HasMember("text") || !entry["text"].IsString()) {
                             return absl::InvalidArgumentError("Invalid message structure - content text missing");
                         }
-                        contentText = entry["text"];
-                        continue;
-                    } else if (entryType == std::string("image_url")) {
+                    } else if (entryType == "image_url") {
                         if (!entry.HasMember("image_url") || !entry["image_url"].IsObject()) {
                             return absl::InvalidArgumentError("Invalid message structure - content image_url missing");
                         }
@@ -231,18 +230,27 @@ absl::Status OpenAIChatCompletionsHandler::parseMessages(std::optional<std::stri
                         if (!tensorResult.ok()) {
                             return tensorResult.status();
                         }
-                        request.imageHistory.push_back({i, tensorResult.value()});
+                        // Store tensor in flat image list (document order = template rendering order)
+                        request.imageHistory.push_back(std::move(tensorResult.value()));
+                        // Translate image_url item to {type:image} so VLM chat templates
+                        // (which use GenAI MULTIPART_CONTENT convention) see the image in context.
+                        while (v.MemberBegin() != v.MemberEnd()) {
+                            v.RemoveMember(v.MemberBegin());
+                        }
+                        v.AddMember(rapidjson::Value("type", doc.GetAllocator()),
+                                    rapidjson::Value("image", doc.GetAllocator()),
+                                    doc.GetAllocator());
+                        jsonChanged = true;
                     } else {
                         return absl::InvalidArgumentError("Unsupported content type");
                     }
                 }
-                // Pulling out text from nested structure to the "content" field for text and replace whole "content" value for image data
-                // with empty string, since images are stored separately in request.images
-                member->value = contentText;
-                // Add new field to the last message in history if content is text
-                if (member->value.IsString()) {
-                    request.chatHistory.last()[member->name.GetString()] = member->value.GetString();
-                }
+                // Preserve the array (with any image_url translated to {type:image}) in chatHistory.
+                // For the Python Jinja path, processedJson is only written when jsonChanged is true
+                // (i.e. when image_url items were translated or tool_call arguments were injected).
+                // Otherwise the template falls back to payload.body and sees the original OpenAI
+                // format, which is equally correct — template decides how to render content arrays.
+                request.chatHistory.last()[memberName] = rapidJsonValueToJsonContainer(member->value);
             }
         }
         auto lastMessage = request.chatHistory.last();
diff --git a/src/llm/apis/openai_request.hpp b/src/llm/apis/openai_request.hpp
index e1d05282eb..419322f67b 100644
--- a/src/llm/apis/openai_request.hpp
+++ b/src/llm/apis/openai_request.hpp
@@ -32,7 +32,11 @@
 #include "tool_schema_wrapper.hpp"
 
 namespace ovms {
-using ImageHistory = std::vector<std::pair<size_t, ov::Tensor>>;
+// Flat, ordered list of image tensors decoded from message content arrays.
+// Tensors are appended as image_url (Chat Completions) and input_image (Responses)
+// items are parsed, so element N belongs to the N-th {"type":"image"} item that
+// chatHistory holds once those items have been translated in place.
+using ImageHistory = std::vector<ov::Tensor>;
 
 struct StreamOptions {
     bool includeUsage = false;
diff --git a/src/llm/apis/openai_responses.cpp b/src/llm/apis/openai_responses.cpp
index e5d63985e6..b355363aad 100644
--- a/src/llm/apis/openai_responses.cpp
+++ b/src/llm/apis/openai_responses.cpp
@@ -120,7 +120,13 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional<std::string> allow
                 return absl::InvalidArgumentError("Invalid message structure - content array is empty");
             }
 
-            std::string contentText = "";
+            // Translate Responses API content array into the canonical multipart format
+            // used by chatHistory and VLM chat templates:
+            //   input_text  → {"type":"text",  "text": "<value>"}
+            //   input_image → {"type":"image"} (tensor appended to imageHistory in order)
+            // This mirrors the Chat Completions image_url → image translation so that
+            // both VLM chat templates (GenAI MULTIPART_CONTENT) and Python Jinja2 templates
+            // receive a uniform representation.
             for (auto& contentItem : contentIt->value.GetArray()) {
                 if (!contentItem.IsObject()) {
                     return absl::InvalidArgumentError("input content items must be objects");
@@ -137,7 +143,17 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional<std::string> allow
                     if (textIt == contentObj.MemberEnd() || !textIt->value.IsString()) {
                         return absl::InvalidArgumentError("input_text requires a valid text field");
                     }
-                    contentText = textIt->value.GetString();
+                    // Normalise to {"type":"text","text":"..."} in-place.
+                    std::string textValue = textIt->value.GetString();
+                    while (contentItem.MemberBegin() != contentItem.MemberEnd()) {
+                        contentItem.RemoveMember(contentItem.MemberBegin());
+                    }
+                    contentItem.AddMember(rapidjson::Value("type", doc.GetAllocator()),
+                                         rapidjson::Value("text", doc.GetAllocator()),
+                                         doc.GetAllocator());
+                    contentItem.AddMember(rapidjson::Value("text", doc.GetAllocator()),
+                                         rapidjson::Value(textValue.c_str(), doc.GetAllocator()),
+                                         doc.GetAllocator());
                 } else if (type == "input_image") {
                     std::string imageUrl;
                     auto imageUrlIt = contentObj.FindMember("image_url");
@@ -161,13 +177,21 @@ absl::Status OpenAIResponsesHandler::parseInput(std::optional<std::string> allow
                     if (!tensorResult.ok()) {
                         return tensorResult.status();
                     }
-                    request.imageHistory.push_back({i, tensorResult.value()});
+                    request.imageHistory.push_back(std::move(tensorResult.value()));
+                    // Translate to {"type":"image"} in-place so VLM chat templates see
+                    // the image at the correct position in the content array.
+                    while (contentItem.MemberBegin() != contentItem.MemberEnd()) {
+                        contentItem.RemoveMember(contentItem.MemberBegin());
+                    }
+                    contentItem.AddMember(rapidjson::Value("type", doc.GetAllocator()),
+                                         rapidjson::Value("image", doc.GetAllocator()),
+                                         doc.GetAllocator());
                 } else {
                     return absl::InvalidArgumentError("Unsupported content type. Supported types are input_text and input_image.");
                 }
             }
 
-            request.chatHistory.last()["content"] = contentText;
+            request.chatHistory.last()["content"] = rapidJsonValueToJsonContainer(contentIt->value);
         }
     } else {
         return absl::InvalidArgumentError("input is not a string or array");
diff --git a/src/llm/visual_language_model/continuous_batching/servable.cpp b/src/llm/visual_language_model/continuous_batching/servable.cpp
index 0ef06d22df..3c094aab19 100644
--- a/src/llm/visual_language_model/continuous_batching/servable.cpp
+++ b/src/llm/visual_language_model/continuous_batching/servable.cpp
@@ -19,7 +19,6 @@
 #include <memory>
 #include <stdexcept>
 #include <string>
-#include <unordered_map>
 #include <vector>
 
 #include "../../../logging.hpp"
@@ -74,26 +73,36 @@ absl::Status VisualLanguageModelServable::prepareInputs(std::shared_ptr<GenAiSer
 
         for (size_t i = 0; i < chatHistory.size(); i++) {
             const auto& message = chatHistory[i];
-            if (message["content"].as_string().value_or("").find("<ov_genai_image_") != std::string::npos) {
+            const auto& contentField = message["content"];
+            if (contentField.is_array()) {
+                for (size_t j = 0; j < contentField.size(); j++) {
+                    const auto& item = contentField[j];
+                    if (item["type"].as_string().value_or("") == "text" &&
+                        item["text"].as_string().value_or("").find("<ov_genai_image_") != std::string::npos) {
+                        return absl::InvalidArgumentError("Message contains restricted <ov_genai_image> tag");
+                    }
+                }
+            } else if (contentField.as_string().value_or("").find("<ov_genai_image_") != std::string::npos) {
                 return absl::InvalidArgumentError("Message contains restricted <ov_genai_image> tag");
             }
         }
 
-        const ImageHistory& imageHistory = vlmExecutionContext->apiHandler->getImageHistory();
-        size_t imageIndex = 0;
-        std::unordered_map<size_t, std::string> imageTags;
-        for (const auto& image : imageHistory) {
-            const auto& [chatTurnIndex, imageTensor] = image;
-            std::string imageTag = "<ov_genai_image_" + std::to_string(imageIndex++) + ">\n";
-            imageTags[chatTurnIndex] = imageTags[chatTurnIndex] + imageTag;
-            vlmExecutionContext->inputImages.push_back(imageTensor);
-        }
+        // imageHistory is a flat ordered list of tensors matching the {type:image} items in
+        // chatHistory. Pass them directly to add_request; the chat template applied below will
+        // emit the model-specific image tokens at the correct positions.
+        vlmExecutionContext->inputImages = vlmExecutionContext->apiHandler->getImageHistory();
 
-        for (const auto& [chatTurnIndex, imageTagString] : imageTags) {
-            std::string messageContent = chatHistory[chatTurnIndex]["content"].as_string().value_or("");
-            chatHistory[chatTurnIndex]["content"] = imageTagString + messageContent;
+#if (PYTHON_DISABLE == 0)
+        bool success;
+        if (vlmExecutionContext->apiHandler->getProcessedJson().size() > 0) {
+            success = PyJinjaTemplateProcessor::applyChatTemplate(getProperties()->templateProcessor, getProperties()->modelsPath, vlmExecutionContext->apiHandler->getProcessedJson(), vlmExecutionContext->inputText);
+        } else {
+            success = PyJinjaTemplateProcessor::applyChatTemplate(getProperties()->templateProcessor, getProperties()->modelsPath, vlmExecutionContext->payload.body, vlmExecutionContext->inputText);
         }
-
+        if (!success) {
+            return absl::Status(absl::StatusCode::kInvalidArgument, vlmExecutionContext->inputText);
+        }
+#else
         constexpr bool addGenerationPrompt = true;  // confirm it should be hardcoded
         auto toolsStatus = vlmExecutionContext->apiHandler->parseToolsToJsonContainer();
         if (!toolsStatus.ok()) {
@@ -106,6 +115,7 @@ absl::Status VisualLanguageModelServable::prepareInputs(std::shared_ptr<GenAiSer
         }
         const auto& chatTemplateKwargs = chatTemplateKwargsStatus.value();
         vlmExecutionContext->inputText = properties->tokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs);
+#endif
     } else {
         return absl::InvalidArgumentError("Unsupported endpoint");
     }
diff --git a/src/llm/visual_language_model/legacy/servable.cpp b/src/llm/visual_language_model/legacy/servable.cpp
index bc3ecaf71f..4f36c115c4 100644
--- a/src/llm/visual_language_model/legacy/servable.cpp
+++ b/src/llm/visual_language_model/legacy/servable.cpp
@@ -17,7 +17,6 @@
 #include <memory>
 #include <stdexcept>
 #include <string>
-#include <unordered_map>
 #include <vector>
 
 #include "../../../logging.hpp"
@@ -256,25 +255,36 @@ absl::Status VisualLanguageModelLegacyServable::prepareInputs(std::shared_ptr<Ge
 
         for (size_t i = 0; i < chatHistory.size(); i++) {
             const auto& message = chatHistory[i];
-            if (message["content"].as_string().value_or("").find("<ov_genai_image_") != std::string::npos) {
+            const auto& contentField = message["content"];
+            if (contentField.is_array()) {
+                for (size_t j = 0; j < contentField.size(); j++) {
+                    const auto& item = contentField[j];
+                    if (item["type"].as_string().value_or("") == "text" &&
+                        item["text"].as_string().value_or("").find("<ov_genai_image_") != std::string::npos) {
+                        return absl::InvalidArgumentError("Message contains restricted <ov_genai_image> tag");
+                    }
+                }
+            } else if (contentField.as_string().value_or("").find("<ov_genai_image_") != std::string::npos) {
                 return absl::InvalidArgumentError("Message contains restricted <ov_genai_image> tag");
             }
         }
 
-        const ImageHistory& imageHistory = vlmExecutionContext->apiHandler->getImageHistory();
-        size_t imageIndex = 0;
-        std::unordered_map<size_t, std::string> imageTags;
-        for (const auto& image : imageHistory) {
-            const auto& [chatTurnIndex, imageTensor] = image;
-            std::string imageTag = "<ov_genai_image_" + std::to_string(imageIndex++) + ">\n";
-            imageTags[chatTurnIndex] = imageTags[chatTurnIndex] + imageTag;
-            vlmExecutionContext->inputImages.push_back(imageTensor);
+        // imageHistory is a flat ordered list of tensors matching the {type:image} items in
+        // chatHistory. Pass them directly; the chat template applied below will emit the
+        // model-specific image tokens at the correct positions.
+        vlmExecutionContext->inputImages = vlmExecutionContext->apiHandler->getImageHistory();
+
+#if (PYTHON_DISABLE == 0)
+        bool success;
+        if (vlmExecutionContext->apiHandler->getProcessedJson().size() > 0) {
+            success = PyJinjaTemplateProcessor::applyChatTemplate(getProperties()->templateProcessor, getProperties()->modelsPath, vlmExecutionContext->apiHandler->getProcessedJson(), vlmExecutionContext->inputText);
+        } else {
+            success = PyJinjaTemplateProcessor::applyChatTemplate(getProperties()->templateProcessor, getProperties()->modelsPath, vlmExecutionContext->payload.body, vlmExecutionContext->inputText);
         }
-        for (const auto& [chatTurnIndex, imageTagString] : imageTags) {
-            std::string messageContent = chatHistory[chatTurnIndex]["content"].as_string().value_or("");
-            chatHistory[chatTurnIndex]["content"] = imageTagString + messageContent;
+        if (!success) {
+            return absl::Status(absl::StatusCode::kInvalidArgument, vlmExecutionContext->inputText);
         }
-
+#else
         constexpr bool addGenerationPrompt = true;  // confirm it should be hardcoded
         auto toolsStatus = vlmExecutionContext->apiHandler->parseToolsToJsonContainer();
         if (!toolsStatus.ok()) {
@@ -287,6 +297,7 @@ absl::Status VisualLanguageModelLegacyServable::prepareInputs(std::shared_ptr<Ge
         }
         const auto& chatTemplateKwargs = chatTemplateKwargsStatus.value();
         vlmExecutionContext->inputText = properties->tokenizer.apply_chat_template(chatHistory, addGenerationPrompt, {}, tools, chatTemplateKwargs);
+#endif
     } else {
         return absl::InvalidArgumentError("Unsupported endpoint");
     }
diff --git a/src/test/http_openai_handler_test.cpp b/src/test/http_openai_handler_test.cpp
index a4e6585af0..0ac5e27c3c 100644
--- a/src/test/http_openai_handler_test.cpp
+++ b/src/test/http_openai_handler_test.cpp
@@ -1586,8 +1586,7 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesSucceedsBase64) {
     ASSERT_EQ(apiHandler->parseMessages(), absl::OkStatus());
     const ovms::ImageHistory& imageHistory = apiHandler->getImageHistory();
     ASSERT_EQ(imageHistory.size(), 1);
-    auto [index, image] = imageHistory[0];
-    EXPECT_EQ(index, 0);
+    const auto& image = imageHistory[0];
     EXPECT_EQ(image.get_element_type(), ov::element::u8);
     EXPECT_EQ(image.get_size(), 3);
     std::vector<uint8_t> expectedBytes = {110, 181, 160};
@@ -1595,7 +1594,7 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesSucceedsBase64) {
         EXPECT_EQ(expectedBytes[i], ((uint8_t*)image.data())[i]);
     }
     json = apiHandler->getProcessedJson();
-    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":\"What is in this image?\"}]}"));
+    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is in this image?\"},{\"type\":\"image\"}]}]}"));
 }
 
 TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesSucceedsUrlHttp) {
@@ -1627,12 +1626,11 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesSucceedsUrlHttp) {
     ASSERT_EQ(apiHandler->parseMessages(std::nullopt, allowedDomains), absl::OkStatus());
     const ovms::ImageHistory& imageHistory = apiHandler->getImageHistory();
     ASSERT_EQ(imageHistory.size(), 1);
-    auto [index, image] = imageHistory[0];
-    EXPECT_EQ(index, 0);
+    const auto& image = imageHistory[0];
     EXPECT_EQ(image.get_element_type(), ov::element::u8);
     EXPECT_EQ(image.get_size(), 225792);
     json = apiHandler->getProcessedJson();
-    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":\"What is in this image?\"}]}"));
+    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is in this image?\"},{\"type\":\"image\"}]}]}"));
 }
 
 TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesSucceedsUrlHttpMultipleAllowedDomains) {
@@ -1664,12 +1662,11 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesSucceedsUrlHttpMultipleAllow
     ASSERT_EQ(apiHandler->parseMessages(std::nullopt, allowedDomains), absl::OkStatus());
     const ovms::ImageHistory& imageHistory = apiHandler->getImageHistory();
     ASSERT_EQ(imageHistory.size(), 1);
-    auto [index, image] = imageHistory[0];
-    EXPECT_EQ(index, 0);
+    const auto& image = imageHistory[0];
     EXPECT_EQ(image.get_element_type(), ov::element::u8);
     EXPECT_EQ(image.get_size(), 225792);
     json = apiHandler->getProcessedJson();
-    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":\"What is in this image?\"}]}"));
+    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is in this image?\"},{\"type\":\"image\"}]}]}"));
 }
 
 TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesSucceedsUrlHttps) {
@@ -1701,12 +1698,11 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesSucceedsUrlHttps) {
     ASSERT_EQ(apiHandler->parseMessages(std::nullopt, allowedDomains), absl::OkStatus());
     const ovms::ImageHistory& imageHistory = apiHandler->getImageHistory();
     ASSERT_EQ(imageHistory.size(), 1);
-    auto [index, image] = imageHistory[0];
-    EXPECT_EQ(index, 0);
+    const auto& image = imageHistory[0];
     EXPECT_EQ(image.get_element_type(), ov::element::u8);
     EXPECT_EQ(image.get_size(), 225792);
     json = apiHandler->getProcessedJson();
-    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":\"What is in this image?\"}]}"));
+    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is in this image?\"},{\"type\":\"image\"}]}]}"));
 }
 
 TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesSucceedsUrlHttpsAllowedDomainAll) {
@@ -1738,12 +1734,11 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesSucceedsUrlHttpsAllowedDomai
     ASSERT_EQ(apiHandler->parseMessages(std::nullopt, allowedDomains), absl::OkStatus());
     const ovms::ImageHistory& imageHistory = apiHandler->getImageHistory();
     ASSERT_EQ(imageHistory.size(), 1);
-    auto [index, image] = imageHistory[0];
-    EXPECT_EQ(index, 0);
+    const auto& image = imageHistory[0];
     EXPECT_EQ(image.get_element_type(), ov::element::u8);
     EXPECT_EQ(image.get_size(), 225792);
     json = apiHandler->getProcessedJson();
-    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":\"What is in this image?\"}]}"));
+    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is in this image?\"},{\"type\":\"image\"}]}]}"));
 }
 
 TEST_F(HttpOpenAIHandlerParsingTest, ParsingImageJpegWithNoTextSucceeds) {
@@ -1769,8 +1764,7 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingImageJpegWithNoTextSucceeds) {
     ASSERT_EQ(apiHandler->parseMessages(), absl::OkStatus());
     const ovms::ImageHistory& imageHistory = apiHandler->getImageHistory();
     ASSERT_EQ(imageHistory.size(), 1);
-    auto [index, image] = imageHistory[0];
-    EXPECT_EQ(index, 0);
+    const auto& image = imageHistory[0];
     EXPECT_EQ(image.get_element_type(), ov::element::u8);
     EXPECT_EQ(image.get_size(), 3);
     std::vector<uint8_t> expectedBytes = {54, 245, 241};
@@ -1778,7 +1772,7 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingImageJpegWithNoTextSucceeds) {
         EXPECT_EQ(expectedBytes[i], ((uint8_t*)image.data())[i]);
     }
     json = apiHandler->getProcessedJson();
-    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":\"\"}]}"));
+    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"image\"}]}]}"));
 }
 
 TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesImageStringWithNoPrefixFails) {
@@ -1948,12 +1942,11 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesImageLocalFilesystem) {
     ASSERT_EQ(apiHandler->parseMessages(getGenericFullPathForSrcTest("/ovms/src/test")), absl::OkStatus());
     const ovms::ImageHistory& imageHistory = apiHandler->getImageHistory();
     ASSERT_EQ(imageHistory.size(), 1);
-    auto [index, image] = imageHistory[0];
-    EXPECT_EQ(index, 0);
+    const auto& image = imageHistory[0];
     EXPECT_EQ(image.get_element_type(), ov::element::u8);
     EXPECT_EQ(image.get_size(), 3);
     json = apiHandler->getProcessedJson();
-    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":\"What is in this image?\"}]}"));
+    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is in this image?\"},{\"type\":\"image\"}]}]}"));
 }
 
 TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesImageLocalFilesystemWithinAllowedPath) {
@@ -1984,12 +1977,11 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesImageLocalFilesystemWithinAl
     ASSERT_EQ(apiHandler->parseMessages(getGenericFullPathForSrcTest("/ovms/src/test/binaryutils")), absl::OkStatus());
     const ovms::ImageHistory& imageHistory = apiHandler->getImageHistory();
     ASSERT_EQ(imageHistory.size(), 1);
-    auto [index, image] = imageHistory[0];
-    EXPECT_EQ(index, 0);
+    const auto& image = imageHistory[0];
     EXPECT_EQ(image.get_element_type(), ov::element::u8);
     EXPECT_EQ(image.get_size(), 3);
     json = apiHandler->getProcessedJson();
-    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":\"What is in this image?\"}]}"));
+    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is in this image?\"},{\"type\":\"image\"}]}]}"));
 }
 
 TEST_F(HttpOpenAIHandlerParsingTest, ParsingMessagesImageLocalFilesystemNotWithinAllowedPath) {
@@ -2140,10 +2132,7 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingMultipleMessagesSucceeds) {
     const ovms::ImageHistory& imageHistory = apiHandler->getImageHistory();
     ASSERT_EQ(imageHistory.size(), 2);
     std::vector<uint8_t> expectedBytes = {110, 181, 160};
-    std::vector<size_t> expectedImageIndexes = {0, 2};
-    size_t i = 0;
-    for (auto [index, image] : imageHistory) {
-        EXPECT_EQ(index, expectedImageIndexes[i++]);
+    for (const auto& image : imageHistory) {
         EXPECT_EQ(image.get_element_type(), ov::element::u8);
         EXPECT_EQ(image.get_size(), 3);
         for (size_t i = 0; i < image.get_size(); i++) {
@@ -2151,10 +2140,10 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParsingMultipleMessagesSucceeds) {
         }
     }
     json = apiHandler->getProcessedJson();
-    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":\"What is in this image?\"},"
-                                "{\"role\":\"assistant\",\"content\":\"No idea my friend.\"},"
-                                "{\"role\":\"user\",\"content\":\"What about this one?\"},"
-                                "{\"role\":\"assistant\",\"content\":\"Same thing. I'm not very good with images.\"},"
+    EXPECT_EQ(json, std::string("{\"model\":\"llama\",\"messages\":[{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What is in this image?\"},{\"type\":\"image\"}]},"
+                                "{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"No idea my friend.\"}]},"
+                                "{\"role\":\"user\",\"content\":[{\"type\":\"text\",\"text\":\"What about this one?\"},{\"type\":\"image\"}]},"
+                                "{\"role\":\"assistant\",\"content\":[{\"type\":\"text\",\"text\":\"Same thing. I'm not very good with images.\"}]},"
                                 "{\"role\":\"user\",\"content\":\"You were not trained with images, were you?\"}]}"));
 }
 
@@ -3610,3 +3599,76 @@ TEST_F(HttpOpenAIHandlerParsingTest, ParseMessagesRegularMessageHasNoToolFields)
     EXPECT_FALSE(history[1].contains("tool_call_id"));
     EXPECT_FALSE(history[1].contains("name"));
 }
+
+TEST_F(HttpOpenAIHandlerParsingTest, ParseMessagesToolContentArrayPreservedInChatHistory) {
+    // Verifies that a tool-role message whose "content" is an array of {type:"text", text} objects
+    // (OpenAI multipart format) is kept in chatHistory as that array, not collapsed into a string.
+    std::string json = R"({
+    "model": "llama",
+    "messages": [
+      {"role": "user", "content": "list jobs"},
+      {"role": "assistant", "content": null, "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "list_jobs", "arguments": "{\"folder_path\":\"ovmsc\"}"}}]},
+      {"role": "tool", "tool_call_id": "call_1", "content": [
+        {"type": "text", "text": "ubuntu"},
+        {"type": "text", "text": "redhat"},
+        {"type": "text", "text": "windows"}
+      ]}
+    ]
+  })";
+    doc.Parse(json.c_str());
+    ASSERT_FALSE(doc.HasParseError());
+    auto apiHandler = std::make_shared<ovms::OpenAIChatCompletionsHandler>(doc, ovms::Endpoint::CHAT_COMPLETIONS, std::chrono::system_clock::now(), *tokenizer);
+    ASSERT_EQ(apiHandler->parseMessages(), absl::OkStatus());
+
+    ov::genai::ChatHistory& history = apiHandler->getChatHistory();
+    ASSERT_EQ(history.size(), 3);  // one entry per request message: user, assistant, tool
+
+    // history[2] is the tool message: its content must still be an array with all three text items in order
+    auto toolMsg = history[2];
+    ASSERT_TRUE(toolMsg.contains("content"));
+    ASSERT_TRUE(toolMsg["content"].is_array());
+    ASSERT_EQ(toolMsg["content"].size(), 3);
+    EXPECT_EQ(toolMsg["content"][0]["type"].get_string(), "text");
+    EXPECT_EQ(toolMsg["content"][0]["text"].get_string(), "ubuntu");
+    EXPECT_EQ(toolMsg["content"][1]["text"].get_string(), "redhat");
+    EXPECT_EQ(toolMsg["content"][2]["text"].get_string(), "windows");
+}
+
+TEST_F(HttpOpenAIHandlerParsingTest, ParseMessagesContentArrayPreservedInProcessedJson) {
+    // Verifies that a text-only content array survives unchanged in processedJson. The assistant
+    // tool call below deliberately omits "arguments" so that another mutation (ensureArgumentsInToolCalls
+    // adding them) flags the JSON as changed; a template reading processedJson then sees the array.
+    std::string json = R"({
+    "model": "llama",
+    "messages": [
+      {"role": "user", "content": "list jobs"},
+      {"role": "assistant", "content": null, "tool_calls": [{"id": "call_1", "type": "function", "function": {"name": "list_jobs"}}]},
+      {"role": "tool", "tool_call_id": "call_1", "content": [
+        {"type": "text", "text": "ubuntu"},
+        {"type": "text", "text": "redhat"},
+        {"type": "text", "text": "windows"}
+      ]}
+    ]
+  })";
+    doc.Parse(json.c_str());
+    ASSERT_FALSE(doc.HasParseError());
+    auto apiHandler = std::make_shared<ovms::OpenAIChatCompletionsHandler>(doc, ovms::Endpoint::CHAT_COMPLETIONS, std::chrono::system_clock::now(), *tokenizer);
+    ASSERT_EQ(apiHandler->parseMessages(), absl::OkStatus());
+
+    // Injecting the missing "arguments" marks the JSON as changed, so processedJson must be non-empty
+    const std::string& processed = apiHandler->getProcessedJson();
+    ASSERT_FALSE(processed.empty());
+
+    // Re-parse processedJson and check that the tool message content is still a three-item array
+    rapidjson::Document processedDoc;
+    processedDoc.Parse(processed.c_str());
+    ASSERT_FALSE(processedDoc.HasParseError());
+    const auto& messages = processedDoc["messages"].GetArray();
+    ASSERT_EQ(messages.Size(), 3);
+    const auto& toolContent = messages[2]["content"];  // messages[2] is the tool message
+    ASSERT_TRUE(toolContent.IsArray());
+    ASSERT_EQ(toolContent.GetArray().Size(), 3);
+    EXPECT_STREQ(toolContent.GetArray()[0]["text"].GetString(), "ubuntu");
+    EXPECT_STREQ(toolContent.GetArray()[1]["text"].GetString(), "redhat");
+    EXPECT_STREQ(toolContent.GetArray()[2]["text"].GetString(), "windows");
+}