diff --git a/ports/ggml/cuda-13-2.diff b/ports/ggml/cuda-13-2.diff
deleted file mode 100644
index 4aa67d8889e2a6..00000000000000
--- a/ports/ggml/cuda-13-2.diff
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/src/ggml-cuda/CMakeLists.txt b/src/ggml-cuda/CMakeLists.txt
-index 67af1d8..113d0f6 100644
---- a/src/ggml-cuda/CMakeLists.txt
-+++ b/src/ggml-cuda/CMakeLists.txt
-@@ -177,6 +177,8 @@ if (CUDAToolkit_FOUND)
- 
-     if (NOT MSVC)
-         list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
-+    else()
-+        list(APPEND CUDA_CXX_FLAGS /Zc:preprocessor)
-     endif()
- 
-     list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
diff --git a/ports/ggml/fix-vk-32bit.diff b/ports/ggml/fix-vk-32bit.diff
new file mode 100644
index 00000000000000..8e24a264df75f3
--- /dev/null
+++ b/ports/ggml/fix-vk-32bit.diff
@@ -0,0 +1,71 @@
+diff --git a/src/ggml-vulkan/ggml-vulkan.cpp b/src/ggml-vulkan/ggml-vulkan.cpp
+index 423e01d..3cd5488 100644
+--- a/src/ggml-vulkan/ggml-vulkan.cpp
++++ b/src/ggml-vulkan/ggml-vulkan.cpp
+@@ -2036,7 +2036,7 @@ void vk_memory_logger::log_allocation(vk_buffer_ref buf_ref, size_t size) {
+     allocations[buf->buffer] = size;
+     total_device += device ? size : 0;
+     total_host += device ? 0 : size;
+-    VK_LOG_MEMORY(buf->device->name << ": +" << format_size(size) << " " << type << " at " << buf->buffer << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host));
++    VK_LOG_MEMORY(buf->device->name << ": +" << format_size(size) << " " << type << " at " << static_cast<VkBuffer>(buf->buffer) << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host));
+ }
+ 
+ void vk_memory_logger::log_deallocation(vk_buffer_ref buf_ref) {
+@@ -2052,10 +2052,10 @@ void vk_memory_logger::log_deallocation(vk_buffer_ref buf_ref) {
+     total_device -= device ? it->second : 0;
+     total_host -= device ? 0 : it->second;
+     if (it != allocations.end()) {
+-        VK_LOG_MEMORY(buf->device->name << ": -" << format_size(it->second) << " " << type << " at " << buf->buffer << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host));
++        VK_LOG_MEMORY(buf->device->name << ": -" << format_size(it->second) << " " << type << " at " << static_cast<VkBuffer>(buf->buffer) << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host));
+         allocations.erase(it);
+     } else {
+-        VK_LOG_MEMORY("ERROR " << buf->device->name << ": Attempted to deallocate unknown " << type << " memory at " << buf->buffer);
++        VK_LOG_MEMORY("ERROR " << buf->device->name << ": Attempted to deallocate unknown " << type << " memory at " << static_cast<VkBuffer>(buf->buffer));
+     }
+ }
+ 
+@@ -6951,7 +6951,7 @@ static bool ggml_vk_buffer_write_2d_async(vk_context subctx, vk_buffer& dst, siz
+     }
+ 
+     ggml_vk_sync_buffers(nullptr, subctx);
+-    subctx->s->buffer->buf.copyBuffer((VkBuffer)staging_buffer->buffer, (VkBuffer)dst->buffer, slices);
++    subctx->s->buffer->buf.copyBuffer(staging_buffer->buffer, dst->buffer, slices);
+ 
+     if (width == spitch) {
+         deferred_memcpy((uint8_t *)staging_buffer->ptr, src, staging_size, &subctx->in_memcpys);
+@@ -7086,7 +7086,7 @@ static bool ggml_vk_buffer_read_async(vk_context subctx, vk_buffer& src, size_t
+ }
+ 
+ static void ggml_vk_buffer_read_2d(vk_buffer& src, size_t offset, void * dst, size_t spitch, size_t dpitch, size_t width, size_t height) {
+-    VK_LOG_DEBUG("ggml_vk_buffer_read_2d(" << src->buffer << ", " << offset << ", " << width << ", " << height << ")");
++    VK_LOG_DEBUG("ggml_vk_buffer_read_2d(" << static_cast<VkBuffer>(src->buffer) << ", " << offset << ", " << width << ", " << height << ")");
+ 
+     // If the device is not an UMA device the memory is host-accessible through rebar. While writing
+     // through PCIe is sufficient fast reading back data from PCIe is slower than going through
+@@ -7118,7 +7118,7 @@ static void ggml_vk_buffer_read_2d(vk_buffer& src, size_t offset, void * dst, si
+ }
+ 
+ static void ggml_vk_buffer_read(vk_buffer& src, size_t offset, void * dst, size_t size) {
+-    VK_LOG_DEBUG("ggml_vk_buffer_read(" << src->buffer << ", " << offset << ", " << size << ")");
++    VK_LOG_DEBUG("ggml_vk_buffer_read(" << static_cast<VkBuffer>(src->buffer) << ", " << offset << ", " << size << ")");
+     ggml_vk_buffer_read_2d(src, offset, dst, size, size, size, 1);
+ }
+ 
+@@ -7286,7 +7286,7 @@ static void ggml_vk_matmul(
+     uint32_t batch_stride_a, uint32_t batch_stride_b, uint32_t batch_stride_d,
+     uint32_t split_k, uint32_t batch, uint32_t ne02, uint32_t ne12, uint32_t broadcast2, uint32_t broadcast3,
+     uint32_t padded_n) {
+-    VK_LOG_DEBUG("ggml_vk_matmul(a: (" << a.buffer->buffer << ", " << a.offset << ", " << a.size << "), b: (" << b.buffer->buffer << ", " << b.offset << ", " << b.size << "), d: (" << d.buffer->buffer << ", " << d.offset << ", " << d.size << "), split_k: (" << (split_k_buffer.buffer != nullptr ? split_k_buffer.buffer->buffer : VK_NULL_HANDLE) << ", " << split_k_buffer.offset << ", " << split_k_buffer.size << "), m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ", split_k: " << split_k << ", batch: " << batch << ", ne02: " << ne02 << ", ne12: " << ne12 << ", broadcast2: " << broadcast2 << ", broadcast3: " << broadcast3 << ", padded_n: " << padded_n << ")");
++    VK_LOG_DEBUG("ggml_vk_matmul(a: (" << static_cast<VkBuffer>(a.buffer->buffer) << ", " << a.offset << ", " << a.size << "), b: (" << static_cast<VkBuffer>(b.buffer->buffer) << ", " << b.offset << ", " << b.size << "), d: (" << static_cast<VkBuffer>(d.buffer->buffer) << ", " << d.offset << ", " << d.size << "), split_k: (" << (split_k_buffer.buffer != nullptr ? static_cast<VkBuffer>(split_k_buffer.buffer->buffer) : VkBuffer{}) << ", " << split_k_buffer.offset << ", " << split_k_buffer.size << "), m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ", split_k: " << split_k << ", batch: " << batch << ", ne02: " << ne02 << ", ne12: " << ne12 << ", broadcast2: " << broadcast2 << ", broadcast3: " << broadcast3 << ", padded_n: " << padded_n << ")");
+     if (split_k == 1) {
+         ggml_pipeline_request_descriptor_sets(ctx, pipeline, CEIL_DIV(batch, ctx->device->properties.limits.maxComputeWorkGroupCount[2]));
+ 
+@@ -7366,7 +7366,7 @@ static void ggml_vk_matmul_id(
+     uint32_t batch_stride_a, uint32_t batch_stride_b, uint32_t batch_stride_d,
+     uint32_t n_as, uint32_t nei0, uint32_t nei1, uint32_t nbi1, uint32_t ne11,
+     uint32_t padded_n) {
+-    VK_LOG_DEBUG("ggml_vk_matmul_id(a: (" << a.buffer->buffer << ", " << a.offset << ", " << a.size << "), b: (" << b.buffer->buffer << ", " << b.offset << ", " << b.size << "), d: (" << d.buffer->buffer << ", " << d.offset << ", " << d.size << "), ids: (" << ids.buffer->buffer << ", " << ids.offset << ", " << ids.size << "), expert_count: (" << expert_count_buf.buffer->buffer << ", " << expert_count_buf.offset << ", " << expert_count_buf.size << "), " <<
++    VK_LOG_DEBUG("ggml_vk_matmul_id(a: (" << static_cast<VkBuffer>(a.buffer->buffer) << ", " << a.offset << ", " << a.size << "), b: (" << static_cast<VkBuffer>(b.buffer->buffer) << ", " << b.offset << ", " << b.size << "), d: (" << static_cast<VkBuffer>(d.buffer->buffer) << ", " << d.offset << ", " << d.size << "), ids: (" << static_cast<VkBuffer>(ids.buffer->buffer) << ", " << ids.offset << ", " << ids.size << "), expert_count: (" << static_cast<VkBuffer>(expert_count_buf.buffer->buffer) << ", " << expert_count_buf.offset << ", " << expert_count_buf.size << "), " <<
+         "m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", " <<
+         "batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ", " <<
+         "n_as: " << n_as << ", nei0: " << nei0 << ", nei1: " << nei1 << ", nbi1: " << nbi1 << ", ne11: " << ne11 << ")");
diff --git a/ports/ggml/fix-vulkan-spv-shadowing.diff b/ports/ggml/fix-vulkan-spv-shadowing.diff
new file mode 100644
index 00000000000000..ee0f499c52743a
--- /dev/null
+++ b/ports/ggml/fix-vulkan-spv-shadowing.diff
@@ -0,0 +1,66 @@
+diff --git a/src/ggml-vulkan/ggml-vulkan.cpp b/src/ggml-vulkan/ggml-vulkan.cpp
+index 423e01dbff1..0a7931002ab 100644
+--- a/src/ggml-vulkan/ggml-vulkan.cpp
++++ b/src/ggml-vulkan/ggml-vulkan.cpp
+@@ -2149,11 +2149,11 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
+ 
+     // Patch SPIR-V to enable RTE rounding for FP16, avoiding the need for
+     // separate shader variants compiled with -DRTE16.
+-    std::vector<uint32_t> spv;
++    std::vector<uint32_t> spirv;
+     if (device->float_controls_rte_fp16) {
+         const uint32_t* spv_words = reinterpret_cast<const uint32_t*>(spv_data);
+         size_t word_count = spv_size / sizeof(uint32_t);
+-        spv.assign(spv_words, spv_words + word_count);
++        spirv.assign(spv_words, spv_words + word_count);
+ 
+         // Find insertion points respecting SPIR-V layout order:
+         // Header(5) -> OpCapability -> OpExtension -> ... -> OpEntryPoint -> OpExecutionMode -> ...
+@@ -2163,9 +2163,9 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
+         size_t exec_insert_pos = pos;
+         uint32_t entry_point_id = 0;
+ 
+-        while (pos < spv.size()) {
+-            uint32_t opcode = spv[pos] & spv::OpCodeMask;
+-            uint32_t len = spv[pos] >> spv::WordCountShift;
++        while (pos < spirv.size()) {
++            uint32_t opcode = spirv[pos] & spv::OpCodeMask;
++            uint32_t len = spirv[pos] >> spv::WordCountShift;
+             if (len == 0) break;
+ 
+             if (opcode == spv::OpCapability) {
+@@ -2174,7 +2174,7 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
+             } else if (opcode == spv::OpExtension) {
+                 ext_insert_pos = pos + len;
+             } else if (opcode == spv::OpEntryPoint) {
+-                entry_point_id = spv[pos + 2];
++                entry_point_id = spirv[pos + 2];
+                 exec_insert_pos = pos + len;
+             } else if (opcode == spv::OpExecutionMode || opcode == spv::OpExecutionModeId) {
+                 exec_insert_pos = pos + len;
+@@ -2189,7 +2189,7 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
+ 
+         // OpExecutionMode %entrypoint RoundingModeRTE 16
+         uint32_t exec_mode[] = { (4u << spv::WordCountShift) | spv::OpExecutionMode, entry_point_id, spv::ExecutionModeRoundingModeRTE, 16 };
+-        spv.insert(spv.begin() + exec_insert_pos, std::begin(exec_mode), std::end(exec_mode));
++        spirv.insert(spirv.begin() + exec_insert_pos, std::begin(exec_mode), std::end(exec_mode));
+ 
+         // OpExtension "SPV_KHR_float_controls"
+         const char ext_str[] = "SPV_KHR_float_controls";
+@@ -2197,13 +2197,13 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
+         std::vector<uint32_t> extension(1 + ext_str_words, 0);
+         extension[0] = (uint32_t)((1 + ext_str_words) << spv::WordCountShift) | spv::OpExtension;
+         memcpy(&extension[1], ext_str, sizeof(ext_str));
+-        spv.insert(spv.begin() + ext_insert_pos, extension.begin(), extension.end());
++        spirv.insert(spirv.begin() + ext_insert_pos, extension.begin(), extension.end());
+ 
+         // OpCapability RoundingModeRTE
+         uint32_t capability[] = { (2u << spv::WordCountShift) | spv::OpCapability, spv::CapabilityRoundingModeRTE };
+-        spv.insert(spv.begin() + cap_insert_pos, std::begin(capability), std::end(capability));
++        spirv.insert(spirv.begin() + cap_insert_pos, std::begin(capability), std::end(capability));
+ 
+-        shader_module_create_info = vk::ShaderModuleCreateInfo({}, spv.size() * sizeof(uint32_t), spv.data());
++        shader_module_create_info = vk::ShaderModuleCreateInfo({}, spirv.size() * sizeof(uint32_t), spirv.data());
+     }
+ 
+     pipeline->shader_module = device->device.createShaderModule(shader_module_create_info);
diff --git a/ports/ggml/pkgconfig.diff b/ports/ggml/pkgconfig.diff
index 211e3407e96b2d..30c406696d206a 100644
--- a/ports/ggml/pkgconfig.diff
+++ b/ports/ggml/pkgconfig.diff
@@ -1,8 +1,8 @@
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 0211255..1b1732a 100644
+index c97f681..64a456b 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
-@@ -333,7 +333,7 @@ if (GGML_STANDALONE)
+@@ -352,7 +352,7 @@ if (GGML_STANDALONE)
                  @ONLY)
  
      install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
@@ -11,7 +11,7 @@ index 0211255..1b1732a 100644
  endif()
  
  #
-@@ -354,6 +354,7 @@ set(variable_set_statements
+@@ -373,6 +373,7 @@ set(variable_set_statements
  set(GGML_SHARED_LIB ${BUILD_SHARED_LIBS})
  
  get_cmake_property(all_variables VARIABLES)
@@ -34,10 +34,10 @@ index 3e0291e..a762733 100644
 +Libs.private: @GGML_PKGCONFIG_LIBS_PRIVATE@
 +Requires.private: @GGML_PKGCONFIG_REQUIRES_PRIVATE@
 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
-index 628db3f..af611bb 100644
+index 3e48860..1ec27e8 100644
 --- a/src/CMakeLists.txt
 +++ b/src/CMakeLists.txt
-@@ -189,6 +189,10 @@ endif()
+@@ -185,6 +185,10 @@ endif()
  
  # ggml
@@ -48,7 +48,7 @@ index 628db3f..af611bb 100644
  if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
      message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
  endif()
-@@ -241,6 +245,7 @@ target_link_libraries(ggml PUBLIC ggml-base)
+@@ -243,6 +247,7 @@ target_link_libraries(ggml PUBLIC ggml-base)
  
  if (CMAKE_SYSTEM_NAME MATCHES "Linux")
      target_link_libraries(ggml PRIVATE dl)
@@ -56,7 +56,7 @@ index 628db3f..af611bb 100644
  endif()
  
  function(ggml_add_backend_library backend)
-@@ -291,12 +296,20 @@ function(ggml_add_backend backend)
+@@ -296,12 +301,20 @@ function(ggml_add_backend backend)
      string(TOUPPER "GGML_${backend}" backend_id)
      if (${backend_id})
          string(TOLOWER "ggml-${backend}" backend_target)
@@ -77,12 +77,11 @@ index 628db3f..af611bb 100644
      endif()
  endfunction()
-@@ -434,11 +447,15 @@ find_library(MATH_LIBRARY m)
- if (MATH_LIBRARY)
-     if (NOT WIN32 OR NOT DEFINED ENV{ONEAPI_ROOT})
-         target_link_libraries(ggml-base PRIVATE m)
-+        string(APPEND GGML_PKGCONFIG_LIBS_PRIVATE " -lm")
-     endif()
+@@ -474,10 +487,14 @@ if (DEFINED MATH_LIBRARY)
+     target_link_libraries(ggml-base PRIVATE ${MATH_LIBRARY})
+ elseif (NOT WIN32 AND NOT DEFINED ENV{ONEAPI_ROOT})
+     target_link_libraries(ggml-base PRIVATE m)
++    string(APPEND GGML_PKGCONFIG_LIBS_PRIVATE " -lm")
  endif()
  
  if (CMAKE_SYSTEM_NAME MATCHES "Android")
@@ -93,11 +92,12 @@ index 628db3f..af611bb 100644
  endif()
  
  if(CMAKE_SYSTEM_NAME MATCHES "visionOS")
-@@ -451,4 +468,10 @@ if (BUILD_SHARED_LIBS)
+@@ -489,5 +506,11 @@ if (BUILD_SHARED_LIBS)
+         set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
          target_compile_definitions(${target} PRIVATE GGML_BUILD)
          target_compile_definitions(${target} PUBLIC GGML_SHARED)
++        string(APPEND GGML_PKGCONFIG_CFLAGS " -DGGML_SHARED -DGGML_BACKEND_SHARED")
      endforeach()
-+    string(APPEND GGML_PKGCONFIG_CFLAGS " -DGGML_SHARED -DGGML_BACKEND_SHARED")
  endif()
 +
 +set(GGML_PKGCONFIG_CFLAGS "${GGML_PKGCONFIG_CFLAGS}" PARENT_SCOPE)
@@ -105,22 +105,22 @@
 +set(GGML_PKGCONFIG_LIBS_PRIVATE "${GGML_PKGCONFIG_LIBS_PRIVATE}" PARENT_SCOPE)
 +set(GGML_PKGCONFIG_REQUIRES_PRIVATE "${GGML_PKGCONFIG_REQUIRES_PRIVATE}" PARENT_SCOPE)
 diff --git a/src/ggml-blas/CMakeLists.txt b/src/ggml-blas/CMakeLists.txt
-index 60ce4b1..058f65c 100644
+index c27dc17..ce1ab21 100644
 --- a/src/ggml-blas/CMakeLists.txt
 +++ b/src/ggml-blas/CMakeLists.txt
-@@ -79,6 +79,7 @@ if (BLAS_FOUND)
+@@ -93,6 +93,7 @@ if (BLAS_FOUND)
      endif()
  
      target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES})
 +    set(GGML_PKGCONFIG_REQUIRES_PRIVATE "${GGML_PKGCONFIG_REQUIRES_PRIVATE} cblas" PARENT_SCOPE)
-     target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
+     target_include_directories(ggml-blas SYSTEM PRIVATE ${BLAS_INCLUDE_DIRS})
  else()
      message(FATAL_ERROR "BLAS not found, please refer to "
 diff --git a/src/ggml-cpu/CMakeLists.txt b/src/ggml-cpu/CMakeLists.txt
-index e52e050..107cbf5 100644
+index c1c225f..39ef457 100644
 --- a/src/ggml-cpu/CMakeLists.txt
 +++ b/src/ggml-cpu/CMakeLists.txt
-@@ -52,6 +52,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+@@ -57,6 +57,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
      target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
      target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
@@ -130,7 +130,7 @@ index e52e050..107cbf5 100644
      if (APPLE AND GGML_ACCELERATE)
          find_library(ACCELERATE_FRAMEWORK Accelerate)
          if (ACCELERATE_FRAMEWORK)
-@@ -62,6 +65,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+@@ -67,6 +70,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
              target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)
              target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
@@ -138,7 +138,7 @@ index e52e050..107cbf5 100644
          else()
              message(WARNING "Accelerate framework not found")
          endif()
-@@ -74,6 +78,18 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+@@ -79,6 +83,18 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
          target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)
          target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
@@ -157,7 +157,7 @@ index e52e050..107cbf5 100644
      else()
          set(GGML_OPENMP_ENABLED "OFF" CACHE INTERNAL "")
          message(WARNING "OpenMP not found")
-@@ -96,8 +112,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+@@ -101,8 +117,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
          target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
          target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
@@ -171,7 +171,7 @@ index e52e050..107cbf5 100644
          message(STATUS "ARM detected")
          list(APPEND GGML_CPU_SOURCES
 diff --git a/src/ggml-metal/CMakeLists.txt b/src/ggml-metal/CMakeLists.txt
-index 63418fe..138996a 100644
+index 42054d8..d780361 100644
 --- a/src/ggml-metal/CMakeLists.txt
 +++ b/src/ggml-metal/CMakeLists.txt
 @@ -19,6 +19,11 @@ target_link_libraries(ggml-metal PRIVATE
@@ -187,7 +187,7 @@ index 63418fe..138996a 100644
      add_compile_definitions(GGML_METAL_NDEBUG)
  endif()
 diff --git a/src/ggml-opencl/CMakeLists.txt b/src/ggml-opencl/CMakeLists.txt
-index 681c81b..bac7067 100644
+index 5ed83ee..bb06a50 100644
 --- a/src/ggml-opencl/CMakeLists.txt
 +++ b/src/ggml-opencl/CMakeLists.txt
 @@ -7,6 +7,7 @@ ggml_add_backend_library(${TARGET_NAME}
@@ -199,7 +199,7 @@ index 681c81b..bac7067 100644
  if (GGML_OPENCL_PROFILING)
 diff --git a/src/ggml-vulkan/CMakeLists.txt b/src/ggml-vulkan/CMakeLists.txt
-index de01336..3f9524c 100644
+index 715a263..04c142c 100644
 --- a/src/ggml-vulkan/CMakeLists.txt
 +++ b/src/ggml-vulkan/CMakeLists.txt
 @@ -87,6 +87,11 @@ if (Vulkan_FOUND)
diff --git a/ports/ggml/portfile.cmake b/ports/ggml/portfile.cmake
index 575435690e9c70..eae899de514def 100644
--- a/ports/ggml/portfile.cmake
+++ b/ports/ggml/portfile.cmake
@@ -1,8 +1,8 @@
 vcpkg_from_github(
     OUT_SOURCE_PATH SOURCE_PATH
     REPO ggml-org/ggml
-    REF 55bc9320a4aae82af18e23eefd5de319a755d7b9
-    SHA512 9433c9c258bbbfa817051f2ba2a8c8f166ee885c953d3ee27198890d4af8366fdee11ba55514b8b8414c836615e56eceaa98f33a01ecf51846338bc60d34263b
+    REF v${VERSION}
+    SHA512 c49b6498101f50a5ebde158f5707237f6df3af0182e819651003ec5f418144f9ca68cc8434189f2a658ea705269fad2d263cfe4931c12b369883aa5a66182ed9
     HEAD_REF master
     PATCHES
         cmake-config.diff
@@ -10,7 +10,8 @@ vcpkg_from_github(
         relax-link-options.diff
         vulkan-shaders-gen.diff
         fix-dequant_funcs.diff
-        cuda-13-2.diff
+        fix-vulkan-spv-shadowing.diff
+        fix-vk-32bit.diff
 )
 
 vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
diff --git a/ports/ggml/vcpkg.json b/ports/ggml/vcpkg.json
index 3e66d9d73c6934..f84dea81e773d5 100644
--- a/ports/ggml/vcpkg.json
+++ b/ports/ggml/vcpkg.json
@@ -1,7 +1,6 @@
 {
   "name": "ggml",
-  "version-date": "2025-11-17",
-  "port-version": 2,
+  "version": "0.11.0",
   "description": "Tensor library for machine learning",
   "homepage": "https://github.com/ggml-org/ggml",
   "license": "MIT",
@@ -61,6 +60,7 @@
         "name": "shaderc",
         "host": true
       },
+      "spirv-headers",
       "vulkan"
     ]
   }
 }
diff --git a/ports/llama-cpp/portfile.cmake b/ports/llama-cpp/portfile.cmake
index 42f9ffecc8ccd0..af0e0fa5864c2d 100644
--- a/ports/llama-cpp/portfile.cmake
+++ b/ports/llama-cpp/portfile.cmake
@@ -2,17 +2,24 @@ vcpkg_from_github(
     OUT_SOURCE_PATH SOURCE_PATH
     REPO ggml-org/llama.cpp
     REF b${VERSION}
-    SHA512 879490cdeeef9397b9843730399bd55c7ef57de7e421cfcbaa15bd95416e028e4a0864f237218895a8a1244b7a4dbef9eacda49aa9697628e29774d0fc90d04b
+    SHA512 ef5e21b61ca2961004fc57ad9d4a07191458df4f1749e71a9dc96d653676a6d68d43b7b8c74ebb235f6dffe5c064330cb1124887bc5c119876d7292543321945
     HEAD_REF master
     PATCHES
         cmake-config.diff
         pkgconfig.diff
+        unvendor.diff
 )
 
 file(REMOVE_RECURSE "${SOURCE_PATH}/ggml/include" "${SOURCE_PATH}/ggml/src")
+file(REMOVE_RECURSE
+    "${SOURCE_PATH}/vendor/cpp-httplib"
+    "${SOURCE_PATH}/vendor/miniaudio"
+    "${SOURCE_PATH}/vendor/nlohmann"
+    "${SOURCE_PATH}/vendor/stb")
 
 vcpkg_check_features(OUT_FEATURE_OPTIONS options
     FEATURES
         download LLAMA_CURL
+        server   LLAMA_BUILD_SERVER
         tools    LLAMA_BUILD_TOOLS
 )
@@ -24,7 +31,6 @@ vcpkg_cmake_configure(
         -DLLAMA_ALL_WARNINGS=OFF
         -DLLAMA_BUILD_TESTS=OFF
         -DLLAMA_BUILD_EXAMPLES=OFF
-        -DLLAMA_BUILD_SERVER=OFF
         -DLLAMA_USE_SYSTEM_GGML=ON
         -DVCPKG_LOCK_FIND_PACKAGE_Git=OFF
 )
@@ -39,21 +45,32 @@ file(RENAME "${CURRENT_PACKAGES_DIR}/bin/convert_hf_to_gguf.py" "${CURRENT_PACKA
 file(REMOVE "${CURRENT_PACKAGES_DIR}/debug/bin/convert_hf_to_gguf.py")
 
 if("tools" IN_LIST FEATURES)
+    set(tool_names
+        llama-batched-bench
+        llama-bench
+        llama-completion
+        llama-cvector-generator
+        llama-export-lora
+        llama-fit-params
+        llama-gguf-split
+        llama-imatrix
+        llama-mtmd-cli
+        llama-perplexity
+        llama-quantize
+        llama-results
+        llama-template-analysis
+        llama-tokenize
+        llama-tts
+    )
+    # https://github.com/ggml-org/llama.cpp/blob/master/tools/parser/CMakeLists.txt#L1
+    if(NOT VCPKG_TARGET_IS_WINDOWS OR VCPKG_LIBRARY_LINKAGE STREQUAL "static")
+        list(APPEND tool_names llama-debug-template-parser)
+    endif()
+    if("server" IN_LIST FEATURES)
+        list(APPEND tool_names llama-cli llama-server)
+    endif()
     vcpkg_copy_tools(
-        TOOL_NAMES
-            llama-batched-bench
-            llama-bench
-            llama-cli
-            llama-cvector-generator
-            llama-export-lora
-            llama-gguf-split
-            llama-imatrix
-            llama-mtmd-cli
-            llama-perplexity
-            llama-quantize
-            llama-run
-            llama-tokenize
-            llama-tts
+        TOOL_NAMES ${tool_names}
         AUTO_CLEAN
     )
 endif()
diff --git a/ports/llama-cpp/unvendor.diff b/ports/llama-cpp/unvendor.diff
new file mode 100644
index 00000000000000..56af6ec07b69d5
--- /dev/null
+++ b/ports/llama-cpp/unvendor.diff
@@ -0,0 +1,82 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 310a3dc..1f1495b 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -197,8 +197,9 @@ add_subdirectory(src)
+ #
+ 
+ if (LLAMA_BUILD_COMMON)
++    find_package(httplib CONFIG REQUIRED)
++    add_library(cpp-httplib ALIAS httplib::httplib)
+     add_subdirectory(common)
+-    add_subdirectory(vendor/cpp-httplib)
+ endif()
+ 
+ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+diff --git a/common/http.h b/common/http.h
+index d3daccd..be18264 100644
+--- a/common/http.h
++++ b/common/http.h
+@@ -1,6 +1,6 @@
+ #pragma once
+ 
+-#include <cpp-httplib/httplib.h>
++#include <httplib.h>
+ 
+ struct common_http_url {
+     std::string scheme;
+diff --git a/tools/mtmd/CMakeLists.txt b/tools/mtmd/CMakeLists.txt
+index 35d721d..18eabc4 100644
+--- a/tools/mtmd/CMakeLists.txt
++++ b/tools/mtmd/CMakeLists.txt
+@@ -53,7 +53,6 @@ target_link_libraries (mtmd PUBLIC ggml llama)
+ target_link_libraries     (mtmd PRIVATE Threads::Threads)
+ target_include_directories(mtmd PUBLIC  .)
+ target_include_directories(mtmd PRIVATE ../..)
+-target_include_directories(mtmd PRIVATE ../../vendor)
+ target_compile_features   (mtmd PRIVATE cxx_std_17)
+ 
+ if (BUILD_SHARED_LIBS)
+diff --git a/tools/mtmd/mtmd-helper.cpp b/tools/mtmd/mtmd-helper.cpp
+index 4094074..d40ae74 100644
+--- a/tools/mtmd/mtmd-helper.cpp
++++ b/tools/mtmd/mtmd-helper.cpp
+@@ -27,10 +27,10 @@
+ #define MA_NO_ENGINE
+ #define MA_NO_GENERATION
+ #define MA_API static
+-#include "miniaudio/miniaudio.h"
++#include "miniaudio.h"
+ 
+ #define STB_IMAGE_IMPLEMENTATION
+-#include "stb/stb_image.h"
++#include "stb_image.h"
+ 
+ #ifdef MTMD_INTERNAL_HEADER
+ #error "mtmd-helper is a public library outside of mtmd. it must not include internal headers"
+diff --git a/tools/server/server-http.cpp b/tools/server/server-http.cpp
+index 6f24f83..40a5c85 100644
+--- a/tools/server/server-http.cpp
++++ b/tools/server/server-http.cpp
+@@ -2,7 +2,7 @@
+ #include "server-http.h"
+ #include "server-common.h"
+ 
+-#include <cpp-httplib/httplib.h>
++#include <httplib.h>
+ 
+ #include
+ #include
+diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp
+index 5a05ca2..3b7aef9 100644
+--- a/tools/server/server-models.cpp
++++ b/tools/server/server-models.cpp
+@@ -5,7 +5,7 @@
+ #include "preset.h"
+ #include "download.h"
+ 
+-#include <cpp-httplib/httplib.h> // TODO: remove this once we use HTTP client from download.h
++#include <httplib.h> // TODO: remove this once we use HTTP client from download.h
+ #include
+ 
+ #include
diff --git a/ports/llama-cpp/vcpkg.json b/ports/llama-cpp/vcpkg.json
index 5415bb3af1f580..2bb3d8f96e5def 100644
--- a/ports/llama-cpp/vcpkg.json
+++ b/ports/llama-cpp/vcpkg.json
@@ -1,11 +1,13 @@
 {
   "name": "llama-cpp",
-  "version": "7146",
+  "version": "9030",
   "description": "LLM inference in C/C++",
   "homepage": "https://github.com/ggml-org/llama.cpp",
   "license": "MIT",
   "dependencies": [
+    "cpp-httplib",
     "ggml",
+    "nlohmann-json",
     {
       "name": "vcpkg-cmake",
       "host": true
@@ -25,8 +27,15 @@
         }
       ]
     },
+    "server": {
+      "description": "Build llama-server and llama-cli"
+    },
     "tools": {
-      "description": "Build tools"
+      "description": "Build tools",
+      "dependencies": [
+        "miniaudio",
+        "stb"
+      ]
     }
   }
 }
diff --git a/versions/baseline.json b/versions/baseline.json
index 538914e3146ddd..dd4edfc433818b 100644
--- a/versions/baseline.json
+++ b/versions/baseline.json
@@ -3373,8 +3373,8 @@
       "port-version": 0
     },
     "ggml": {
-      "baseline": "2025-11-17",
-      "port-version": 2
+      "baseline": "0.11.0",
+      "port-version": 0
     },
     "ghc-filesystem": {
       "baseline": "1.5.14",
       "port-version": 0
     },
@@ -6073,7 +6073,7 @@
       "port-version": 0
     },
     "llama-cpp": {
-      "baseline": "7146",
+      "baseline": "9030",
       "port-version": 0
     },
     "llfio": {
diff --git a/versions/g-/ggml.json b/versions/g-/ggml.json
index 59cf6e89cb9bf6..de6b40c211b1b8 100644
--- a/versions/g-/ggml.json
+++ b/versions/g-/ggml.json
@@ -1,5 +1,10 @@
 {
   "versions": [
+    {
+      "git-tree": "3300637ff16adc68d2325217ed8d51440889c24f",
+      "version": "0.11.0",
+      "port-version": 0
+    },
     {
       "git-tree": "befbc0e2ba5c226a789bd5ed9f3f515182b53ef1",
       "version-date": "2025-11-17",
diff --git a/versions/l-/llama-cpp.json b/versions/l-/llama-cpp.json
index 9b0d9ad3498c81..131834ee9cb5cb 100644
--- a/versions/l-/llama-cpp.json
+++ b/versions/l-/llama-cpp.json
@@ -1,5 +1,10 @@
 {
   "versions": [
+    {
+      "git-tree": "bb7da511f102de37eabea85b242a21d1d4390c76",
+      "version": "9030",
+      "port-version": 0
+    },
     {
       "git-tree": "a0eeadd7130b262ce1aa255f5c6b1d27fc57a4e1",
       "version": "7146",