diff --git a/ports/ggml/cuda-13-2.diff b/ports/ggml/cuda-13-2.diff
deleted file mode 100644
index 4aa67d8889e2a6..00000000000000
--- a/ports/ggml/cuda-13-2.diff
+++ /dev/null
@@ -1,13 +0,0 @@
-diff --git a/src/ggml-cuda/CMakeLists.txt b/src/ggml-cuda/CMakeLists.txt
-index 67af1d8..113d0f6 100644
---- a/src/ggml-cuda/CMakeLists.txt
-+++ b/src/ggml-cuda/CMakeLists.txt
-@@ -177,6 +177,8 @@ if (CUDAToolkit_FOUND)
- 
-     if (NOT MSVC)
-         list(APPEND CUDA_CXX_FLAGS -Wno-pedantic)
-+    else()
-+        list(APPEND CUDA_CXX_FLAGS /Zc:preprocessor)
-     endif()
- 
-     list(JOIN CUDA_CXX_FLAGS " " CUDA_CXX_FLAGS_JOINED) # pass host compiler flags as a single argument
diff --git a/ports/ggml/fix-vk-32bit.diff b/ports/ggml/fix-vk-32bit.diff
new file mode 100644
index 00000000000000..8e24a264df75f3
--- /dev/null
+++ b/ports/ggml/fix-vk-32bit.diff
@@ -0,0 +1,71 @@
+diff --git a/src/ggml-vulkan/ggml-vulkan.cpp b/src/ggml-vulkan/ggml-vulkan.cpp
+index 423e01d..3cd5488 100644
+--- a/src/ggml-vulkan/ggml-vulkan.cpp
++++ b/src/ggml-vulkan/ggml-vulkan.cpp
+@@ -2036,7 +2036,7 @@ void vk_memory_logger::log_allocation(vk_buffer_ref buf_ref, size_t size) {
+     allocations[buf->buffer] = size;
+     total_device += device ? size : 0;
+     total_host += device ? 0 : size;
+-    VK_LOG_MEMORY(buf->device->name << ": +" << format_size(size) << " " << type << " at " << buf->buffer << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host));
++    VK_LOG_MEMORY(buf->device->name << ": +" << format_size(size) << " " << type << " at " << static_cast<VkBuffer>(buf->buffer) << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host));
+ }
+ 
+ void vk_memory_logger::log_deallocation(vk_buffer_ref buf_ref) {
+@@ -2052,10 +2052,10 @@ void vk_memory_logger::log_deallocation(vk_buffer_ref buf_ref) {
+     total_device -= device ? it->second : 0;
+     total_host -= device ? 0 : it->second;
+     if (it != allocations.end()) {
+-        VK_LOG_MEMORY(buf->device->name << ": -" << format_size(it->second) << " " << type << " at " << buf->buffer << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host));
++        VK_LOG_MEMORY(buf->device->name << ": -" << format_size(it->second) << " " << type << " at " << static_cast<VkBuffer>(buf->buffer) << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host));
+         allocations.erase(it);
+     } else {
+-        VK_LOG_MEMORY("ERROR " << buf->device->name << ": Attempted to deallocate unknown " << type << " memory at " << buf->buffer);
++        VK_LOG_MEMORY("ERROR " << buf->device->name << ": Attempted to deallocate unknown " << type << " memory at " << static_cast<VkBuffer>(buf->buffer));
+     }
+ }
+ 
+@@ -6951,7 +6951,7 @@ static bool ggml_vk_buffer_write_2d_async(vk_context subctx, vk_buffer& dst, siz
+     }
+ 
+     ggml_vk_sync_buffers(nullptr, subctx);
+-    subctx->s->buffer->buf.copyBuffer((VkBuffer)staging_buffer->buffer, (VkBuffer)dst->buffer, slices);
++    subctx->s->buffer->buf.copyBuffer(staging_buffer->buffer, dst->buffer, slices);
+ 
+     if (width == spitch) {
+         deferred_memcpy((uint8_t *)staging_buffer->ptr, src, staging_size, &subctx->in_memcpys);
+@@ -7086,7 +7086,7 @@ static bool ggml_vk_buffer_read_async(vk_context subctx, vk_buffer& src, size_t
+ }
+ 
+ static void ggml_vk_buffer_read_2d(vk_buffer& src, size_t offset, void * dst, size_t spitch, size_t dpitch, size_t width, size_t height) {
+-    VK_LOG_DEBUG("ggml_vk_buffer_read_2d(" << src->buffer << ", " << offset << ", " << width << ", " << height << ")");
++    VK_LOG_DEBUG("ggml_vk_buffer_read_2d(" << static_cast<VkBuffer>(src->buffer) << ", " << offset << ", " << width << ", " << height << ")");
+ 
+     // If the device is not an UMA device the memory is host-accessible through rebar. While writing
+     // through PCIe is sufficient fast reading back data from PCIe is slower than going through
+@@ -7118,7 +7118,7 @@ static void ggml_vk_buffer_read_2d(vk_buffer& src, size_t offset, void * dst, si
+ }
+ 
+ static void ggml_vk_buffer_read(vk_buffer& src, size_t offset, void * dst, size_t size) {
+-    VK_LOG_DEBUG("ggml_vk_buffer_read(" << src->buffer << ", " << offset << ", " << size << ")");
++    VK_LOG_DEBUG("ggml_vk_buffer_read(" << static_cast<VkBuffer>(src->buffer) << ", " << offset << ", " << size << ")");
+     ggml_vk_buffer_read_2d(src, offset, dst, size, size, size, 1);
+ }
+ 
+@@ -7286,7 +7286,7 @@ static void ggml_vk_matmul(
+     uint32_t batch_stride_a, uint32_t batch_stride_b, uint32_t batch_stride_d,
+     uint32_t split_k, uint32_t batch, uint32_t ne02, uint32_t ne12, uint32_t broadcast2, uint32_t broadcast3,
+     uint32_t padded_n) {
+-    VK_LOG_DEBUG("ggml_vk_matmul(a: (" << a.buffer->buffer << ", " << a.offset << ", " << a.size << "), b: (" << b.buffer->buffer << ", " << b.offset << ", " << b.size << "), d: (" << d.buffer->buffer << ", " << d.offset << ", " << d.size << "), split_k: (" << (split_k_buffer.buffer != nullptr ? split_k_buffer.buffer->buffer : VK_NULL_HANDLE) << ", " << split_k_buffer.offset << ", " << split_k_buffer.size << "), m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ", split_k: " << split_k << ", batch: " << batch << ", ne02: " << ne02 << ", ne12: " << ne12 << ", broadcast2: " << broadcast2 << ", broadcast3: " << broadcast3 << ", padded_n: " << padded_n << ")");
++    VK_LOG_DEBUG("ggml_vk_matmul(a: (" << static_cast<VkBuffer>(a.buffer->buffer) << ", " << a.offset << ", " << a.size << "), b: (" << static_cast<VkBuffer>(b.buffer->buffer) << ", " << b.offset << ", " << b.size << "), d: (" << static_cast<VkBuffer>(d.buffer->buffer) << ", " << d.offset << ", " << d.size << "), split_k: (" << (split_k_buffer.buffer != nullptr ? static_cast<VkBuffer>(split_k_buffer.buffer->buffer) : VkBuffer{}) << ", " << split_k_buffer.offset << ", " << split_k_buffer.size << "), m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ", split_k: " << split_k << ", batch: " << batch << ", ne02: " << ne02 << ", ne12: " << ne12 << ", broadcast2: " << broadcast2 << ", broadcast3: " << broadcast3 << ", padded_n: " << padded_n << ")");
+     if (split_k == 1) {
+         ggml_pipeline_request_descriptor_sets(ctx, pipeline, CEIL_DIV(batch, ctx->device->properties.limits.maxComputeWorkGroupCount[2]));
+ 
+@@ -7366,7 +7366,7 @@ static void ggml_vk_matmul_id(
+     uint32_t batch_stride_a, uint32_t batch_stride_b, uint32_t batch_stride_d,
+     uint32_t n_as, uint32_t nei0, uint32_t nei1, uint32_t nbi1, uint32_t ne11,
+     uint32_t padded_n) {
+-    VK_LOG_DEBUG("ggml_vk_matmul_id(a: (" << a.buffer->buffer << ", " << a.offset << ", " << a.size << "), b: (" << b.buffer->buffer << ", " << b.offset << ", " << b.size << "), d: (" << d.buffer->buffer << ", " << d.offset << ", " << d.size << "), ids: (" << ids.buffer->buffer << ", " << ids.offset << ", " << ids.size << "), expert_count: (" << expert_count_buf.buffer->buffer << ", " << expert_count_buf.offset << ", " << expert_count_buf.size << "), " <<
++    VK_LOG_DEBUG("ggml_vk_matmul_id(a: (" << static_cast<VkBuffer>(a.buffer->buffer) << ", " << a.offset << ", " << a.size << "), b: (" << static_cast<VkBuffer>(b.buffer->buffer) << ", " << b.offset << ", " << b.size << "), d: (" << static_cast<VkBuffer>(d.buffer->buffer) << ", " << d.offset << ", " << d.size << "), ids: (" << static_cast<VkBuffer>(ids.buffer->buffer) << ", " << ids.offset << ", " << ids.size << "), expert_count: (" << static_cast<VkBuffer>(expert_count_buf.buffer->buffer) << ", " << expert_count_buf.offset << ", " << expert_count_buf.size << "), " <<
+         "m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", " <<
+         "batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ", " <<
+         "n_as: " << n_as << ", nei0: " << nei0 << ", nei1: " << nei1 << ", nbi1: " << nbi1 << ", ne11: " << ne11 << ")");
diff --git a/ports/ggml/fix-vulkan-spv-shadowing.diff b/ports/ggml/fix-vulkan-spv-shadowing.diff
new file mode 100644
index 00000000000000..ee0f499c52743a
--- /dev/null
+++ b/ports/ggml/fix-vulkan-spv-shadowing.diff
@@ -0,0 +1,66 @@
+diff --git a/src/ggml-vulkan/ggml-vulkan.cpp b/src/ggml-vulkan/ggml-vulkan.cpp
+index 423e01dbff1..0a7931002ab 100644
+--- a/src/ggml-vulkan/ggml-vulkan.cpp
++++ b/src/ggml-vulkan/ggml-vulkan.cpp
+@@ -2149,11 +2149,11 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
+ 
+     // Patch SPIR-V to enable RTE rounding for FP16, avoiding the need for
+     // separate shader variants compiled with -DRTE16.
+-    std::vector<uint32_t> spv;
++    std::vector<uint32_t> spirv;
+     if (device->float_controls_rte_fp16) {
+         const uint32_t* spv_words = reinterpret_cast<const uint32_t*>(spv_data);
+         size_t word_count = spv_size / sizeof(uint32_t);
+-        spv.assign(spv_words, spv_words + word_count);
++        spirv.assign(spv_words, spv_words + word_count);
+ 
+         // Find insertion points respecting SPIR-V layout order:
+         // Header(5) -> OpCapability -> OpExtension -> ... -> OpEntryPoint -> OpExecutionMode -> ...
+@@ -2163,9 +2163,9 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
+         size_t exec_insert_pos = pos;
+         uint32_t entry_point_id = 0;
+ 
+-        while (pos < spv.size()) {
+-            uint32_t opcode = spv[pos] & spv::OpCodeMask;
+-            uint32_t len = spv[pos] >> spv::WordCountShift;
++        while (pos < spirv.size()) {
++            uint32_t opcode = spirv[pos] & spv::OpCodeMask;
++            uint32_t len = spirv[pos] >> spv::WordCountShift;
+             if (len == 0) break;
+ 
+             if (opcode == spv::OpCapability) {
+@@ -2174,7 +2174,7 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
+             } else if (opcode == spv::OpExtension) {
+                 ext_insert_pos = pos + len;
+             } else if (opcode == spv::OpEntryPoint) {
+-                entry_point_id = spv[pos + 2];
++                entry_point_id = spirv[pos + 2];
+                 exec_insert_pos = pos + len;
+             } else if (opcode == spv::OpExecutionMode || opcode == spv::OpExecutionModeId) {
+                 exec_insert_pos = pos + len;
+@@ -2189,7 +2189,7 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
+ 
+         // OpExecutionMode %entrypoint RoundingModeRTE 16
+         uint32_t exec_mode[] = { (4u << spv::WordCountShift) | spv::OpExecutionMode, entry_point_id, spv::ExecutionModeRoundingModeRTE, 16 };
+-        spv.insert(spv.begin() + exec_insert_pos, std::begin(exec_mode), std::end(exec_mode));
++        spirv.insert(spirv.begin() + exec_insert_pos, std::begin(exec_mode), std::end(exec_mode));
+ 
+         // OpExtension "SPV_KHR_float_controls"
+         const char ext_str[] = "SPV_KHR_float_controls";
+@@ -2197,13 +2197,13 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin
+         std::vector<uint32_t> extension(1 + ext_str_words, 0);
+         extension[0] = (uint32_t)((1 + ext_str_words) << spv::WordCountShift) | spv::OpExtension;
+         memcpy(&extension[1], ext_str, sizeof(ext_str));
+-        spv.insert(spv.begin() + ext_insert_pos, extension.begin(), extension.end());
++        spirv.insert(spirv.begin() + ext_insert_pos, extension.begin(), extension.end());
+ 
+         // OpCapability RoundingModeRTE
+         uint32_t capability[] = { (2u << spv::WordCountShift) | spv::OpCapability, spv::CapabilityRoundingModeRTE };
+-        spv.insert(spv.begin() + cap_insert_pos, std::begin(capability), std::end(capability));
++        spirv.insert(spirv.begin() + cap_insert_pos, std::begin(capability), std::end(capability));
+ 
+-        shader_module_create_info = vk::ShaderModuleCreateInfo({}, spv.size() * sizeof(uint32_t), spv.data());
++        shader_module_create_info = vk::ShaderModuleCreateInfo({}, spirv.size() * sizeof(uint32_t), spirv.data());
+     }
+ 
+     pipeline->shader_module = device->device.createShaderModule(shader_module_create_info);
diff --git a/ports/ggml/pkgconfig.diff b/ports/ggml/pkgconfig.diff
index 211e3407e96b2d..30c406696d206a 100644
--- a/ports/ggml/pkgconfig.diff
+++ b/ports/ggml/pkgconfig.diff
@@ -1,8 +1,8 @@
 diff --git a/CMakeLists.txt b/CMakeLists.txt
-index 0211255..1b1732a 100644
+index c97f681..64a456b 100644
 --- a/CMakeLists.txt
 +++ b/CMakeLists.txt
-@@ -333,7 +333,7 @@ if (GGML_STANDALONE)
+@@ -352,7 +352,7 @@ if (GGML_STANDALONE)
                  @ONLY)
  
      install(FILES ${CMAKE_CURRENT_BINARY_DIR}/ggml.pc
@@ -11,7 +11,7 @@ index 0211255..1b1732a 100644
  endif()
  
  #
-@@ -354,6 +354,7 @@ set(variable_set_statements
+@@ -373,6 +373,7 @@ set(variable_set_statements
  set(GGML_SHARED_LIB ${BUILD_SHARED_LIBS})
  
  get_cmake_property(all_variables VARIABLES)
@@ -34,10 +34,10 @@ index 3e0291e..a762733 100644
 +Libs.private: @GGML_PKGCONFIG_LIBS_PRIVATE@
 +Requires.private: @GGML_PKGCONFIG_REQUIRES_PRIVATE@
 diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
-index 628db3f..af611bb 100644
+index 3e48860..1ec27e8 100644
 --- a/src/CMakeLists.txt
 +++ b/src/CMakeLists.txt
-@@ -189,6 +189,10 @@ endif()
+@@ -185,6 +185,10 @@ endif()
  
  # ggml
@@ -48,7 +48,7 @@ index 628db3f..af611bb 100644
  if (GGML_BACKEND_DL AND NOT BUILD_SHARED_LIBS)
      message(FATAL_ERROR "GGML_BACKEND_DL requires BUILD_SHARED_LIBS")
  endif()
-@@ -241,6 +245,7 @@ target_link_libraries(ggml PUBLIC ggml-base)
+@@ -243,6 +247,7 @@ target_link_libraries(ggml PUBLIC ggml-base)
  
  if (CMAKE_SYSTEM_NAME MATCHES "Linux")
      target_link_libraries(ggml PRIVATE dl)
@@ -56,7 +56,7 @@ index 628db3f..af611bb 100644
  endif()
  
  function(ggml_add_backend_library backend)
-@@ -291,12 +296,20 @@ function(ggml_add_backend backend)
+@@ -296,12 +301,20 @@ function(ggml_add_backend backend)
      string(TOUPPER "GGML_${backend}" backend_id)
      if (${backend_id})
          string(TOLOWER "ggml-${backend}" backend_target)
@@ -77,12 +77,11 @@ index 628db3f..af611bb 100644
      endif()
  endfunction()
-@@ -434,11 +447,15 @@ find_library(MATH_LIBRARY m)
- if (MATH_LIBRARY)
-     if (NOT WIN32 OR NOT DEFINED ENV{ONEAPI_ROOT})
-         target_link_libraries(ggml-base PRIVATE m)
-+        string(APPEND GGML_PKGCONFIG_LIBS_PRIVATE " -lm")
-     endif()
+@@ -474,10 +487,14 @@ if (DEFINED MATH_LIBRARY)
+     target_link_libraries(ggml-base PRIVATE ${MATH_LIBRARY})
+ elseif (NOT WIN32 AND NOT DEFINED ENV{ONEAPI_ROOT})
+     target_link_libraries(ggml-base PRIVATE m)
++    string(APPEND GGML_PKGCONFIG_LIBS_PRIVATE " -lm")
  endif()
  
  if (CMAKE_SYSTEM_NAME MATCHES "Android")
@@ -93,11 +92,12 @@ index 628db3f..af611bb 100644
  endif()
  
  if(CMAKE_SYSTEM_NAME MATCHES "visionOS")
-@@ -451,4 +468,10 @@ if (BUILD_SHARED_LIBS)
+@@ -489,5 +506,11 @@ if (BUILD_SHARED_LIBS)
+         set_target_properties(${target} PROPERTIES POSITION_INDEPENDENT_CODE ON)
          target_compile_definitions(${target} PRIVATE GGML_BUILD)
          target_compile_definitions(${target} PUBLIC GGML_SHARED)
++        string(APPEND GGML_PKGCONFIG_CFLAGS " -DGGML_SHARED -DGGML_BACKEND_SHARED")
      endforeach()
-+    string(APPEND GGML_PKGCONFIG_CFLAGS " -DGGML_SHARED -DGGML_BACKEND_SHARED")
  endif()
 +
 +set(GGML_PKGCONFIG_CFLAGS "${GGML_PKGCONFIG_CFLAGS}" PARENT_SCOPE)
@@ -105,22 +105,22 @@
 +set(GGML_PKGCONFIG_LIBS_PRIVATE "${GGML_PKGCONFIG_LIBS_PRIVATE}" PARENT_SCOPE)
 +set(GGML_PKGCONFIG_REQUIRES_PRIVATE "${GGML_PKGCONFIG_REQUIRES_PRIVATE}" PARENT_SCOPE)
 diff --git a/src/ggml-blas/CMakeLists.txt b/src/ggml-blas/CMakeLists.txt
-index 60ce4b1..058f65c 100644
+index c27dc17..ce1ab21 100644
 --- a/src/ggml-blas/CMakeLists.txt
 +++ b/src/ggml-blas/CMakeLists.txt
-@@ -79,6 +79,7 @@ if (BLAS_FOUND)
+@@ -93,6 +93,7 @@ if (BLAS_FOUND)
      endif()
  
      target_link_libraries (ggml-blas PRIVATE ${BLAS_LIBRARIES})
 +    set(GGML_PKGCONFIG_REQUIRES_PRIVATE "${GGML_PKGCONFIG_REQUIRES_PRIVATE} cblas" PARENT_SCOPE)
-     target_include_directories(ggml-blas PRIVATE ${BLAS_INCLUDE_DIRS})
+     target_include_directories(ggml-blas SYSTEM PRIVATE ${BLAS_INCLUDE_DIRS})
  else()
      message(FATAL_ERROR "BLAS not found, please refer to "
 diff --git a/src/ggml-cpu/CMakeLists.txt b/src/ggml-cpu/CMakeLists.txt
-index e52e050..107cbf5 100644
+index c1c225f..39ef457 100644
 --- a/src/ggml-cpu/CMakeLists.txt
 +++ b/src/ggml-cpu/CMakeLists.txt
-@@ -52,6 +52,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+@@ -57,6 +57,9 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
      target_compile_features(${GGML_CPU_NAME} PRIVATE c_std_11 cxx_std_17)
      target_include_directories(${GGML_CPU_NAME} PRIVATE . ggml-cpu)
@@ -130,7 +130,7 @@ index e52e050..107cbf5 100644
      if (APPLE AND GGML_ACCELERATE)
          find_library(ACCELERATE_FRAMEWORK Accelerate)
          if (ACCELERATE_FRAMEWORK)
-@@ -62,6 +65,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+@@ -67,6 +70,7 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
              target_compile_definitions(${GGML_CPU_NAME} PRIVATE ACCELERATE_LAPACK_ILP64)
              target_link_libraries(${GGML_CPU_NAME} PRIVATE ${ACCELERATE_FRAMEWORK})
@@ -138,7 +138,7 @@ index e52e050..107cbf5 100644
          else()
              message(WARNING "Accelerate framework not found")
          endif()
-@@ -74,6 +78,18 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+@@ -79,6 +83,18 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
          target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_OPENMP)
          target_link_libraries(${GGML_CPU_NAME} PRIVATE OpenMP::OpenMP_C OpenMP::OpenMP_CXX)
@@ -157,7 +157,7 @@ index e52e050..107cbf5 100644
      else()
          set(GGML_OPENMP_ENABLED "OFF" CACHE INTERNAL "")
          message(WARNING "OpenMP not found")
-@@ -96,8 +112,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
+@@ -101,8 +117,12 @@ function(ggml_add_cpu_backend_variant_impl tag_name)
          target_compile_definitions(${GGML_CPU_NAME} PRIVATE GGML_USE_CPU_HBM)
          target_link_libraries(${GGML_CPU_NAME} PUBLIC memkind)
@@ -171,7 +171,7 @@ index e52e050..107cbf5 100644
          message(STATUS "ARM detected")
          list(APPEND GGML_CPU_SOURCES
 diff --git a/src/ggml-metal/CMakeLists.txt b/src/ggml-metal/CMakeLists.txt
-index 63418fe..138996a 100644
+index 42054d8..d780361 100644
 --- a/src/ggml-metal/CMakeLists.txt
 +++ b/src/ggml-metal/CMakeLists.txt
 @@ -19,6 +19,11 @@ target_link_libraries(ggml-metal PRIVATE
@@ -187,7 +187,7 @@ index 63418fe..138996a 100644
      add_compile_definitions(GGML_METAL_NDEBUG)
  endif()
 diff --git a/src/ggml-opencl/CMakeLists.txt b/src/ggml-opencl/CMakeLists.txt
-index 681c81b..bac7067 100644
+index 5ed83ee..bb06a50 100644
 --- a/src/ggml-opencl/CMakeLists.txt
 +++ b/src/ggml-opencl/CMakeLists.txt
 @@ -7,6 +7,7 @@ ggml_add_backend_library(${TARGET_NAME}
@@ -199,7 +199,7 @@ index 681c81b..bac7067 100644
  if (GGML_OPENCL_PROFILING)
 diff --git a/src/ggml-vulkan/CMakeLists.txt b/src/ggml-vulkan/CMakeLists.txt
-index de01336..3f9524c 100644
+index 715a263..04c142c 100644
 --- a/src/ggml-vulkan/CMakeLists.txt
 +++ b/src/ggml-vulkan/CMakeLists.txt
 @@ -87,6 +87,11 @@ if (Vulkan_FOUND)
diff --git a/ports/ggml/portfile.cmake b/ports/ggml/portfile.cmake
index 575435690e9c70..eae899de514def 100644
--- a/ports/ggml/portfile.cmake
+++ b/ports/ggml/portfile.cmake
@@ -1,8 +1,8 @@
 vcpkg_from_github(
     OUT_SOURCE_PATH SOURCE_PATH
     REPO ggml-org/ggml
-    REF 55bc9320a4aae82af18e23eefd5de319a755d7b9
-    SHA512 9433c9c258bbbfa817051f2ba2a8c8f166ee885c953d3ee27198890d4af8366fdee11ba55514b8b8414c836615e56eceaa98f33a01ecf51846338bc60d34263b
+    REF v${VERSION}
+    SHA512 c49b6498101f50a5ebde158f5707237f6df3af0182e819651003ec5f418144f9ca68cc8434189f2a658ea705269fad2d263cfe4931c12b369883aa5a66182ed9
     HEAD_REF master
     PATCHES
         cmake-config.diff
@@ -10,7 +10,8 @@ vcpkg_from_github(
         relax-link-options.diff
         vulkan-shaders-gen.diff
         fix-dequant_funcs.diff
-        cuda-13-2.diff
+        fix-vulkan-spv-shadowing.diff
+        fix-vk-32bit.diff
 )
 
 vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS
diff --git a/ports/ggml/vcpkg.json b/ports/ggml/vcpkg.json
index 3e66d9d73c6934..f84dea81e773d5 100644
--- a/ports/ggml/vcpkg.json
+++ b/ports/ggml/vcpkg.json
@@ -1,7 +1,6 @@
 {
   "name": "ggml",
-  "version-date": "2025-11-17",
-  "port-version": 2,
+  "version": "0.11.0",
   "description": "Tensor library for machine learning",
   "homepage": "https://github.com/ggml-org/ggml",
   "license": "MIT",
@@ -61,6 +60,7 @@
         "name": "shaderc",
         "host": true
       },
+      "spirv-headers",
       "vulkan"
     ]
   }
 }
diff --git a/ports/llama-cpp/portfile.cmake b/ports/llama-cpp/portfile.cmake
index 42f9ffecc8ccd0..af0e0fa5864c2d 100644
--- a/ports/llama-cpp/portfile.cmake
+++ b/ports/llama-cpp/portfile.cmake
@@ -2,17 +2,24 @@ vcpkg_from_github(
     OUT_SOURCE_PATH SOURCE_PATH
     REPO ggml-org/llama.cpp
     REF b${VERSION}
-    SHA512 879490cdeeef9397b9843730399bd55c7ef57de7e421cfcbaa15bd95416e028e4a0864f237218895a8a1244b7a4dbef9eacda49aa9697628e29774d0fc90d04b
+    SHA512 ef5e21b61ca2961004fc57ad9d4a07191458df4f1749e71a9dc96d653676a6d68d43b7b8c74ebb235f6dffe5c064330cb1124887bc5c119876d7292543321945
     HEAD_REF master
     PATCHES
         cmake-config.diff
         pkgconfig.diff
+        unvendor.diff
 )
 
 file(REMOVE_RECURSE "${SOURCE_PATH}/ggml/include" "${SOURCE_PATH}/ggml/src")
+file(REMOVE_RECURSE
+    "${SOURCE_PATH}/vendor/cpp-httplib"
+    "${SOURCE_PATH}/vendor/miniaudio"
+    "${SOURCE_PATH}/vendor/nlohmann"
+    "${SOURCE_PATH}/vendor/stb")
 
 vcpkg_check_features(OUT_FEATURE_OPTIONS options
     FEATURES
         download LLAMA_CURL
+        server   LLAMA_BUILD_SERVER
         tools    LLAMA_BUILD_TOOLS
 )
@@ -24,7 +31,6 @@ vcpkg_cmake_configure(
         -DLLAMA_ALL_WARNINGS=OFF
         -DLLAMA_BUILD_TESTS=OFF
         -DLLAMA_BUILD_EXAMPLES=OFF
-        -DLLAMA_BUILD_SERVER=OFF
         -DLLAMA_USE_SYSTEM_GGML=ON
         -DVCPKG_LOCK_FIND_PACKAGE_Git=OFF
 )
@@ -39,21 +45,32 @@ file(RENAME "${CURRENT_PACKAGES_DIR}/bin/convert_hf_to_gguf.py" "${CURRENT_PACKA
 file(REMOVE "${CURRENT_PACKAGES_DIR}/debug/bin/convert_hf_to_gguf.py")
 
 if("tools" IN_LIST FEATURES)
+    set(tool_names
+        llama-batched-bench
+        llama-bench
+        llama-completion
+        llama-cvector-generator
+        llama-export-lora
+        llama-fit-params
+        llama-gguf-split
+        llama-imatrix
+        llama-mtmd-cli
+        llama-perplexity
+        llama-quantize
+        llama-results
+        llama-template-analysis
+        llama-tokenize
+        llama-tts
+    )
+    # https://github.com/ggml-org/llama.cpp/blob/master/tools/parser/CMakeLists.txt#L1
+    if(NOT VCPKG_TARGET_IS_WINDOWS OR VCPKG_LIBRARY_LINKAGE STREQUAL "static")
+        list(APPEND tool_names llama-debug-template-parser)
+    endif()
+    if("server" IN_LIST FEATURES)
+        list(APPEND tool_names llama-cli llama-server)
+    endif()
     vcpkg_copy_tools(
-        TOOL_NAMES
-            llama-batched-bench
-            llama-bench
-            llama-cli
-            llama-cvector-generator
-            llama-export-lora
-            llama-gguf-split
-            llama-imatrix
-            llama-mtmd-cli
-            llama-perplexity
-            llama-quantize
-            llama-run
-            llama-tokenize
-            llama-tts
+        TOOL_NAMES ${tool_names}
         AUTO_CLEAN
     )
 endif()
diff --git a/ports/llama-cpp/unvendor.diff b/ports/llama-cpp/unvendor.diff
new file mode 100644
index 00000000000000..56af6ec07b69d5
--- /dev/null
+++ b/ports/llama-cpp/unvendor.diff
@@ -0,0 +1,82 @@
+diff --git a/CMakeLists.txt b/CMakeLists.txt
+index 310a3dc..1f1495b 100644
+--- a/CMakeLists.txt
++++ b/CMakeLists.txt
+@@ -197,8 +197,9 @@ add_subdirectory(src)
+ #
+ 
+ if (LLAMA_BUILD_COMMON)
++    find_package(httplib CONFIG REQUIRED)
++    add_library(cpp-httplib ALIAS httplib::httplib)
+     add_subdirectory(common)
+-    add_subdirectory(vendor/cpp-httplib)
+ endif()
+ 
+ if (LLAMA_BUILD_COMMON AND LLAMA_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
+diff --git a/common/http.h b/common/http.h
+index d3daccd..be18264 100644
+--- a/common/http.h
++++ b/common/http.h
+@@ -1,6 +1,6 @@
+ #pragma once
+ 
+-#include <cpp-httplib/httplib.h>
++#include <httplib.h>
+ 
+ struct common_http_url {
+     std::string scheme;
+diff --git a/tools/mtmd/CMakeLists.txt b/tools/mtmd/CMakeLists.txt
+index 35d721d..18eabc4 100644
+--- a/tools/mtmd/CMakeLists.txt
++++ b/tools/mtmd/CMakeLists.txt
+@@ -53,7 +53,6 @@ target_link_libraries (mtmd PUBLIC ggml llama)
+ target_link_libraries     (mtmd PRIVATE Threads::Threads)
+ target_include_directories(mtmd PUBLIC  .)
+ target_include_directories(mtmd PRIVATE ../..)
+-target_include_directories(mtmd PRIVATE ../../vendor)
+ target_compile_features   (mtmd PRIVATE cxx_std_17)
+ 
+ if (BUILD_SHARED_LIBS)
+diff --git a/tools/mtmd/mtmd-helper.cpp b/tools/mtmd/mtmd-helper.cpp
+index 4094074..d40ae74 100644
+--- a/tools/mtmd/mtmd-helper.cpp
++++ b/tools/mtmd/mtmd-helper.cpp
+@@ -27,10 +27,10 @@
+ #define MA_NO_ENGINE
+ #define MA_NO_GENERATION
+ #define MA_API static
+-#include "miniaudio/miniaudio.h"
++#include "miniaudio.h"
+ 
+ #define STB_IMAGE_IMPLEMENTATION
+-#include "stb/stb_image.h"
++#include "stb_image.h"
+ 
+ #ifdef MTMD_INTERNAL_HEADER
+ #error "mtmd-helper is a public library outside of mtmd. it must not include internal headers"
+diff --git a/tools/server/server-http.cpp b/tools/server/server-http.cpp
+index 6f24f83..40a5c85 100644
+--- a/tools/server/server-http.cpp
++++ b/tools/server/server-http.cpp
+@@ -2,7 +2,7 @@
+ #include "server-http.h"
+ #include "server-common.h"
+ 
+-#include <cpp-httplib/httplib.h>
++#include <httplib.h>
+ 
+ #include
+ #include
+diff --git a/tools/server/server-models.cpp b/tools/server/server-models.cpp
+index 5a05ca2..3b7aef9 100644
+--- a/tools/server/server-models.cpp
++++ b/tools/server/server-models.cpp
+@@ -5,7 +5,7 @@
+ #include "preset.h"
+ #include "download.h"
+ 
+-#include <cpp-httplib/httplib.h> // TODO: remove this once we use HTTP client from download.h
++#include <httplib.h> // TODO: remove this once we use HTTP client from download.h
+ #include
+ 
+ #include
diff --git a/ports/llama-cpp/vcpkg.json b/ports/llama-cpp/vcpkg.json
index 5415bb3af1f580..2bb3d8f96e5def 100644
--- a/ports/llama-cpp/vcpkg.json
+++ b/ports/llama-cpp/vcpkg.json
@@ -1,11 +1,13 @@
 {
   "name": "llama-cpp",
-  "version": "7146",
+  "version": "9030",
   "description": "LLM inference in C/C++",
   "homepage": "https://github.com/ggml-org/llama.cpp",
   "license": "MIT",
   "dependencies": [
+    "cpp-httplib",
     "ggml",
+    "nlohmann-json",
     {
       "name": "vcpkg-cmake",
       "host": true
@@ -25,8 +27,15 @@
         }
       ]
     },
+    "server": {
+      "description": "Build llama-server and llama-cli"
+    },
     "tools": {
-      "description": "Build tools"
+      "description": "Build tools",
+      "dependencies": [
+        "miniaudio",
+        "stb"
+      ]
     }
   }
 }
diff --git a/versions/baseline.json b/versions/baseline.json
index 538914e3146ddd..dd4edfc433818b 100644
--- a/versions/baseline.json
+++ b/versions/baseline.json
@@ -3373,8 +3373,8 @@
       "port-version": 0
     },
     "ggml": {
-      "baseline": "2025-11-17",
-      "port-version": 2
+      "baseline": "0.11.0",
+      "port-version": 0
     },
     "ghc-filesystem": {
       "baseline": "1.5.14",
       "port-version": 0
     },
@@ -6073,7 +6073,7 @@
       "port-version": 0
     },
     "llama-cpp": {
-      "baseline": "7146",
+      "baseline": "9030",
       "port-version": 0
     },
     "llfio": {
diff --git a/versions/g-/ggml.json b/versions/g-/ggml.json
index 59cf6e89cb9bf6..de6b40c211b1b8 100644
--- a/versions/g-/ggml.json
+++ b/versions/g-/ggml.json
@@ -1,5 +1,10 @@
 {
   "versions": [
+    {
+      "git-tree": "3300637ff16adc68d2325217ed8d51440889c24f",
+      "version": "0.11.0",
+      "port-version": 0
+    },
     {
       "git-tree": "befbc0e2ba5c226a789bd5ed9f3f515182b53ef1",
       "version-date": "2025-11-17",
diff --git a/versions/l-/llama-cpp.json b/versions/l-/llama-cpp.json
index 9b0d9ad3498c81..131834ee9cb5cb 100644
--- a/versions/l-/llama-cpp.json
+++ b/versions/l-/llama-cpp.json
@@ -1,5 +1,10 @@
 {
   "versions": [
+    {
+      "git-tree": "bb7da511f102de37eabea85b242a21d1d4390c76",
+      "version": "9030",
+      "port-version": 0
+    },
     {
       "git-tree": "a0eeadd7130b262ce1aa255f5c6b1d27fc57a4e1",
       "version": "7146",