-
Notifications
You must be signed in to change notification settings - Fork 7.5k
ggml update to 0.11.0, llama-cpp update to 9030 #51551
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Merged
Merged
Changes from all commits
Commits
Show all changes
13 commits
Select commit
Hold shift + click to select a range
647dc0c
[ggml] update to 0.10.2
miyanyan ffe5547
update versions
miyanyan 3e4cc43
ggml update to 0.11.0, llama-cpp update to 9030
miyanyan 27e36e8
update versions
miyanyan 3f80360
unvendor, cli/server was controlled by LLAMA_BUILD_SERVER
miyanyan 59cc353
Update llama-cpp.json
miyanyan 17cdb7f
Merge branch 'master' into ggml-0.10.2
BillyONeal 5adebd0
remove cuda-13-2.diff, use add_library(cpp-httplib ALIAS httplib::httplib)
miyanyan 723db62
fix vulkan errors
miyanyan 6a99083
copy llama-debug-template-parser tool
miyanyan 0bb960d
update versions
miyanyan 1daebc9
fix vulkan 32bit build error
miyanyan fbbaa3b
Update ggml.json
miyanyan File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
This file was deleted.
Oops, something went wrong.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,71 @@ | ||
| diff --git a/src/ggml-vulkan/ggml-vulkan.cpp b/src/ggml-vulkan/ggml-vulkan.cpp | ||
| index 423e01d..3cd5488 100644 | ||
| --- a/src/ggml-vulkan/ggml-vulkan.cpp | ||
| +++ b/src/ggml-vulkan/ggml-vulkan.cpp | ||
| @@ -2036,7 +2036,7 @@ void vk_memory_logger::log_allocation(vk_buffer_ref buf_ref, size_t size) { | ||
| allocations[buf->buffer] = size; | ||
| total_device += device ? size : 0; | ||
| total_host += device ? 0 : size; | ||
| - VK_LOG_MEMORY(buf->device->name << ": +" << format_size(size) << " " << type << " at " << buf->buffer << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host)); | ||
| + VK_LOG_MEMORY(buf->device->name << ": +" << format_size(size) << " " << type << " at " << static_cast<VkBuffer>(buf->buffer) << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host)); | ||
| } | ||
|
|
||
| void vk_memory_logger::log_deallocation(vk_buffer_ref buf_ref) { | ||
| @@ -2052,10 +2052,10 @@ void vk_memory_logger::log_deallocation(vk_buffer_ref buf_ref) { | ||
| total_device -= device ? it->second : 0; | ||
| total_host -= device ? 0 : it->second; | ||
| if (it != allocations.end()) { | ||
| - VK_LOG_MEMORY(buf->device->name << ": -" << format_size(it->second) << " " << type << " at " << buf->buffer << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host)); | ||
| + VK_LOG_MEMORY(buf->device->name << ": -" << format_size(it->second) << " " << type << " at " << static_cast<VkBuffer>(buf->buffer) << ". Total device: " << format_size(total_device) << ", total host: " << format_size(total_host)); | ||
| allocations.erase(it); | ||
| } else { | ||
| - VK_LOG_MEMORY("ERROR " << buf->device->name << ": Attempted to deallocate unknown " << type << " memory at " << buf->buffer); | ||
| + VK_LOG_MEMORY("ERROR " << buf->device->name << ": Attempted to deallocate unknown " << type << " memory at " << static_cast<VkBuffer>(buf->buffer)); | ||
| } | ||
| } | ||
|
|
||
| @@ -6951,7 +6951,7 @@ static bool ggml_vk_buffer_write_2d_async(vk_context subctx, vk_buffer& dst, siz | ||
| } | ||
|
|
||
| ggml_vk_sync_buffers(nullptr, subctx); | ||
| - subctx->s->buffer->buf.copyBuffer((VkBuffer)staging_buffer->buffer, (VkBuffer)dst->buffer, slices); | ||
| + subctx->s->buffer->buf.copyBuffer(staging_buffer->buffer, dst->buffer, slices); | ||
|
|
||
| if (width == spitch) { | ||
| deferred_memcpy((uint8_t *)staging_buffer->ptr, src, staging_size, &subctx->in_memcpys); | ||
| @@ -7086,7 +7086,7 @@ static bool ggml_vk_buffer_read_async(vk_context subctx, vk_buffer& src, size_t | ||
| } | ||
|
|
||
| static void ggml_vk_buffer_read_2d(vk_buffer& src, size_t offset, void * dst, size_t spitch, size_t dpitch, size_t width, size_t height) { | ||
| - VK_LOG_DEBUG("ggml_vk_buffer_read_2d(" << src->buffer << ", " << offset << ", " << width << ", " << height << ")"); | ||
| + VK_LOG_DEBUG("ggml_vk_buffer_read_2d(" << static_cast<VkBuffer>(src->buffer) << ", " << offset << ", " << width << ", " << height << ")"); | ||
|
|
||
| // If the device is not an UMA device the memory is host-accessible through rebar. While writing | ||
| // through PCIe is sufficient fast reading back data from PCIe is slower than going through | ||
| @@ -7118,7 +7118,7 @@ static void ggml_vk_buffer_read_2d(vk_buffer& src, size_t offset, void * dst, si | ||
| } | ||
|
|
||
| static void ggml_vk_buffer_read(vk_buffer& src, size_t offset, void * dst, size_t size) { | ||
| - VK_LOG_DEBUG("ggml_vk_buffer_read(" << src->buffer << ", " << offset << ", " << size << ")"); | ||
| + VK_LOG_DEBUG("ggml_vk_buffer_read(" << static_cast<VkBuffer>(src->buffer) << ", " << offset << ", " << size << ")"); | ||
| ggml_vk_buffer_read_2d(src, offset, dst, size, size, size, 1); | ||
| } | ||
|
|
||
| @@ -7286,7 +7286,7 @@ static void ggml_vk_matmul( | ||
| uint32_t batch_stride_a, uint32_t batch_stride_b, uint32_t batch_stride_d, | ||
| uint32_t split_k, uint32_t batch, uint32_t ne02, uint32_t ne12, uint32_t broadcast2, uint32_t broadcast3, | ||
| uint32_t padded_n) { | ||
| - VK_LOG_DEBUG("ggml_vk_matmul(a: (" << a.buffer->buffer << ", " << a.offset << ", " << a.size << "), b: (" << b.buffer->buffer << ", " << b.offset << ", " << b.size << "), d: (" << d.buffer->buffer << ", " << d.offset << ", " << d.size << "), split_k: (" << (split_k_buffer.buffer != nullptr ? split_k_buffer.buffer->buffer : VK_NULL_HANDLE) << ", " << split_k_buffer.offset << ", " << split_k_buffer.size << "), m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ", split_k: " << split_k << ", batch: " << batch << ", ne02: " << ne02 << ", ne12: " << ne12 << ", broadcast2: " << broadcast2 << ", broadcast3: " << broadcast3 << ", padded_n: " << padded_n << ")"); | ||
| + VK_LOG_DEBUG("ggml_vk_matmul(a: (" << static_cast<VkBuffer>(a.buffer->buffer) << ", " << a.offset << ", " << a.size << "), b: (" << static_cast<VkBuffer>(b.buffer->buffer) << ", " << b.offset << ", " << b.size << "), d: (" << static_cast<VkBuffer>(d.buffer->buffer) << ", " << d.offset << ", " << d.size << "), split_k: (" << (split_k_buffer.buffer != nullptr ? static_cast<VkBuffer>(split_k_buffer.buffer->buffer) : VkBuffer{}) << ", " << split_k_buffer.offset << ", " << split_k_buffer.size << "), m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ", split_k: " << split_k << ", batch: " << batch << ", ne02: " << ne02 << ", ne12: " << ne12 << ", broadcast2: " << broadcast2 << ", broadcast3: " << broadcast3 << ", padded_n: " << padded_n << ")"); | ||
| if (split_k == 1) { | ||
| ggml_pipeline_request_descriptor_sets(ctx, pipeline, CEIL_DIV(batch, ctx->device->properties.limits.maxComputeWorkGroupCount[2])); | ||
|
|
||
| @@ -7366,7 +7366,7 @@ static void ggml_vk_matmul_id( | ||
| uint32_t batch_stride_a, uint32_t batch_stride_b, uint32_t batch_stride_d, | ||
| uint32_t n_as, uint32_t nei0, uint32_t nei1, uint32_t nbi1, uint32_t ne11, | ||
| uint32_t padded_n) { | ||
| - VK_LOG_DEBUG("ggml_vk_matmul_id(a: (" << a.buffer->buffer << ", " << a.offset << ", " << a.size << "), b: (" << b.buffer->buffer << ", " << b.offset << ", " << b.size << "), d: (" << d.buffer->buffer << ", " << d.offset << ", " << d.size << "), ids: (" << ids.buffer->buffer << ", " << ids.offset << ", " << ids.size << "), expert_count: (" << expert_count_buf.buffer->buffer << ", " << expert_count_buf.offset << ", " << expert_count_buf.size << "), " << | ||
| + VK_LOG_DEBUG("ggml_vk_matmul_id(a: (" << static_cast<VkBuffer>(a.buffer->buffer) << ", " << a.offset << ", " << a.size << "), b: (" << static_cast<VkBuffer>(b.buffer->buffer) << ", " << b.offset << ", " << b.size << "), d: (" << static_cast<VkBuffer>(d.buffer->buffer) << ", " << d.offset << ", " << d.size << "), ids: (" << static_cast<VkBuffer>(ids.buffer->buffer) << ", " << ids.offset << ", " << ids.size << "), expert_count: (" << static_cast<VkBuffer>(expert_count_buf.buffer->buffer) << ", " << expert_count_buf.offset << ", " << expert_count_buf.size << "), " << | ||
| "m: " << m << ", n: " << n << ", k: " << k << ", stride_a: " << stride_a << ", stride_b: " << stride_b << ", stride_d: " << stride_d << ", " << | ||
| "batch_stride_a: " << batch_stride_a << ", batch_stride_b: " << batch_stride_b << ", batch_stride_d: " << batch_stride_d << ", " << | ||
| "n_as: " << n_as << ", nei0: " << nei0 << ", nei1: " << nei1 << ", nbi1: " << nbi1 << ", ne11: " << ne11 << ")"); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,66 @@ | ||
| diff --git a/src/ggml-vulkan/ggml-vulkan.cpp b/src/ggml-vulkan/ggml-vulkan.cpp | ||
| index 423e01dbff1..0a7931002ab 100644 | ||
| --- a/src/ggml-vulkan/ggml-vulkan.cpp | ||
| +++ b/src/ggml-vulkan/ggml-vulkan.cpp | ||
| @@ -2149,11 +2149,11 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin | ||
|
|
||
| // Patch SPIR-V to enable RTE rounding for FP16, avoiding the need for | ||
| // separate shader variants compiled with -DRTE16. | ||
| - std::vector<uint32_t> spv; | ||
| + std::vector<uint32_t> spirv; | ||
| if (device->float_controls_rte_fp16) { | ||
| const uint32_t* spv_words = reinterpret_cast<const uint32_t *>(spv_data); | ||
| size_t word_count = spv_size / sizeof(uint32_t); | ||
| - spv.assign(spv_words, spv_words + word_count); | ||
| + spirv.assign(spv_words, spv_words + word_count); | ||
|
|
||
| // Find insertion points respecting SPIR-V layout order: | ||
| // Header(5) -> OpCapability -> OpExtension -> ... -> OpEntryPoint -> OpExecutionMode -> ... | ||
| @@ -2163,9 +2163,9 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin | ||
| size_t exec_insert_pos = pos; | ||
| uint32_t entry_point_id = 0; | ||
|
|
||
| - while (pos < spv.size()) { | ||
| - uint32_t opcode = spv[pos] & spv::OpCodeMask; | ||
| - uint32_t len = spv[pos] >> spv::WordCountShift; | ||
| + while (pos < spirv.size()) { | ||
| + uint32_t opcode = spirv[pos] & spv::OpCodeMask; | ||
| + uint32_t len = spirv[pos] >> spv::WordCountShift; | ||
| if (len == 0) break; | ||
|
|
||
| if (opcode == spv::OpCapability) { | ||
| @@ -2174,7 +2174,7 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin | ||
| } else if (opcode == spv::OpExtension) { | ||
| ext_insert_pos = pos + len; | ||
| } else if (opcode == spv::OpEntryPoint) { | ||
| - entry_point_id = spv[pos + 2]; | ||
| + entry_point_id = spirv[pos + 2]; | ||
| exec_insert_pos = pos + len; | ||
| } else if (opcode == spv::OpExecutionMode || opcode == spv::OpExecutionModeId) { | ||
| exec_insert_pos = pos + len; | ||
| @@ -2189,7 +2189,7 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin | ||
|
|
||
| // OpExecutionMode %entrypoint RoundingModeRTE 16 | ||
| uint32_t exec_mode[] = { (4u << spv::WordCountShift) | spv::OpExecutionMode, entry_point_id, spv::ExecutionModeRoundingModeRTE, 16 }; | ||
| - spv.insert(spv.begin() + exec_insert_pos, std::begin(exec_mode), std::end(exec_mode)); | ||
| + spirv.insert(spirv.begin() + exec_insert_pos, std::begin(exec_mode), std::end(exec_mode)); | ||
|
|
||
| // OpExtension "SPV_KHR_float_controls" | ||
| const char ext_str[] = "SPV_KHR_float_controls"; | ||
| @@ -2197,13 +2197,13 @@ static void ggml_vk_create_pipeline_func(vk_device& device, vk_pipeline& pipelin | ||
| std::vector<uint32_t> extension(1 + ext_str_words, 0); | ||
| extension[0] = (uint32_t)((1 + ext_str_words) << spv::WordCountShift) | spv::OpExtension; | ||
| memcpy(&extension[1], ext_str, sizeof(ext_str)); | ||
| - spv.insert(spv.begin() + ext_insert_pos, extension.begin(), extension.end()); | ||
| + spirv.insert(spirv.begin() + ext_insert_pos, extension.begin(), extension.end()); | ||
|
|
||
| // OpCapability RoundingModeRTE | ||
| uint32_t capability[] = { (2u << spv::WordCountShift) | spv::OpCapability, spv::CapabilityRoundingModeRTE }; | ||
| - spv.insert(spv.begin() + cap_insert_pos, std::begin(capability), std::end(capability)); | ||
| + spirv.insert(spirv.begin() + cap_insert_pos, std::begin(capability), std::end(capability)); | ||
|
|
||
| - shader_module_create_info = vk::ShaderModuleCreateInfo({}, spv.size() * sizeof(uint32_t), spv.data()); | ||
| + shader_module_create_info = vk::ShaderModuleCreateInfo({}, spirv.size() * sizeof(uint32_t), spirv.data()); | ||
| } | ||
|
|
||
| pipeline->shader_module = device->device.createShaderModule(shader_module_create_info); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,16 +1,17 @@ | ||
| vcpkg_from_github( | ||
| OUT_SOURCE_PATH SOURCE_PATH | ||
| REPO ggml-org/ggml | ||
| REF 55bc9320a4aae82af18e23eefd5de319a755d7b9 | ||
| SHA512 9433c9c258bbbfa817051f2ba2a8c8f166ee885c953d3ee27198890d4af8366fdee11ba55514b8b8414c836615e56eceaa98f33a01ecf51846338bc60d34263b | ||
| REF v${VERSION} | ||
| SHA512 c49b6498101f50a5ebde158f5707237f6df3af0182e819651003ec5f418144f9ca68cc8434189f2a658ea705269fad2d263cfe4931c12b369883aa5a66182ed9 | ||
| HEAD_REF master | ||
| PATCHES | ||
| cmake-config.diff | ||
| pkgconfig.diff | ||
| relax-link-options.diff | ||
| vulkan-shaders-gen.diff | ||
| fix-dequant_funcs.diff | ||
| cuda-13-2.diff | ||
| fix-vulkan-spv-shadowing.diff | ||
| fix-vk-32bit.diff | ||
|
Comment on lines
+13
to
+14
Member
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note for reviewers: these patches have been turned into upstream PRs: |
||
| ) | ||
|
|
||
| vcpkg_check_features(OUT_FEATURE_OPTIONS FEATURE_OPTIONS | ||
|
|
||
Oops, something went wrong.
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Uh oh!
There was an error while loading. Please reload this page.