diff --git a/.cargo/config.toml b/.cargo/config.toml new file mode 100644 index 0000000..dd4dfb9 --- /dev/null +++ b/.cargo/config.toml @@ -0,0 +1,4 @@ +[build] +# Limit parallel codegen units to reduce peak memory usage during compilation, +# especially when building candle-flash-attn with CUDA. +jobs = 2 diff --git a/.gitignore b/.gitignore index 41db16e..4252e1a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ __pycache__ *.dsq baselines session +build.log diff --git a/Cargo.lock b/Cargo.lock index 65cb2d8..9055ece 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -39,9 +39,9 @@ dependencies = [ [[package]] name = "aho-corasick" -version = "1.1.3" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +checksum = "ddd31a130427c27518df266943a5308ed92d4b226cc639f5a8f1002816174301" dependencies = [ "memchr", ] @@ -52,12 +52,6 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -69,9 +63,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.21" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43d5b281e737544384e969a5ccad3f1cdd24b48086a0fc1b2a5262a26b8f4f4a" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" dependencies = [ "anstyle", "anstyle-parse", @@ -84,47 +78,44 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.13" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5192cca8006f1fd4f7237516f40fa183bb07f8fbdfedaa0036de5ea9b0b45e78" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" [[package]] name = "anstyle-parse" -version = "0.2.7" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.4" +version = "1.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e231f6134f61b71076a3eab506c379d4f36122f2af15a9ff04415ea4c3339e2" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anstyle-wincon" -version = "3.0.10" +version = "3.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3e0633414522a32ffaac8ac6cc8f748e090c5717661fddeea04219e2344f5f2a" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" dependencies = [ "anstyle", "once_cell_polyfill", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "anyhow" -version = "1.0.100" +version = "1.0.102" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a23eb6b1614318a8071c9b2521f36b424b2c83db5eb3a0fead4a6c0809af6e61" -dependencies = [ - "backtrace", -] +checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c" [[package]] name = "arbitrary" @@ -207,7 +198,7 @@ dependencies = [ "miniz_oxide", "object", "rustc-demangle", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -259,9 +250,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.10.0" +version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "812e12b5285cc515a9c72a5c1d3b6d46a19dac5acfef5265968c166106e31dd3" +checksum = "c4512299f36f043ab09a583e57bceb5a5aab7a73db1805848e8fef3c9e8c78b3" [[package]] name = "block" @@ -289,15 +280,15 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.19.0" +version = "3.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" +checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb" [[package]] name = "bytemuck" -version = "1.24.0" +version = "1.25.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fbdf580320f38b612e485521afda1ee26d10cc9884efaaa750d383e13e3c5f4" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" dependencies = [ "bytemuck_derive", ] @@ -327,9 +318,9 @@ checksum = "8f1fe948ff07f4bd06c30984e69f5b4899c516a3ef74f34df92a2df2ab535495" [[package]] name = "bytes" -version = "1.10.1" +version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71b6127be86fdcfddb610f7182ac57211d4b18a3e9c82eb2d17662f2227ad6a" +checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" [[package]] name = "candle-core" @@ -342,7 +333,7 @@ dependencies = [ "candle-kernels", "candle-metal-kernels", "candle-ug", - "cudarc 0.19.1", + "cudarc 0.19.7", "float8 0.6.1", "gemm 0.19.0", "half", @@ -354,27 +345,37 @@ dependencies = [ "num_cpus", "objc2-foundation", "objc2-metal", - "rand 0.9.2", + "rand 0.9.4", "rand_distr", "rayon", "safetensors 0.7.0", - "thiserror 2.0.17", - "yoke 0.8.1", - "zip 7.4.0", + "thiserror 2.0.18", + "yoke 0.8.2", + "zip 7.2.0", ] [[package]] name = "candle-flash-attn" -version = "0.9.1" +version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb38a5bfae09c4ae73fd00039e5eaf97a7d6d9400cc35ee8e603fc4a5f9cb0a3" +checksum = "c94ddd2e7bb828777b0a8d999ed40d2d6c3c96c9ef2a3111a69e0d96efc436d2" dependencies = [ "anyhow", "bindgen_cuda", "candle-core", + "candle-flash-attn-build", "half", ] +[[package]] +name = "candle-flash-attn-build" +version = "0.9.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4bd79da06f2a3b831cb4f5a1ee393d6f2c5a913e28f5000c678a84108519a78c" +dependencies = [ + "anyhow", +] + [[package]] name = "candle-kernels" version = "0.9.2" @@ -395,7 +396,7 @@ dependencies = [ "objc2-foundation", "objc2-metal", "once_cell", - "thiserror 2.0.17", + "thiserror 2.0.18", "tracing", ] @@ -416,7 +417,7 @@ dependencies = [ "rayon", "safetensors 0.7.0", "serde", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -441,9 +442,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.41" +version = "1.2.62" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac9fe6cdbb24b6ade63616c0a0688e45bb56732262c158df3c0c4bea4ca47cb7" +checksum = "a1dce859f0832a7d088c4f1119888ab94ef4b5d6795d1ce05afb7fe159d79f98" dependencies = [ "find-msvc-tools", "shlex", @@ -463,23 +464,22 @@ checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] name = "chrono" -version = "0.4.41" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "c673075a2e0e5f4a1dde27ce9dee1ea4558c7ffe648f576438a20ca1d2acc4b0" dependencies = [ - "android-tzdata", "iana-time-zone", "js-sys", "num-traits", "wasm-bindgen", - "windows-link 0.1.3", + "windows-link", ] [[package]] name = "clap" -version = "4.5.50" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2cfd7bf8a6017ddaa4e32ffe7403d547790db06bd171c1c53926faab501623" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" dependencies = [ "clap_builder", "clap_derive", @@ -487,9 +487,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.50" +version = "4.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a4c05b9e80c5ccd3a7ef080ad7b6ba7d6fc00a985b8b157197075677c82c7a0" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" dependencies = [ "anstream", "anstyle", @@ -499,9 +499,9 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.49" +version = "4.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a0b5487afeab2deb2ff4e03a807ad1a03ac532ff5a2cee5d86884440c7f7671" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" dependencies = [ "heck", "proc-macro2", @@ -511,15 +511,15 @@ dependencies = [ [[package]] name = "clap_lex" -version = "0.7.6" +version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1d728cc89cf3aee9ff92b05e62b19ee65a02b5702cff7d5a377e32c6ae29d8d" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" [[package]] name = "colorchoice" -version = "1.0.4" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" [[package]] name = "compact_str" @@ -549,6 +549,18 @@ dependencies = [ "windows-sys 0.59.0", ] +[[package]] +name = "console" +version = "0.16.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d64e8af5551369d19cf50138de61f1c42074ab970f74e99be916646777f8fc87" +dependencies = [ + "encode_unicode", + "libc", + "unicode-width", + "windows-sys 0.61.2", +] + [[package]] name = "cookie" version = "0.18.1" @@ -638,9 +650,9 @@ checksum = "460fbee9c2c2f33933d720630a6a0bac33ba7053db5344fac858d4b8952d77d5" [[package]] name = "crypto-common" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" +checksum = "78c8292055d1c1df0cce5d180393dc8cce0abec0a7102adb6c7b1eef6016d60a" dependencies = [ "generic-array", "typenum", @@ -658,9 +670,8 @@ dependencies = [ [[package]] name = "cudarc" -version = "0.19.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60d4882b3e023670c25b8a3e1ac97349070f8e5400807c091b70e0bdad6e9b93" +version = "0.19.7" +source = "git+https://github.com/coreylowman/cudarc?branch=main#3e5d38b5fe5ec81c934bdc2c7207f181772e307d" dependencies = [ "float8 0.7.0", "half", @@ -704,9 +715,9 @@ dependencies = [ [[package]] name = "dary_heap" -version = "0.3.8" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06d2e3287df1c007e74221c49ca10a95d557349e54b3a75dc2fb14712c751f04" +checksum = "8b1e3a325bc115f096c8b77bbf027a7c2592230e70be2d985be950d3d5e60ebe" dependencies = [ "serde", ] @@ -719,7 +730,7 @@ dependencies = [ "deepseek-ocr-core", "deepseek-ocr-infer-deepseek", "hf-hub", - "indicatif", + "indicatif 0.17.11", "once_cell", "reqwest 0.11.27", "serde", @@ -776,7 +787,7 @@ dependencies = [ "ndarray", "ndarray-npy", "once_cell", - "rand 0.8.5", + "rand 0.8.6", "rayon", "serde", "serde_json", @@ -804,7 +815,7 @@ dependencies = [ "deepseek-ocr-dsq-models", "deepseek-ocr-dsq-writer", "half", - "indicatif", + "indicatif 0.17.11", "memmap2", "rayon", "safetensors 0.4.5", @@ -862,7 +873,7 @@ dependencies = [ "ndarray", "ndarray-npy", "once_cell", - "rand 0.8.5", + "rand 0.8.6", "rayon", "serde", "serde_json", @@ -919,7 +930,7 @@ dependencies = [ "memmap2", "ndarray", "ndarray-npy", - "rand 0.8.5", + "rand 0.8.6", "safetensors 0.4.5", "serde", "serde_json", @@ -943,7 +954,7 @@ dependencies = [ "deepseek-ocr-infer-glm", "deepseek-ocr-infer-paddleocr", "image", - "reqwest 0.12.24", + "reqwest 0.12.28", "rocket", "serde", "serde_json", @@ -957,9 +968,9 @@ dependencies = [ [[package]] name = "deranged" -version = "0.5.4" +version = "0.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a41953f86f8a05768a6cda24def994fd2f424b04ec5c719cf89989779f199071" +checksum = "7cd812cc2bc1d69d4764bd80df88b4317eaef9e773c75226407d9bc0876b211c" dependencies = [ "powerfmt", ] @@ -1032,7 +1043,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b035a542cf7abf01f2e3c4d5a7acbaebfefe120ae4efc7bde3df98186e4b8af7" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", "proc-macro2", "proc-macro2-diagnostics", "quote", @@ -1113,16 +1124,16 @@ dependencies = [ "libc", "option-ext", "redox_users 0.5.2", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] name = "dispatch2" -version = "0.3.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89a09f22a6c6069a18470eb92d2298acf25463f14256d24778e1230d789a2aec" +checksum = "1e0e367e4e7da84520dedcac1901e4da967309406d1e51017ae1abfb97adbd38" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", "objc2", ] @@ -1208,7 +1219,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -1222,21 +1233,21 @@ dependencies = [ [[package]] name = "fast_image_resize" -version = "5.3.0" +version = "5.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bd1eda71e8af93f8b00e189404235d82f4de77ea4a0d182b44a7f03994d647c" +checksum = "fbc7fe45cf92b43817ff62a3723e862b85bd1d06288f63007f7645d1d2f7a060" dependencies = [ "cfg-if", "document-features", "num-traits", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] name = "fastrand" -version = "2.3.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" +checksum = "9f1f227452a390804cdb637b74a86990f2a7d7ba4b7d5693aac9b4dd6defd8d6" [[package]] name = "fdeflate" @@ -1263,27 +1274,25 @@ dependencies = [ [[package]] name = "filetime" -version = "0.2.25" +version = "0.2.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586" +checksum = "5c287a33c7f0a620c38e641e7f60827713987b3c0f26e8ddc9462cc69cf75759" dependencies = [ "cfg-if", "libc", - "libredox", - "windows-sys 0.59.0", ] [[package]] name = "find-msvc-tools" -version = "0.1.4" +version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52051878f80a721bb68ebfbc930e07b65ba72f2da88968ea5c06fd6ca3d3a127" +checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582" [[package]] name = "flate2" -version = "1.1.4" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dc5a4e564e38c699f2880d3fda590bedc2e69f3f84cd48b457bd892ce61d0aa9" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ "crc32fast", "miniz_oxide", @@ -1295,10 +1304,10 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "719a903cc23e4a89e87962c2a80fdb45cdaad0983a89bd150bb57b4c8571a7d5" dependencies = [ - "cudarc 0.19.1", + "cudarc 0.19.7", "half", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rand_distr", ] @@ -1317,6 +1326,12 @@ version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" +[[package]] +name = "foldhash" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" + [[package]] name = "foldhash" version = "0.2.0" @@ -1361,9 +1376,9 @@ dependencies = [ [[package]] name = "futures" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "65bc07b1a8bc7c85c5f2e110c476c7389b4554ba72af57d8445ea63a576b0876" +checksum = "8b147ee9d1f6d097cef9ce628cd2ee62288d963e16fb287bd9286455b241382d" dependencies = [ "futures-channel", "futures-core", @@ -1375,9 +1390,9 @@ dependencies = [ [[package]] name = "futures-channel" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2dff15bf788c671c1934e366d07e30c1814a8ef514e1af724a602e8a2fbe1b10" +checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d" dependencies = [ "futures-core", "futures-sink", @@ -1385,21 +1400,21 @@ dependencies = [ [[package]] name = "futures-core" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05f29059c0c2090612e8d742178b0580d2dc940c837851ad723096f87af6663e" +checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d" [[package]] name = "futures-io" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9e5c1b78ca4aae1ac06c48a526a655760685149f0d465d21f37abfe57ce075c6" +checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718" [[package]] name = "futures-macro" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" +checksum = "e835b70203e41293343137df5c0664546da5745f82ec9b84d40be8336958447b" dependencies = [ "proc-macro2", "quote", @@ -1408,21 +1423,21 @@ dependencies = [ [[package]] name = "futures-sink" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e575fab7d1e0dcb8d0c7bcf9a63ee213816ab51902e6d244a95819acacf1d4f7" +checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893" [[package]] name = "futures-task" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f90f7dce0722e95104fcb095585910c0977252f286e354b5e3bd38902cd99988" +checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393" [[package]] name = "futures-util" -version = "0.3.31" +version = "0.3.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9fa08315bb612088cc391249efdc3bc77536f16c91f6cf495e6fbe85b20a4a81" +checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6" dependencies = [ "futures-channel", "futures-core", @@ -1432,7 +1447,6 @@ dependencies = [ "futures-task", "memchr", "pin-project-lite", - "pin-utils", "slab", ] @@ -1689,9 +1703,9 @@ dependencies = [ [[package]] name = "generic-array" -version = "0.14.9" +version = "0.14.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bb6743198531e02858aeaea5398fcc883e71851fcbcb5a2f773e2fb6cb1edf2" +checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" dependencies = [ "typenum", "version_check", @@ -1699,9 +1713,9 @@ dependencies = [ [[package]] name = "getrandom" -version = "0.2.16" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "335ff9f135e4384c8150d6f27c6daed433577f86b4750418338c01a1a2528592" +checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0" dependencies = [ "cfg-if", "js-sys", @@ -1719,11 +1733,24 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "r-efi", + "r-efi 5.3.0", "wasip2", "wasm-bindgen", ] +[[package]] +name = "getrandom" +version = "0.4.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555" +dependencies = [ + "cfg-if", + "libc", + "r-efi 6.0.0", + "wasip2", + "wasip3", +] + [[package]] name = "getset" version = "0.1.6" @@ -1777,23 +1804,39 @@ dependencies = [ "cfg-if", "crunchy", "num-traits", - "rand 0.9.2", + "rand 0.9.4", "rand_distr", "zerocopy", ] [[package]] name = "hashbrown" -version = "0.16.0" +version = "0.15.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5419bdc4f6a9207fbeba6d11b604d481addf78ecd10c11ad51e76c2f6482748d" +checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" +dependencies = [ + "foldhash 0.1.5", +] + +[[package]] +name = "hashbrown" +version = "0.16.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.2.0", "serde", + "serde_core", ] +[[package]] +name = "hashbrown" +version = "0.17.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed5909b6e89a2db4456e54cd5f673791d7eca6732202bbf2a9cc504fe2f9b84a" + [[package]] name = "heck" version = "0.5.0" @@ -1813,15 +1856,15 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97" dependencies = [ "dirs 6.0.0", - "http 1.3.1", - "indicatif", + "http 1.4.0", + "indicatif 0.17.11", "libc", "log", - "rand 0.9.2", - "reqwest 0.12.24", + "rand 0.9.4", + "reqwest 0.12.28", "serde", "serde_json", - "thiserror 2.0.17", + "thiserror 2.0.18", "ureq", "windows-sys 0.60.2", ] @@ -1839,12 +1882,11 @@ dependencies = [ [[package]] name = "http" -version = "1.3.1" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4a85d31aea989eead29a3aaf9e1115a180df8282431156e533de47660892565" +checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a" dependencies = [ "bytes", - "fnv", "itoa", ] @@ -1866,7 +1908,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http 1.3.1", + "http 1.4.0", ] [[package]] @@ -1877,7 +1919,7 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "pin-project-lite", ] @@ -1920,20 +1962,19 @@ dependencies = [ [[package]] name = "hyper" -version = "1.7.0" +version = "1.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eb3aa54a13a0dfe7fbe3a59e0c76093041720fdc77b110cc0fc260fafb4dc51e" +checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca" dependencies = [ "atomic-waker", "bytes", "futures-channel", "futures-core", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "httparse", "itoa", "pin-project-lite", - "pin-utils", "smallvec", "tokio", "want", @@ -1955,40 +1996,38 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.7" +version = "0.27.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" +checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f" dependencies = [ - "http 1.3.1", - "hyper 1.7.0", + "http 1.4.0", + "hyper 1.9.0", "hyper-util", - "rustls 0.23.34", - "rustls-pki-types", + "rustls 0.23.40", "tokio", "tokio-rustls 0.26.4", "tower-service", - "webpki-roots 1.0.3", + "webpki-roots 1.0.7", ] [[package]] name = "hyper-util" -version = "0.1.17" +version = "0.1.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c6995591a8f1380fcb4ba966a252a4b29188d51d2b89e3a252f5305be65aea8" +checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", - "futures-core", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", - "hyper 1.7.0", + "hyper 1.9.0", "ipnet", "libc", "percent-encoding", "pin-project-lite", - "socket2 0.6.1", + "socket2 0.6.3", "tokio", "tower-service", "tracing", @@ -1996,9 +2035,9 @@ dependencies = [ [[package]] name = "iana-time-zone" -version = "0.1.63" +version = "0.1.65" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0c919e5debc312ad217002b8048a17b7d83f80703865bbfcfebb0458b0b27d8" +checksum = "e31bc9ad994ba00e440a8aa5c9ef0ec67d5cb5e5cb0cc7f8b744a35b389cc470" dependencies = [ "android_system_properties", "core-foundation-sys", @@ -2020,22 +2059,23 @@ dependencies = [ [[package]] name = "icu_collections" -version = "2.0.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "200072f5d0e3614556f94a9930d5dc3e0662a652823904c3a75dc3b0af7fee47" +checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c" dependencies = [ "displaydoc", "potential_utf", - "yoke 0.8.1", + "utf8_iter", + "yoke 0.8.2", "zerofrom", "zerovec", ] [[package]] name = "icu_locale_core" -version = "2.0.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cde2700ccaed3872079a65fb1a78f6c0a36c91570f28755dda67bc8f7d9f00a" +checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29" dependencies = [ "displaydoc", "litemap", @@ -2046,11 +2086,10 @@ dependencies = [ [[package]] name = "icu_normalizer" -version = "2.0.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "436880e8e18df4d7bbc06d58432329d6458cc84531f7ac5f024e93deadb37979" +checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4" dependencies = [ - "displaydoc", "icu_collections", "icu_normalizer_data", "icu_properties", @@ -2061,49 +2100,51 @@ dependencies = [ [[package]] name = "icu_normalizer_data" -version = "2.0.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" +checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38" [[package]] name = "icu_properties" -version = "2.0.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" +checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de" dependencies = [ - "displaydoc", "icu_collections", "icu_locale_core", "icu_properties_data", "icu_provider", - "potential_utf", "zerotrie", "zerovec", ] [[package]] name = "icu_properties_data" -version = "2.0.1" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" +checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14" [[package]] name = "icu_provider" -version = "2.0.0" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03c80da27b5f4187909049ee2d72f276f0d9f99a42c306bd0131ecfe04d8e5af" +checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421" dependencies = [ "displaydoc", "icu_locale_core", - "stable_deref_trait", - "tinystr", "writeable", - "yoke 0.8.1", + "yoke 0.8.2", "zerofrom", "zerotrie", "zerovec", ] +[[package]] +name = "id-arena" +version = "2.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954" + [[package]] name = "ident_case" version = "1.0.1" @@ -2123,9 +2164,9 @@ dependencies = [ [[package]] name = "idna_adapter" -version = "1.2.1" +version = "1.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3acae9609540aa318d1bc588455225fb2085b9ed0c4f6bd0d9d5bcd86f1a0344" +checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714" dependencies = [ "icu_normalizer", "icu_properties", @@ -2133,9 +2174,9 @@ dependencies = [ [[package]] name = "image" -version = "0.25.8" +version = "0.25.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "529feb3e6769d234375c4cf1ee2ce713682b8e76538cb13f9fc23e1400a591e7" +checksum = "85ab80394333c02fe689eaf900ab500fbd0c2213da414687ebf995a65d5a6104" dependencies = [ "bytemuck", "byteorder-lite", @@ -2148,12 +2189,12 @@ dependencies = [ [[package]] name = "indexmap" -version = "2.12.0" +version = "2.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6717a8d2a5a929a1a2eb43a12812498ed141a0bcfb7e8f7844fbdbe4303bba9f" +checksum = "d466e9454f08e4a911e14806c24e16fba1b4c121d1ea474396f396069cf949d9" dependencies = [ "equivalent", - "hashbrown", + "hashbrown 0.17.1", "serde", "serde_core", ] @@ -2164,13 +2205,26 @@ version = "0.17.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" dependencies = [ - "console", + "console 0.15.11", "number_prefix", "portable-atomic", "unicode-width", "web-time", ] +[[package]] +name = "indicatif" +version = "0.18.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25470f23803092da7d239834776d653104d551bc4d7eacaf31e6837854b8e9eb" +dependencies = [ + "console 0.16.3", + "portable-atomic", + "unicode-width", + "unit-prefix", + "web-time", +] + [[package]] name = "inlinable_string" version = "0.1.15" @@ -2201,19 +2255,9 @@ dependencies = [ [[package]] name = "ipnet" -version = "2.11.0" +version = "2.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" - -[[package]] -name = "iri-string" -version = "0.7.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dbc5ebe9c3a1a7a5127f920a418f7585e9e758e911d0466ed004f393b0e380b2" -dependencies = [ - "memchr", - "serde", -] +checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2" [[package]] name = "is-terminal" @@ -2223,7 +2267,7 @@ checksum = "3640c1c38b8e4e43584d8df18be5fc6b0aa314ce6ebf51b53313d4306cca8e46" dependencies = [ "hermit-abi", "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -2243,16 +2287,18 @@ dependencies = [ [[package]] name = "itoa" -version = "1.0.15" +version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" +checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682" [[package]] name = "js-sys" -version = "0.3.81" +version = "0.3.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec48937a97411dcb524a265206ccd4c90bb711fca92b2792c407f268825b9305" +checksum = "67df7112613f8bfd9150013a0314e196f4800d3201ae742489d999db2f979f08" dependencies = [ + "cfg-if", + "futures-util", "once_cell", "wasm-bindgen", ] @@ -2263,11 +2309,17 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +[[package]] +name = "leb128fmt" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "09edd9e8b54e49e587e4f6295a7d29c3ea94d469cb40ab8ca70b288248a81db2" + [[package]] name = "libc" -version = "0.2.177" +version = "0.2.186" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +checksum = "68ab91017fe16c622486840e4c83c9a37afeff978bd239b5293d61ece587de66" [[package]] name = "libloading" @@ -2276,7 +2328,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d7c4b02199fee7c5d21a5ae7d8cfa79a6ef5bb2fc834d6e9058e89c825efdc55" dependencies = [ "cfg-if", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -2286,37 +2338,35 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "754ca22de805bb5744484a5b151a9e1a8e837d5dc232c2d7d8c2e3492edc8b60" dependencies = [ "cfg-if", - "windows-link 0.2.1", + "windows-link", ] [[package]] name = "libm" -version = "0.2.15" +version = "0.2.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9fbbcab51052fe104eb5e5d351cf728d30a5be1fe14d9be8a3b097481fb97de" +checksum = "b6d2cec3eae94f9f509c767b45932f1ada8350c4bdb85af2fcab4a3c14807981" [[package]] name = "libredox" -version = "0.1.10" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "416f7e718bdb06000964960ffa43b4335ad4012ae8b99060261aa4a8088d5ccb" +checksum = "e02f3bb43d335493c96bf3fd3a321600bf6bd07ed34bc64118e9293bdffea46c" dependencies = [ - "bitflags 2.10.0", "libc", - "redox_syscall", ] [[package]] name = "linux-raw-sys" -version = "0.11.0" +version = "0.12.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df1d3c3b53da64cf5760482273a98e575c651a67eec7f77df96b5b642de8f039" +checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53" [[package]] name = "litemap" -version = "0.8.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" +checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0" [[package]] name = "litrs" @@ -2335,9 +2385,9 @@ dependencies = [ [[package]] name = "log" -version = "0.4.28" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34080505efa8e45a4b816c349525ebe327ceaa8559756f0356cba97ef3bf7432" +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "loom" @@ -2406,15 +2456,15 @@ dependencies = [ [[package]] name = "memchr" -version = "2.7.6" +version = "2.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f52b00d39961fc5b2736ea853c9cc86238e165017a493d1d5c8eac6bdc4cc273" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "memmap2" -version = "0.9.8" +version = "0.9.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" +checksum = "714098028fe011992e1c3962653c96b2d578c4b4bce9036e15ff220319b1e0e3" dependencies = [ "libc", "stable_deref_trait", @@ -2426,7 +2476,7 @@ version = "0.29.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ecfd3296f8c56b7c1f6fbac3c71cefa9d78ce009850c45000015f206dc7fa21" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", "block", "core-graphics-types", "foreign-types", @@ -2465,9 +2515,9 @@ checksum = "c505b3e17ed6b70a7ed2e67fbb2c560ee327353556120d6e72f5232b6880d536" [[package]] name = "mio" -version = "1.1.0" +version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69d83b0086dc8ecf3ce9ae2874b2d1290252e2a30720bea58a5c6639b0092873" +checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1" dependencies = [ "libc", "wasi", @@ -2498,9 +2548,9 @@ dependencies = [ [[package]] name = "moxcms" -version = "0.7.7" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c588e11a3082784af229e23e8e4ecf5bcc6fbe4f69101e0421ce8d79da7f0b40" +checksum = "bb85c154ba489f01b25c0d36ae69a87e4a1c73a72631fc6c0eb6dde34a73e44b" dependencies = [ "num-traits", "pxfm", @@ -2515,7 +2565,7 @@ dependencies = [ "bytes", "encoding_rs", "futures-util", - "http 1.3.1", + "http 1.4.0", "httparse", "memchr", "mime", @@ -2610,9 +2660,9 @@ dependencies = [ [[package]] name = "num-conv" -version = "0.1.0" +version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +checksum = "c6673768db2d862beb9b39a78fdcb1a69439615d5794a1be50caa9bc92c81967" [[package]] name = "num-integer" @@ -2682,9 +2732,9 @@ dependencies = [ [[package]] name = "objc2" -version = "0.6.3" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b7c2599ce0ec54857b29ce62166b0ed9b4f6f1a70ccc9a71165b6154caca8c05" +checksum = "3a12a8ed07aefc768292f076dc3ac8c48f3781c8f2d5851dd3d98950e8c5a89f" dependencies = [ "objc2-encode", ] @@ -2695,7 +2745,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", "dispatch2", "objc2", ] @@ -2712,7 +2762,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3e0adef53c21f888deb4fa59fc59f7eb17404926ee8a6f59f5df0fd7f9f3272" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", "block2", "libc", "objc2", @@ -2725,7 +2775,7 @@ version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a0125f776a10d00af4152d74616409f0d4a2053a6f57fa5b7d6aa2854ac04794" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", "block2", "dispatch2", "objc2", @@ -2786,9 +2836,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.21.3" +version = "1.21.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +checksum = "9f7c3e4beb33f85d45ae3e3a1792185706c8e16d043238c593331cc7cd313b50" [[package]] name = "once_cell_polyfill" @@ -2798,11 +2848,11 @@ checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" [[package]] name = "onig" -version = "6.5.1" +version = "6.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" +checksum = "0cc3cbf698f9438986c11a880c90a6d04b9de27575afd28bbf45b154b6c709e2" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", "libc", "once_cell", "onig_sys", @@ -2810,9 +2860,9 @@ dependencies = [ [[package]] name = "onig_sys" -version = "69.9.1" +version = "69.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc" +checksum = "1e68317604e77e53b85896388e1a803c1d21b74c899ec9e5e1112db90735edd7" dependencies = [ "cc", "pkg-config", @@ -2850,7 +2900,7 @@ dependencies = [ "libc", "redox_syscall", "smallvec", - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -2890,9 +2940,9 @@ checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] name = "pest" -version = "2.8.3" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "989e7521a040efde50c3ab6bbadafbe15ab6dc042686926be59ac35d74607df4" +checksum = "e0848c601009d37dfa3430c4666e147e49cdcf1b92ecd3e63657d8a5f19da662" dependencies = [ "memchr", "ucd-trie", @@ -2900,9 +2950,9 @@ dependencies = [ [[package]] name = "pest_derive" -version = "2.8.3" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187da9a3030dbafabbbfb20cb323b976dc7b7ce91fcd84f2f74d6e31d378e2de" +checksum = "11f486f1ea21e6c10ed15d5a7c77165d0ee443402f0780849d1768e7d9d6fe77" dependencies = [ "pest", "pest_generator", @@ -2910,9 +2960,9 @@ dependencies = [ [[package]] name = "pest_generator" -version = "2.8.3" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b401d98f5757ebe97a26085998d6c0eecec4995cad6ab7fc30ffdf4b052843" +checksum = "8040c4647b13b210a963c1ed407c1ff4fdfa01c31d6d2a098218702e6664f94f" dependencies = [ "pest", "pest_meta", @@ -2923,9 +2973,9 @@ dependencies = [ [[package]] name = "pest_meta" -version = "2.8.3" +version = "2.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72f27a2cfee9f9039c4d86faa5af122a0ac3851441a34865b8a043b46be0065a" +checksum = "89815c69d36021a140146f26659a81d6c2afa33d216d736dd4be5381a7362220" dependencies = [ "pest", "sha2", @@ -2933,29 +2983,23 @@ dependencies = [ [[package]] name = "pin-project-lite" -version = "0.2.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b3cff922bd51709b605d9ead9aa71031d81447142d828eb4a6eba76fe619f9b" - -[[package]] -name = "pin-utils" -version = "0.1.0" +version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd" [[package]] name = "pkg-config" -version = "0.3.32" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" +checksum = "19f132c84eca552bf34cab8ec81f1c1dcc229b811638f9d283dceabe58c5569e" [[package]] name = "png" -version = "0.18.0" +version = "0.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97baced388464909d42d89643fe4361939af9b7ce7a31ee32a168f832a70f2a0" +checksum = "60769b8b31b2a9f263dae2776c37b1b28ae246943cf719eb6946a1db05128a61" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", "crc32fast", "fdeflate", "flate2", @@ -2964,24 +3008,24 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.11.1" +version = "1.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f84267b20a16ea918e43c6a88433c2d54fa145c92a811b5b047ccbe153674483" +checksum = "c33a9471896f1c69cecef8d20cbe2f7accd12527ce60845ff44c153bb2a21b49" [[package]] name = "portable-atomic-util" -version = "0.2.4" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8a2f0d8d040d7848a709caf78912debcc3f33ee4b3cac47d73d1e1069e83507" +checksum = "c2a106d1259c23fac8e543272398ae0e3c0b8d33c88ed73d0cc71b0f1d902618" dependencies = [ "portable-atomic", ] [[package]] name = "potential_utf" -version = "0.1.3" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84df19adbe5b5a0782edcab45899906947ab039ccf4573713735ee7de1e6b08a" +checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564" dependencies = [ "zerovec", ] @@ -3001,6 +3045,16 @@ dependencies = [ "zerocopy", ] +[[package]] +name = "prettyplease" +version = "0.2.37" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "479ca8adacdd7ce8f1fb39ce9ecccbfe93a3f1344b3d0d97f20bc0196208f62b" +dependencies = [ + "proc-macro2", + "syn", +] + [[package]] name = "proc-macro-error-attr2" version = "2.0.0" @@ -3025,9 +3079,9 @@ dependencies = [ [[package]] name = "proc-macro2" -version = "1.0.101" +version = "1.0.106" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89ae43fd86e4158d6db51ad8e2b80f313af9cc74f5c0e03ccb87de09998732de" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" dependencies = [ "unicode-ident", ] @@ -3084,12 +3138,9 @@ checksum = "40e24eee682d89fb193496edf918a7f407d30175b2e785fe057e4392dfd182e0" [[package]] name = "pxfm" -version = "0.1.25" +version = "0.1.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3cbdf373972bf78df4d3b518d07003938e2c7d1fb5891e55f9cb6df57009d84" -dependencies = [ - "num-traits", -] +checksum = "e0c5ccf5294c6ccd63a74f1565028353830a9c2f5eb0c682c355c471726a6e3f" [[package]] name = "py_literal" @@ -3115,10 +3166,10 @@ dependencies = [ "pin-project-lite", "quinn-proto", "quinn-udp", - "rustc-hash 2.1.1", - "rustls 0.23.34", - "socket2 0.6.1", - "thiserror 2.0.17", + "rustc-hash 2.1.2", + "rustls 0.23.40", + "socket2 0.6.3", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -3126,20 +3177,20 @@ dependencies = [ [[package]] name = "quinn-proto" -version = "0.11.13" +version = "0.11.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1906b49b0c3bc04b5fe5d86a77925ae6524a19b816ae38ce1e426255f1d8a31" +checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098" dependencies = [ "bytes", "getrandom 0.3.4", "lru-slab", - "rand 0.9.2", + "rand 0.9.4", "ring", - "rustc-hash 2.1.1", - "rustls 0.23.34", + "rustc-hash 2.1.2", + "rustls 0.23.40", "rustls-pki-types", "slab", - "thiserror 2.0.17", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -3154,16 +3205,16 @@ dependencies = [ "cfg_aliases", "libc", "once_cell", - "socket2 0.6.1", + "socket2 0.6.3", "tracing", "windows-sys 0.60.2", ] [[package]] name = "quote" -version = "1.0.41" +version = "1.0.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce25767e7b499d1b604768e7cde645d14cc8584231ea6b295e9c9eb22c02e1d1" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" dependencies = [ "proc-macro2", ] @@ -3174,11 +3225,17 @@ version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" +[[package]] +name = "r-efi" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf" + [[package]] name = "rand" -version = "0.8.5" +version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404" +checksum = "5ca0ecfa931c29007047d1bc58e623ab12e5590e8c7cc53200d5202b69266d8a" dependencies = [ "libc", "rand_chacha 0.3.1", @@ -3187,12 +3244,12 @@ dependencies = [ [[package]] name = "rand" -version = "0.9.2" +version = "0.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6db2770f06117d490610c7488547d543617b21bfa07796d7a12f6f1bd53850d1" +checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea" dependencies = [ "rand_chacha 0.9.0", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3212,7 +3269,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" dependencies = [ "ppv-lite86", - "rand_core 0.9.3", + "rand_core 0.9.5", ] [[package]] @@ -3221,14 +3278,14 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", ] [[package]] name = "rand_core" -version = "0.9.3" +version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99d9a13982dcf210057a8a78572b2217b667c3beacbf3a0d8b454f6f82837d38" +checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c" dependencies = [ "getrandom 0.3.4", ] @@ -3240,7 +3297,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6a8615d50dcf34fa31f7ab52692afec947c4dd0ab803cc87cb3b0b4570ff7463" dependencies = [ "num-traits", - "rand 0.9.2", + "rand 0.9.4", ] [[package]] @@ -3249,7 +3306,7 @@ version = "11.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "498cd0dc59d73224351ee52a95fee0f1a617a2eae0e7d9d720cc622c73a54186" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", ] [[package]] @@ -3260,9 +3317,9 @@ checksum = "60a357793950651c4ed0f3f52338f53b2f809f32d83a07f72909fa13e4c6c1e3" [[package]] name = "rayon" -version = "1.11.0" +version = "1.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "368f01d005bf8fd9b1206fb6fa653e6c4a81ceb1466406b81792d87c5677a58f" +checksum = "fb39b166781f92d482534ef4b4b1b2568f42613b53e5b6c160e24cfbfa30926d" dependencies = [ "either", "rayon-core", @@ -3301,7 +3358,7 @@ version = "0.5.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", ] [[package]] @@ -3310,7 +3367,7 @@ version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "libredox", "thiserror 1.0.69", ] @@ -3321,9 +3378,9 @@ version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ - "getrandom 0.2.16", + "getrandom 0.2.17", "libredox", - "thiserror 2.0.17", + "thiserror 2.0.18", ] [[package]] @@ -3348,14 +3405,14 @@ dependencies = [ [[package]] name = "regex" -version = "1.12.2" +version = "1.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843bc0191f75f3e22651ae5f1e72939ab2f72a4bc30fa80a066bd66edefc24d4" +checksum = "e10754a14b9137dd7b1e3e5b0493cc9171fdd105e0ab477f51b72e7f3ac0e276" dependencies = [ "aho-corasick", "memchr", - "regex-automata 0.4.13", - "regex-syntax 0.8.8", + "regex-automata 0.4.14", + "regex-syntax 0.8.10", ] [[package]] @@ -3369,13 +3426,13 @@ dependencies = [ [[package]] name = "regex-automata" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5276caf25ac86c8d810222b3dbb938e512c55c6831a10f3e6ed1c93b84041f1c" +checksum = "6e1dd4122fc1595e8162618945476892eefca7b88c52820e74af6262213cae8f" dependencies = [ "aho-corasick", "memchr", - "regex-syntax 0.8.8", + "regex-syntax 0.8.10", ] [[package]] @@ -3386,9 +3443,9 @@ checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" [[package]] name = "regex-syntax" -version = "0.8.8" +version = "0.8.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" +checksum = "dc897dd8d9e8bd1ed8cdad82b5966c3e0ecae09fb1907d58efaa013543185d0a" [[package]] name = "reqwest" @@ -3433,27 +3490,27 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.24" +version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d0946410b9f7b082a427e4ef5c8ff541a88b357bc6c637c40db3a68ac70a36f" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ "base64 0.22.1", "bytes", "futures-channel", "futures-core", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", "http-body-util", - "hyper 1.7.0", - "hyper-rustls 0.27.7", + "hyper 1.9.0", + "hyper-rustls 0.27.9", "hyper-util", "js-sys", "log", "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.34", + "rustls 0.23.40", "rustls-pki-types", "serde", "serde_json", @@ -3470,7 +3527,7 @@ dependencies = [ "wasm-bindgen-futures", "wasm-streams", "web-sys", - "webpki-roots 1.0.3", + "webpki-roots 1.0.7", ] [[package]] @@ -3481,7 +3538,7 @@ checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7" dependencies = [ "cc", "cfg-if", - "getrandom 0.2.16", + "getrandom 0.2.17", "libc", "untrusted", "windows-sys 0.52.0", @@ -3508,7 +3565,7 @@ dependencies = [ "num_cpus", "parking_lot", "pin-project-lite", - "rand 0.8.5", + "rand 0.8.6", "ref-cast", "rocket_codegen", "rocket_http", @@ -3571,9 +3628,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.26" +version = "0.1.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f7d92ca342cea22a06f2121d944b4fd82af56988c270852495420f961d4ace" +checksum = "b50b8869d9fc858ce7266cce0194bd74df58b9d0e3f6df3a9fc8eb470d95c09d" [[package]] name = "rustc-hash" @@ -3583,21 +3640,21 @@ checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2" [[package]] name = "rustc-hash" -version = "2.1.1" +version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "357703d41365b4b27c590e3ed91eabb1b663f07c4c084095e60cbed4362dff0d" +checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe" [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "b6fe4565b9518b83ef4f91bb47ce29620ca828bd32cb7e408f0062e9930ba190" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", "errno", "libc", "linux-raw-sys", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -3614,15 +3671,15 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.34" +version = "0.23.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a9586e9ee2b4f8fab52a0048ca7334d7024eef48e2cb9407e3497bb7cab7fa7" +checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b" dependencies = [ "log", "once_cell", "ring", "rustls-pki-types", - "rustls-webpki 0.103.7", + "rustls-webpki 0.103.13", "subtle", "zeroize", ] @@ -3638,9 +3695,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.12.0" +version = "1.14.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "229a4a4c221013e7e1f1a043678c5cc39fe5171437c88fb47151a21e6f5b5c79" +checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9" dependencies = [ "web-time", "zeroize", @@ -3658,9 +3715,9 @@ dependencies = [ [[package]] name = "rustls-webpki" -version = "0.103.7" +version = "0.103.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e10b3f4191e8a80e6b43eebabfac91e5dcecebb27a71f04e820c47ec41d314bf" +checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e" dependencies = [ "ring", "rustls-pki-types", @@ -3675,9 +3732,9 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d" [[package]] name = "ryu" -version = "1.0.20" +version = "1.0.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" +checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f" [[package]] name = "safetensors" @@ -3695,7 +3752,7 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "675656c1eabb620b921efea4f9199f97fc86e36dd6ffd1fbbe48d0f59a4987f5" dependencies = [ - "hashbrown", + "hashbrown 0.16.1", "serde", "serde_json", ] @@ -3731,6 +3788,12 @@ dependencies = [ "untrusted", ] +[[package]] +name = "semver" +version = "1.0.28" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a7852d02fc848982e0c167ef163aaff9cd91dc640ba85e263cb1ce46fae51cd" + [[package]] name = "seq-macro" version = "0.3.6" @@ -3769,15 +3832,15 @@ dependencies = [ [[package]] name = "serde_json" -version = "1.0.145" +version = "1.0.149" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +checksum = "83fc039473c5595ace860d8c4fafa220ff474b3fc6bfdb4293327f1a37e94d86" dependencies = [ "itoa", "memchr", - "ryu", "serde", "serde_core", + "zmij", ] [[package]] @@ -3829,24 +3892,25 @@ checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" [[package]] name = "signal-hook-registry" -version = "1.4.6" +version = "1.4.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b2a4719bff48cee6b39d12c020eeb490953ad2443b7055bd0b21fca26bd8c28b" +checksum = "c4db69cba1110affc0e9f7bcd48bbf87b3f4fc7c61fc9155afd4c469eb3d6c1b" dependencies = [ + "errno", "libc", ] [[package]] name = "simd-adler32" -version = "0.3.7" +version = "0.3.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d66dc143e6b11c1eddc06d5c423cfc97062865baf299914ab64caa38182078fe" +checksum = "703d5c7ef118737c72f1af64ad2f6f8c5e1921f818cdcb97b8fe6fc69bf66214" [[package]] name = "slab" -version = "0.4.11" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" +checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5" [[package]] name = "smallvec" @@ -3866,12 +3930,12 @@ dependencies = [ [[package]] name = "socket2" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17129e116933cf371d018bb80ae557e889637989d8638274fb25622827b03881" +checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e" dependencies = [ "libc", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -3966,9 +4030,9 @@ checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292" [[package]] name = "syn" -version = "2.0.107" +version = "2.0.117" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2a26dbd934e5451d21ef060c018dae56fc073894c5a7896f882928a76e6d081b" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" dependencies = [ "proc-macro2", "quote", @@ -4007,7 +4071,7 @@ version = "0.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "01198a2debb237c62b6826ec7081082d951f46dbb64b0e8c7649a452230d1dfc" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", "byteorder", "enum-as-inner", "libc", @@ -4038,9 +4102,9 @@ dependencies = [ [[package]] name = "tar" -version = "0.4.44" +version = "0.4.45" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" +checksum = "22692a6476a21fa75fdfc11d452fda482af402c008cdbaf3476414e122040973" dependencies = [ "filetime", "libc", @@ -4049,15 +4113,15 @@ dependencies = [ [[package]] name = "tempfile" -version = "3.23.0" +version = "3.27.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d31c77bdf42a745371d260a26ca7163f1e0924b64afa0b688e61b5a9fa02f16" +checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd" dependencies = [ "fastrand", - "getrandom 0.3.4", + "getrandom 0.4.2", "once_cell", "rustix", - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4071,11 +4135,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63587ca0f12b72a0600bcba1d40081f830876000bb46dd2337a3051618f4fc8" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.17", + "thiserror-impl 2.0.18", ] [[package]] @@ -4091,9 +4155,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.17" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ff15c8ecd7de3849db632e14d18d2571fa09dfc5ed93479bc4485c7a517c913" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", @@ -4117,30 +4181,30 @@ dependencies = [ [[package]] name = "time" -version = "0.3.44" +version = "0.3.47" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91e7d9e3bb61134e77bde20dd4825b97c010155709965fedf0f49bb138e52a9d" +checksum = "743bd48c283afc0388f9b8827b976905fb217ad9e647fae3a379a9283c4def2c" dependencies = [ "deranged", "itoa", "num-conv", "powerfmt", - "serde", + "serde_core", "time-core", "time-macros", ] [[package]] name = "time-core" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" +checksum = "7694e1cfe791f8d31026952abf09c69ca6f6fa4e1a1229e18988f06a04a12dca" [[package]] name = "time-macros" -version = "0.2.24" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" +checksum = "2e70e4c5a0e0a8a4823ad65dfe1a6930e4f4d756dcd9dd7939022b5e8c501215" dependencies = [ "num-conv", "time-core", @@ -4148,9 +4212,9 @@ dependencies = [ [[package]] name = "tinystr" -version = "0.8.1" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5d4f6d1145dcb577acf783d4e601bc1d76a13337bb54e6233add580b07344c8b" +checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d" dependencies = [ "displaydoc", "zerovec", @@ -4158,9 +4222,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.10.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" +checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3" dependencies = [ "tinyvec_macros", ] @@ -4173,9 +4237,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" [[package]] name = "tokenizers" -version = "0.22.1" +version = "0.22.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6475a27088c98ea96d00b39a9ddfb63780d1ad4cceb6f48374349a96ab2b7842" +checksum = "b238e22d44a15349529690fb07bd645cf58149a1b1e44d6cb5bd1641ff1a6223" dependencies = [ "ahash", "aho-corasick", @@ -4184,22 +4248,22 @@ dependencies = [ "derive_builder", "esaxx-rs", "getrandom 0.3.4", - "indicatif", + "indicatif 0.18.4", "itertools", "log", "macro_rules_attribute", "monostate", "onig", "paste", - "rand 0.9.2", + "rand 0.9.4", "rayon", "rayon-cond", "regex", - "regex-syntax 0.8.8", + "regex-syntax 0.8.10", "serde", "serde_json", "spm_precompiled", - "thiserror 2.0.17", + "thiserror 2.0.18", "unicode-normalization-alignments", "unicode-segmentation", "unicode_categories", @@ -4207,25 +4271,25 @@ dependencies = [ [[package]] name = "tokio" -version = "1.48.0" +version = "1.52.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff360e02eab121e0bc37a2d3b4d4dc622e6eda3a8e5253d5435ecf5bd4c68408" +checksum = "8fc7f01b389ac15039e4dc9531aa973a135d7a4135281b12d7c1bc79fd57fffe" dependencies = [ "bytes", "libc", "mio", "pin-project-lite", "signal-hook-registry", - "socket2 0.6.1", + "socket2 0.6.3", "tokio-macros", "windows-sys 0.61.2", ] [[package]] name = "tokio-macros" -version = "2.6.0" +version = "2.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" +checksum = "385a6cb71ab9ab790c5fe8d67f1645e6c450a7ce006a33de03daa956cf70a496" dependencies = [ "proc-macro2", "quote", @@ -4248,15 +4312,15 @@ version = "0.26.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61" dependencies = [ - "rustls 0.23.34", + "rustls 0.23.40", "tokio", ] [[package]] name = "tokio-stream" -version = "0.1.17" +version = "0.1.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eca58d7bba4a75707817a2c44174253f9236b2d5fbd055602e9d5c07c139a047" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" dependencies = [ "futures-core", "pin-project-lite", @@ -4265,9 +4329,9 @@ dependencies = [ [[package]] name = "tokio-util" -version = "0.7.16" +version = "0.7.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "14307c986784f72ef81c89db7d9e28d6ac26d16213b109ea501696195e6e3ce5" +checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098" dependencies = [ "bytes", "futures-core", @@ -4319,9 +4383,9 @@ checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" [[package]] name = "tower" -version = "0.5.2" +version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d039ad9159c98b70ecfd540b2573b97f7f52c3e8d9f8ad57a24b916a536975f9" +checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4" dependencies = [ "futures-core", "futures-util", @@ -4334,20 +4398,20 @@ dependencies = [ [[package]] name = "tower-http" -version = "0.6.6" +version = "0.6.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "adc82fd73de2a9722ac5da747f12383d2bfdb93591ee6c58486e0097890f05f2" +checksum = "68d6fdd9f81c2819c9a8b0e0cd91660e7746a8e6ea2ba7c6b2b057985f6bcb51" dependencies = [ - "bitflags 2.10.0", + "bitflags 2.11.1", "bytes", "futures-util", - "http 1.3.1", + "http 1.4.0", "http-body 1.0.1", - "iri-string", "pin-project-lite", "tower", "tower-layer", "tower-service", + "url", ] [[package]] @@ -4364,9 +4428,9 @@ checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3" [[package]] name = "tracing" -version = "0.1.41" +version = "0.1.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "784e0ac535deb450455cbfa28a6f0df145ea1bb7ae51b821cf5e7927fdcfbdd0" +checksum = "63e71662fa4b2a2c3a26f570f037eb95bb1f85397f3cd8076caed2f026a6d100" dependencies = [ "pin-project-lite", "tracing-attributes", @@ -4375,9 +4439,9 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.30" +version = "0.1.31" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" +checksum = "7490cfa5ec963746568740651ac6781f701c9c5ea257c58e057f3ba8cf69e8da" dependencies = [ "proc-macro2", "quote", @@ -4386,9 +4450,9 @@ dependencies = [ [[package]] name = "tracing-core" -version = "0.1.34" +version = "0.1.36" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" +checksum = "db97caf9d906fbde555dd62fa95ddba9eecfd14cb388e4f491a66d74cd5fb79a" dependencies = [ "once_cell", "valuable", @@ -4431,15 +4495,15 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "typed-path" -version = "0.12.2" +version = "0.12.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3015e6ce46d5ad8751e4a772543a30c7511468070e98e64e20165f8f81155b64" +checksum = "8e28f89b80c87b8fb0cf04ab448d5dd0dd0ade2f8891bae878de66a75a28600e" [[package]] name = "typenum" -version = "1.19.0" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "562d481066bde0658276a35467c4af00bdc6ee726305698a55b86e61d7ad82bb" +checksum = "40ce102ab67701b8526c123c1bab5cbe42d7040ccfd0f64af1a385808d2f43de" [[package]] name = "ubyte" @@ -4516,9 +4580,9 @@ dependencies = [ [[package]] name = "unicode-ident" -version = "1.0.19" +version = "1.0.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f63a545481291138910575129486daeaf8ac54aee4387fe7906919f7830c7d9d" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" [[package]] name = "unicode-normalization-alignments" @@ -4531,9 +4595,9 @@ dependencies = [ [[package]] name = "unicode-segmentation" -version = "1.12.0" +version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493" +checksum = "9629274872b2bfaf8d66f5f15725007f635594914870f65218920345aa11aa8c" [[package]] name = "unicode-width" @@ -4553,6 +4617,12 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +[[package]] +name = "unit-prefix" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" + [[package]] name = "untrusted" version = "0.9.0" @@ -4569,7 +4639,7 @@ dependencies = [ "flate2", "log", "once_cell", - "rustls 0.23.34", + "rustls 0.23.40", "rustls-pki-types", "serde", "serde_json", @@ -4580,9 +4650,9 @@ dependencies = [ [[package]] name = "url" -version = "2.5.7" +version = "2.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08bc136a29a3d1758e07a9cca267be308aeebf5cfd5a10f3f67ab2097683ef5b" +checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed" dependencies = [ "form_urlencoded", "idna", @@ -4604,11 +4674,11 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.23.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "ddd74a9687298c6858e9b88ec8935ec45d22e8fd5e6394fa1bd4e99a87789c76" dependencies = [ - "getrandom 0.3.4", + "getrandom 0.4.2", "js-sys", "wasm-bindgen", ] @@ -4652,58 +4722,50 @@ checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasip2" -version = "1.0.1+wasi-0.2.4" +version = "1.0.3+wasi-0.2.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0562428422c63773dad2c345a1882263bbf4d65cf3f42e90921f787ef5ad58e7" +checksum = "20064672db26d7cdc89c7798c48a0fdfac8213434a1186e5ef29fd560ae223d6" dependencies = [ - "wit-bindgen", + "wit-bindgen 0.57.1", ] [[package]] -name = "wasm-bindgen" -version = "0.2.104" +name = "wasip3" +version = "0.4.0+wasi-0.3.0-rc-2026-01-06" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1da10c01ae9f1ae40cbfac0bac3b1e724b320abfcf52229f80b547c0d250e2d" +checksum = "5428f8bf88ea5ddc08faddef2ac4a67e390b88186c703ce6dbd955e1c145aca5" dependencies = [ - "cfg-if", - "once_cell", - "rustversion", - "wasm-bindgen-macro", - "wasm-bindgen-shared", + "wit-bindgen 0.51.0", ] [[package]] -name = "wasm-bindgen-backend" -version = "0.2.104" +name = "wasm-bindgen" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "671c9a5a66f49d8a47345ab942e2cb93c7d1d0339065d4f8139c486121b43b19" +checksum = "49ace1d07c165b0864824eee619580c4689389afa9dc9ed3a4c75040d82e6790" dependencies = [ - "bumpalo", - "log", - "proc-macro2", - "quote", - "syn", + "cfg-if", + "once_cell", + "rustversion", + "wasm-bindgen-macro", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-futures" -version = "0.4.54" +version = "0.4.71" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e038d41e478cc73bae0ff9b36c60cff1c98b8f38f8d7e8061e79ee63608ac5c" +checksum = "96492d0d3ffba25305a7dc88720d250b1401d7edca02cc3bcd50633b424673b8" dependencies = [ - "cfg-if", "js-sys", - "once_cell", "wasm-bindgen", - "web-sys", ] [[package]] name = "wasm-bindgen-macro" -version = "0.2.104" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ca60477e4c59f5f2986c50191cd972e3a50d8a95603bc9434501cf156a9a119" +checksum = "8e68e6f4afd367a562002c05637acb8578ff2dea1943df76afb9e83d177c8578" dependencies = [ "quote", "wasm-bindgen-macro-support", @@ -4711,26 +4773,48 @@ dependencies = [ [[package]] name = "wasm-bindgen-macro-support" -version = "0.2.104" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9f07d2f20d4da7b26400c9f4a0511e6e0345b040694e8a75bd41d578fa4421d7" +checksum = "d95a9ec35c64b2a7cb35d3fead40c4238d0940c86d107136999567a4703259f2" dependencies = [ + "bumpalo", "proc-macro2", "quote", "syn", - "wasm-bindgen-backend", "wasm-bindgen-shared", ] [[package]] name = "wasm-bindgen-shared" -version = "0.2.104" +version = "0.2.121" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bad67dc8b2a1a6e5448428adec4c3e84c43e561d8c9ee8a9e5aabeb193ec41d1" +checksum = "c4e0100b01e9f0d03189a92b96772a1fb998639d981193d7dbab487302513441" dependencies = [ "unicode-ident", ] +[[package]] +name = "wasm-encoder" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "990065f2fe63003fe337b932cfb5e3b80e0b4d0f5ff650e6985b1048f62c8319" +dependencies = [ + "leb128fmt", + "wasmparser", +] + +[[package]] +name = "wasm-metadata" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb0e353e6a2fbdc176932bbaab493762eb1255a7900fe0fea1a2f96c296cc909" +dependencies = [ + "anyhow", + "indexmap", + "wasm-encoder", + "wasmparser", +] + [[package]] name = "wasm-streams" version = "0.4.2" @@ -4744,11 +4828,23 @@ dependencies = [ "web-sys", ] +[[package]] +name = "wasmparser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47b807c72e1bac69382b3a6fb3dbe8ea4c0ed87ff5629b8685ae6b9a611028fe" +dependencies = [ + "bitflags 2.11.1", + "hashbrown 0.15.5", + "indexmap", + "semver", +] + [[package]] name = "web-sys" -version = "0.3.81" +version = "0.3.98" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9367c417a924a74cae129e6a2ae3b47fabb1f8995595ab474029da749a8be120" +checksum = "4b572dff8bcf38bad0fa19729c89bb5748b2b9b1d8be70cf90df697e3a8f32aa" dependencies = [ "js-sys", "wasm-bindgen", @@ -4776,14 +4872,14 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.3", + "webpki-roots 1.0.7", ] [[package]] name = "webpki-roots" -version = "1.0.3" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32b130c0d2d49f8b6889abc456e795e82525204f27c42cf767cf0d7734e089b8" +checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d" dependencies = [ "rustls-pki-types", ] @@ -4810,7 +4906,7 @@ version = "0.1.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22" dependencies = [ - "windows-sys 0.60.2", + "windows-sys 0.61.2", ] [[package]] @@ -4830,22 +4926,22 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.61.2" +version = "0.62.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" +checksum = "b8e83a14d34d0623b51dce9581199302a221863196a1dde71a7663a4c2be9deb" dependencies = [ "windows-implement", "windows-interface", - "windows-link 0.1.3", + "windows-link", "windows-result", "windows-strings", ] [[package]] name = "windows-implement" -version = "0.60.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", @@ -4854,21 +4950,15 @@ dependencies = [ [[package]] name = "windows-interface" -version = "0.59.1" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", "syn", ] -[[package]] -name = "windows-link" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" - [[package]] name = "windows-link" version = "0.2.1" @@ -4877,20 +4967,20 @@ checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" [[package]] name = "windows-result" -version = "0.3.4" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" +checksum = "7781fa89eaf60850ac3d2da7af8e5242a5ea78d1a11c49bf2910bb5a73853eb5" dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.4.2" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" +checksum = "7837d08f69c77cf6b07689544538e017c1bfcf57e34b4c0ff58e6c2cd3b37091" dependencies = [ - "windows-link 0.1.3", + "windows-link", ] [[package]] @@ -4935,7 +5025,7 @@ version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" dependencies = [ - "windows-link 0.2.1", + "windows-link", ] [[package]] @@ -4975,7 +5065,7 @@ version = "0.53.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3" dependencies = [ - "windows-link 0.2.1", + "windows-link", "windows_aarch64_gnullvm 0.53.1", "windows_aarch64_msvc 0.53.1", "windows_i686_gnu 0.53.1", @@ -5126,9 +5216,9 @@ checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650" [[package]] name = "winnow" -version = "0.7.13" +version = "0.7.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21a0236b59786fed61e2a80582dd500fe61f18b5dca67a4a067d0bc9039339cf" +checksum = "df79d97927682d2fd8adb29682d1140b343be4ac0f08fd68b7765d9c059d3945" dependencies = [ "memchr", ] @@ -5145,21 +5235,109 @@ dependencies = [ [[package]] name = "wit-bindgen" -version = "0.46.0" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d7249219f66ced02969388cf2bb044a09756a083d0fab1e566056b04d9fbcaa5" +dependencies = [ + "wit-bindgen-rust-macro", +] + +[[package]] +name = "wit-bindgen" +version = "0.57.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ebf944e87a7c253233ad6766e082e3cd714b5d03812acc24c318f549614536e" + +[[package]] +name = "wit-bindgen-core" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea61de684c3ea68cb082b7a88508a8b27fcc8b797d738bfc99a82facf1d752dc" +dependencies = [ + "anyhow", + "heck", + "wit-parser", +] + +[[package]] +name = "wit-bindgen-rust" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b7c566e0f4b284dd6561c786d9cb0142da491f46a9fbed79ea69cdad5db17f21" +dependencies = [ + "anyhow", + "heck", + "indexmap", + "prettyplease", + "syn", + "wasm-metadata", + "wit-bindgen-core", + "wit-component", +] + +[[package]] +name = "wit-bindgen-rust-macro" +version = "0.51.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0c0f9bfd77e6a48eccf51359e3ae77140a7f50b1e2ebfe62422d8afdaffab17a" +dependencies = [ + "anyhow", + "prettyplease", + "proc-macro2", + "quote", + "syn", + "wit-bindgen-core", + "wit-bindgen-rust", +] + +[[package]] +name = "wit-component" +version = "0.244.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f17a85883d4e6d00e8a97c586de764dabcc06133f7f1d55dce5cdc070ad7fe59" +checksum = "9d66ea20e9553b30172b5e831994e35fbde2d165325bec84fc43dbf6f4eb9cb2" +dependencies = [ + "anyhow", + "bitflags 2.11.1", + "indexmap", + "log", + "serde", + "serde_derive", + "serde_json", + "wasm-encoder", + "wasm-metadata", + "wasmparser", + "wit-parser", +] + +[[package]] +name = "wit-parser" +version = "0.244.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ecc8ac4bc1dc3381b7f59c34f00b67e18f910c2c0f50015669dde7def656a736" +dependencies = [ + "anyhow", + "id-arena", + "indexmap", + "log", + "semver", + "serde", + "serde_derive", + "serde_json", + "unicode-xid", + "wasmparser", +] [[package]] name = "writeable" -version = "0.6.1" +version = "0.6.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" +checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4" [[package]] name = "xattr" -version = "1.5.0" +version = "1.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0d65cbf2f12c15564212d48f4e3dfb87923d25d611f2aed18f4cb23f0413d89e" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" dependencies = [ "libc", "rustix", @@ -5188,12 +5366,12 @@ dependencies = [ [[package]] name = "yoke" -version = "0.8.1" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72d6e5c6afb84d73944e5cedb052c4680d5657337201555f9f2a16b7406d4954" +checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca" dependencies = [ "stable_deref_trait", - "yoke-derive 0.8.0", + "yoke-derive 0.8.2", "zerofrom", ] @@ -5211,9 +5389,9 @@ dependencies = [ [[package]] name = "yoke-derive" -version = "0.8.0" +version = "0.8.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" +checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e" dependencies = [ "proc-macro2", "quote", @@ -5223,18 +5401,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.27" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0894878a5fa3edfd6da3f88c4805f4c8558e2b996227a3d864f47fe11e38282c" +checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.27" +version = "0.8.48" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88d2b8d9c68ad2b9e4340d7832716a4d21a22a1154777ad56ea55c51a9cf3831" +checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4" dependencies = [ "proc-macro2", "quote", @@ -5243,18 +5421,18 @@ dependencies = [ [[package]] name = "zerofrom" -version = "0.1.6" +version = "0.1.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50cc42e0333e05660c3587f3bf9d0478688e15d870fab3346451ce7f8c9fbea5" +checksum = "0ec05a11813ea801ff6d75110ad09cd0824ddba17dfe17128ea0d5f68e6c5272" dependencies = [ "zerofrom-derive", ] [[package]] name = "zerofrom-derive" -version = "0.1.6" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" +checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1" dependencies = [ "proc-macro2", "quote", @@ -5270,31 +5448,31 @@ checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0" [[package]] name = "zerotrie" -version = "0.2.2" +version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36f0bbd478583f79edad978b407914f61b2972f5af6fa089686016be8f9af595" +checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf" dependencies = [ "displaydoc", - "yoke 0.8.1", + "yoke 0.8.2", "zerofrom", ] [[package]] name = "zerovec" -version = "0.11.4" +version = "0.11.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7aa2bd55086f1ab526693ecbe444205da57e25f4489879da80635a46d90e73b" +checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239" dependencies = [ - "yoke 0.8.1", + "yoke 0.8.2", "zerofrom", "zerovec-derive", ] [[package]] name = "zerovec-derive" -version = "0.11.1" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" +checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555" dependencies = [ "proc-macro2", "quote", @@ -5314,15 +5492,15 @@ dependencies = [ "flate2", "indexmap", "memchr", - "thiserror 2.0.17", + "thiserror 2.0.18", "zopfli", ] [[package]] name = "zip" -version = "7.4.0" +version = "7.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc12baa6db2b15a140161ce53d72209dacea594230798c24774139b54ecaa980" +checksum = "c42e33efc22a0650c311c2ef19115ce232583abbe80850bc8b66509ebef02de0" dependencies = [ "crc32fast", "indexmap", @@ -5330,11 +5508,17 @@ dependencies = [ "typed-path", ] +[[package]] +name = "zmij" +version = "1.0.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8848ee67ecc8aedbaf3e4122217aff892639231befc6a1b58d29fff4c2cabaa" + [[package]] name = "zopfli" -version = "0.8.2" +version = "0.8.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "edfc5ee405f504cd4984ecc6f14d02d55cfda60fa4b689434ef4102aae150cd7" +checksum = "f05cd8797d63865425ff89b5c4a48804f35ba0ce8d125800027ad6017d2b5249" dependencies = [ "bumpalo", "crc32fast", @@ -5344,15 +5528,35 @@ dependencies = [ [[package]] name = "zune-core" -version = "0.4.12" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3f423a2c17029964870cfaabb1f13dfab7d092a62a29a89264f4d36990ca414a" +checksum = "cb8a0807f7c01457d0379ba880ba6322660448ddebc890ce29bb64da71fb40f9" [[package]] name = "zune-jpeg" -version = "0.4.21" +version = "0.5.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "29ce2c8a9384ad323cf564b67da86e21d3cfdff87908bc1223ed5c99bc792713" +checksum = "27bc9d5b815bc103f142aa054f561d9187d191692ec7c2d1e2b4737f8dbd7296" dependencies = [ "zune-core", ] + +[[patch.unused]] +name = "candle-core" +version = "0.10.2" +source = "git+https://github.com/huggingface/candle?branch=main#3df8203a2ab0f7d12866ef392d5ea7504b0255e4" + +[[patch.unused]] +name = "candle-flash-attn" +version = "0.10.2" +source = "git+https://github.com/huggingface/candle?branch=main#3df8203a2ab0f7d12866ef392d5ea7504b0255e4" + +[[patch.unused]] +name = "candle-nn" +version = "0.10.2" +source = "git+https://github.com/huggingface/candle?branch=main#3df8203a2ab0f7d12866ef392d5ea7504b0255e4" + +[[patch.unused]] +name = "candle-transformers" +version = "0.10.2" +source = "git+https://github.com/huggingface/candle?branch=main#3df8203a2ab0f7d12866ef392d5ea7504b0255e4" diff --git a/Cargo.toml b/Cargo.toml index 0d25a4c..3feec4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -44,3 +44,11 @@ fast_image_resize = "5.3" # opt-level = 3 # lto = true # codegen-units = 1 +[patch.crates-io] +candle-core = { git = "https://github.com/huggingface/candle", branch = "main" } +candle-nn = { git = "https://github.com/huggingface/candle", branch = "main" } +candle-transformers = { git = "https://github.com/huggingface/candle", branch = "main" } +candle-flash-attn = { git = "https://github.com/huggingface/candle", branch = "main" } + +# Force cudarc to a version that supports CUDA 13 +cudarc = { git = "https://github.com/coreylowman/cudarc", branch = "main" } diff --git a/README.md b/README.md index 5fedd13..e8fb724 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,7 @@ The original DeepSeek-OCR ships as a Python + Transformers stack—powerful, but - **One repo, two entrypoints** – a batteries-included CLI for batch jobs and a Rocket-based server that speaks `/v1/responses` and `/v1/chat/completions`. - **Works out of the box** – pulls model weights, configs, and tokenizer from whichever of Hugging Face or ModelScope responds fastest on first run. - **Optimised for Apple Silicon** – optional Metal backend with FP16 execution for real-time OCR on laptops. -- **CUDA (alpha)** – experimental support via `--features cuda` + `--device cuda --dtype f16`; expect rough edges while we finish kernel coverage. +- **CUDA** – NVIDIA GPU acceleration via `--features cuda` + `--device cuda --dtype f16`. VRAM swap (`--vision-offload sequential`) brings DeepSeek‑OCR Q4K down to **~2.3GB VRAM peak** on low-end GPUs (RTX 3050 4GB, GTX 1060, etc.). Full GPU vision mode (`--vision-offload full-gpu`) available for higher-VRAM cards. See the [VRAM swap section](#vram-swap--low-vram-gpus) for details. - **Intel MKL (preview)** – faster BLAS on x86 via `--features mkl` (install Intel oneMKL beforehand). - **OpenAI client compatibility** – drop-in replacement for popular SDKs; the server automatically collapses chat history to the latest user turn for OCR-friendly prompts. @@ -81,7 +81,7 @@ The workspace exposes three base model IDs plus DSQ-quantized variants for DeepS - Rust 1.78+ (edition 2024 support) - Git - Optional: Apple Silicon running macOS 13+ for Metal acceleration -- Optional: CUDA 12.2+ toolkit + driver for experimental NVIDIA GPU acceleration on Linux/Windows +- Optional: CUDA 12.2+ toolkit + driver for NVIDIA GPU acceleration on Linux/Windows - Optional: Intel oneAPI MKL for preview x86 acceleration (see below) - (Recommended) Hugging Face account with `HF_TOKEN` when pulling from the `deepseek-ai/DeepSeek-OCR` repo (ModelScope is used automatically when it’s faster/reachable). @@ -132,6 +132,9 @@ template = "plain" base_size = 1024 image_size = 640 crop_mode = true +vision_swap = true +patches_per_batch = 2 +vision_offload = "auto" max_new_tokens = 512 use_cache = true @@ -141,7 +144,7 @@ port = 8000 ``` - `[models]` picks the active model and lets you add more entries (each entry can point to its own config/tokenizer/weights). -- `[inference]` controls notebook-friendly defaults shared by the CLI and server (device, template, vision sizing, decoding budget, cache usage). +- `[inference]` controls notebook-friendly defaults shared by the CLI and server (device, template, vision sizing, decoding budget, cache usage, VRAM swap settings). - `[server]` sets the network binding and the model identifier reported by `/v1/models`. See `crates/cli/README.md` and `crates/server/README.md` for concise override tables. @@ -175,7 +178,7 @@ cargo run -p deepseek-ocr-cli --release -- \ > macOS tip: append `--features metal` to the `cargo run`/`cargo build` commands to compile with Accelerate + Metal backends. > -> CUDA tip (Linux/Windows): append `--features cuda` and run with `--device cuda --dtype f16` to target NVIDIA GPUs—feature is still alpha, so be ready for quirks. +> CUDA tip (Linux/Windows): append `--features cuda` and run with `--device cuda --dtype f16` to target NVIDIA GPUs. Add `--vision-offload sequential` for low-VRAM cards. > > Intel MKL preview: install Intel oneMKL, then build with `--features mkl` for faster CPU matmuls on x86. @@ -195,6 +198,10 @@ Key flags: - Sampling controls: `--do-sample`, `--temperature`, `--top-p`, `--top-k`, `--repetition-penalty`, `--no-repeat-ngram-size`, `--seed` - By default decoding stays deterministic (`do_sample=false`, `temperature=0.0`, `no_repeat_ngram_size=20`) - To use stochastic sampling set `--do-sample true --temperature 0.8` (and optionally adjust the other knobs) +- VRAM swap flags (CUDA only, DeepSeek‑OCR): + - `--vision-offload ` – choose vision offloading mode: `auto` (default), `sequential`, `full-gpu`, `cpu` + - `--vision-swap ` – enable/disable VRAM-aware loading (overridden by `--vision-offload`) + - `--patches-per-batch ` – patch batch size for CUDA processing (default: 2; smaller = less VRAM) ### Switching Models @@ -241,11 +248,45 @@ Notes: ## GPU Acceleration ⚡ - **Metal (macOS 13+ Apple Silicon)** – pass `--device metal --dtype f16` and build binaries with `--features metal` so Candle links against Accelerate + Metal. -- **CUDA (alpha, NVIDIA GPUs)** – install CUDA 12.2+ toolkits, build with `--features cuda`, and launch the CLI/server with `--device cuda --dtype f16`; still experimental. +- **CUDA (NVIDIA GPUs)** – install CUDA 12.2+ toolkits, build with `--features cuda`, and launch the CLI/server with `--device cuda --dtype f16`. Includes VRAM swap support for low-memory GPUs (RTX 3050 4GB, GTX 1060, etc.) and full-GPU vision mode for larger cards. See [VRAM Swap](#vram-swap--low-vram-gpus) below. - **Intel MKL (preview)** – install Intel oneMKL and build with `--features mkl` to speed up CPU workloads on x86. - For either backend, prefer release builds (e.g. `cargo build --release -p deepseek-ocr-cli --features metal|cuda`) to maximise throughput. - Combine GPU runs with `--max-new-tokens` and crop tuning flags to balance latency vs. quality. +## VRAM Swap & Low-VRAM GPUs 🔄 + +DeepSeek‑OCR with Q4K quantisation (`deepseek-ocr-q4k`) fits in **~950 MB** on CUDA, but the SAM+CLIP vision models require an additional **~2.1 GB**. On low-VRAM GPUs (≤6 GB), a **VRAM swap** mode loads one vision model at a time: + +| Mode | `--vision-offload` | Global View | Patch Crops | Peak VRAM (Q4K) | +|------|-------------------|-------------|-------------|-----------------| +| **Auto** (default) | `auto` | Auto-detects: uses Sequential on <6 GB | | ~2.3 GB | +| **Sequential** | `sequential` | CPU (always) | CUDA (chunked) | ~2.3 GB | +| **Full GPU** | `full-gpu` | CUDA | CUDA (chunked) | ~3.2 GB | +| **CPU only** | `cpu` | CPU | CPU | ~1 GB | + +**Sequential mode** runs the global view on CPU while patch crops process on CUDA in parallel (scoped threads), giving the best VRAM/speed trade-off on tight GPUs. + +**Full GPU mode** keeps all vision on CUDA: loads SAM → processes all images → drops SAM → loads CLIP → processes with cached SAM outputs. Offers faster vision prefill when VRAM allows (~3.2 GB peak). + +Fine-tune VRAM vs. throughput with `--patches-per-batch`: +```bash +# Minimum VRAM (processes one patch at a time on CUDA) +cargo run --release --features cuda -- --device cuda --dtype f16 --model deepseek-ocr-q4k \ + --vision-offload sequential --patches-per-batch 1 --prompt " OCR" --image doc.png + +# Faster (batches 4 patches per CUDA forward) +cargo run --release --features cuda -- --device cuda --dtype f16 --model deepseek-ocr-q4k \ + --vision-offload sequential --patches-per-batch 4 --prompt " OCR" --image doc.png +``` + +Disable swap entirely (force CPU vision) when you have enough RAM: +```bash +cargo run --release --features cuda -- --device cuda --dtype f16 --model deepseek-ocr-q4k \ + --vision-offload cpu --prompt " OCR" --image doc.png +``` + +> **Note:** VRAM swap is only relevant for the DeepSeek‑OCR vision pipeline (SAM+CLIP). PaddleOCR‑VL and DotsOCR do not benefit since they use lighter or different vision towers. + ## Repository Layout 🗂️ - `crates/core` – shared inference pipeline, model loaders, conversation templates. @@ -265,7 +306,8 @@ Detailed CLI usage lives in [`crates/cli/README.md`](crates/cli/README.md). The ## Roadmap 🗺️ - ✅ Apple Metal backend with FP16 support and CLI/server parity on macOS. -- ✅ NVIDIA CUDA backend (alpha) – build with `--features cuda`, run with `--device cuda --dtype f16` for Linux/Windows GPUs; polishing in progress. +- ✅ NVIDIA CUDA backend – build with `--features cuda`, run with `--device cuda --dtype f16` for Linux/Windows GPUs. VRAM swap brings DeepSeek‑OCR Q4K down to ~2.3 GB on RTX 3050 4 GB. +- ✅ **VRAM Swap** – sequential vision offload for low-VRAM CUDA GPUs via `--vision-offload sequential`, full-GPU mode via `--vision-offload full-gpu`, CPU-only mode via `--vision-offload cpu`. - 🔄 **Parity polish** – finish projector normalisation + crop tiling alignment; extend intermediate-tensor diff suite beyond the current sample baseline. - 🔄 **Grounding & streaming** – port the Python post-processing helpers (box extraction, markdown polish) and refine SSE streaming ergonomics. - 🔄 **Cross-platform acceleration** – continue tuning CUDA kernels, add automatic device detection across CPU/Metal/CUDA, and publish opt-in GPU benchmarks. diff --git a/crates/assets/src/lib.rs b/crates/assets/src/lib.rs index d80d2cf..a3dd55e 100644 --- a/crates/assets/src/lib.rs +++ b/crates/assets/src/lib.rs @@ -99,6 +99,26 @@ pub const MODEL_ASSETS: &[ModelAsset] = &[ ]; pub const QUANTIZED_MODEL_ASSETS: &[QuantizedModelAsset] = &[ + QuantizedModelAsset { + id: "deepseek-ocr-q2k", + kind: ModelKind::Deepseek, + baseline_id: "deepseek-ocr", + snapshot: SnapshotAsset { + dtype: "Q2_K", + repo_id: "TimmyOVO/deepseek-ocr.rs", + filename: "DeepSeek-OCR.Q2_K.dsq", + }, + }, + QuantizedModelAsset { + id: "deepseek-ocr-q3k", + kind: ModelKind::Deepseek, + baseline_id: "deepseek-ocr", + snapshot: SnapshotAsset { + dtype: "Q3_K", + repo_id: "TimmyOVO/deepseek-ocr.rs", + filename: "DeepSeek-OCR.Q3_K.dsq", + }, + }, QuantizedModelAsset { id: "deepseek-ocr-q4k", kind: ModelKind::Deepseek, diff --git a/crates/cli/src/args.rs b/crates/cli/src/args.rs index a9bbb3f..7f19601 100644 --- a/crates/cli/src/args.rs +++ b/crates/cli/src/args.rs @@ -49,7 +49,7 @@ pub struct SnapshotArgs { long, value_name = "DTYPE", default_value = "Q8_0", - value_parser = ["Q8_0", "Q4_K", "Q6_K"], + value_parser = ["Q8_0", "Q2_K", "Q3_K", "Q4_K", "Q6_K"], help_heading = "Snapshot" )] pub dtype: String, diff --git a/crates/config/src/args.rs b/crates/config/src/args.rs index 05a33d7..1b9c4e7 100644 --- a/crates/config/src/args.rs +++ b/crates/config/src/args.rs @@ -1,7 +1,7 @@ use std::path::PathBuf; use clap::Args; -use deepseek_ocr_core::runtime::{DeviceKind, Precision}; +use deepseek_ocr_core::runtime::{DeviceKind, Precision, VisionOffload}; use crate::config::{ConfigOverrides, ServerOverride}; @@ -54,6 +54,22 @@ pub struct CommonInferenceArgs { #[arg(long, help_heading = "Inference")] pub crop_mode: Option, + /// Enable VRAM swap for vision models (auto-detect by default). + #[arg(long, help_heading = "Inference")] + pub vision_swap: Option, + + /// Patch batch size for VRAM swap (default: 2). + #[arg(long, help_heading = "Inference")] + pub patches_per_batch: Option, + + /// Number of patches to process on CPU in sequential mode (default: 0 = all on GPU). + #[arg(long, help_heading = "Inference")] + pub cpu_patches: Option, + + /// Vision offload strategy (auto, sequential, full-gpu, cpu). + #[arg(long, help_heading = "Inference")] + pub vision_offload: Option, + /// Default max tokens budget. #[arg(long, help_heading = "Inference")] pub max_new_tokens: Option, @@ -111,6 +127,10 @@ impl From<&CommonInferenceArgs> for crate::InferenceOverride { base_size: value.base_size, image_size: value.image_size, crop_mode: value.crop_mode, + vision_swap: value.vision_swap, + patches_per_batch: value.patches_per_batch, + cpu_patches: value.cpu_patches, + vision_offload: value.vision_offload, decode: deepseek_ocr_core::DecodeParametersPatch { max_new_tokens: value.max_new_tokens, do_sample: value.do_sample, diff --git a/crates/config/src/config.rs b/crates/config/src/config.rs index b9418ca..830cad6 100644 --- a/crates/config/src/config.rs +++ b/crates/config/src/config.rs @@ -7,7 +7,7 @@ use std::{ use anyhow::{Context, Result, anyhow}; use deepseek_ocr_core::{ DecodeParameters, DecodeParametersPatch, ModelKind, - runtime::{DeviceKind, Precision}, + runtime::{DeviceKind, Precision, VisionOffload}, }; use serde::{Deserialize, Serialize}; @@ -62,6 +62,12 @@ fn ensure_default_model_entries(entries: &mut BTreeMap) { entries .entry("glm-ocr".to_string()) .or_insert_with(glm_ocr_entry); + entries + .entry("deepseek-ocr-q2k".to_string()) + .or_insert_with(|| quantized_entry(ModelKind::Deepseek, "Q2_K", "deepseek-ocr")); + entries + .entry("deepseek-ocr-q3k".to_string()) + .or_insert_with(|| quantized_entry(ModelKind::Deepseek, "Q3_K", "deepseek-ocr")); entries .entry("deepseek-ocr-q4k".to_string()) .or_insert_with(|| quantized_entry(ModelKind::Deepseek, "Q4_K", "deepseek-ocr")); @@ -139,7 +145,13 @@ fn ensure_model_defaults(entries: &mut BTreeMap) { fill_missing_model_defaults(entry, &ocr2_defaults); } - let quantized_deepseek_ids = ["deepseek-ocr-q4k", "deepseek-ocr-q6k", "deepseek-ocr-q8k"]; + let quantized_deepseek_ids = [ + "deepseek-ocr-q2k", + "deepseek-ocr-q3k", + "deepseek-ocr-q4k", + "deepseek-ocr-q6k", + "deepseek-ocr-q8k", + ]; for model_id in quantized_deepseek_ids { if let Some(entry) = entries.get_mut(model_id) { fill_missing_model_defaults(entry, &ocr1_defaults); @@ -202,6 +214,10 @@ pub struct InferenceSettings { pub base_size: u32, pub image_size: u32, pub crop_mode: bool, + pub vision_swap: bool, + pub patches_per_batch: usize, + pub cpu_patches: usize, + pub vision_offload: VisionOffload, #[serde(flatten)] pub decode: DecodeParameters, } @@ -215,6 +231,10 @@ impl Default for InferenceSettings { base_size: 1024, image_size: 640, crop_mode: true, + vision_swap: true, + patches_per_batch: 2, + vision_offload: VisionOffload::default(), + cpu_patches: 0, decode: DecodeParameters::default(), } } @@ -526,6 +546,10 @@ pub struct InferenceOverride { pub base_size: Option, pub image_size: Option, pub crop_mode: Option, + pub vision_swap: Option, + pub patches_per_batch: Option, + pub vision_offload: Option, + pub cpu_patches: Option, #[serde(flatten)] pub decode: DecodeParametersPatch, } @@ -550,6 +574,18 @@ impl std::ops::AddAssign<&InferenceOverride> for InferenceSettings { if let Some(crop_mode) = rhs.crop_mode { self.crop_mode = crop_mode; } + if let Some(vision_swap) = rhs.vision_swap { + self.vision_swap = vision_swap; + } + if let Some(patches_per_batch) = rhs.patches_per_batch { + self.patches_per_batch = patches_per_batch; + } + if let Some(vision_offload) = rhs.vision_offload { + self.vision_offload = vision_offload; + } + if let Some(cpu_patches) = rhs.cpu_patches { + self.cpu_patches = cpu_patches; + } self.decode += &rhs.decode; } @@ -561,6 +597,10 @@ impl InferenceSettings { base_size: self.base_size, image_size: self.image_size, crop_mode: self.crop_mode, + vision_swap: self.vision_swap, + patches_per_batch: self.patches_per_batch, + cpu_patches: self.cpu_patches, + vision_offload: self.vision_offload, } } } diff --git a/crates/core/src/inference.rs b/crates/core/src/inference.rs index 2fd10bd..292e644 100644 --- a/crates/core/src/inference.rs +++ b/crates/core/src/inference.rs @@ -4,7 +4,7 @@ use image::DynamicImage; use serde::{Deserialize, Serialize}; use tokenizers::Tokenizer; -use crate::{benchmark::Timer, conversation::get_conv_template, sampling::TokenSelectionParams}; +use crate::{benchmark::Timer, conversation::get_conv_template, runtime::VisionOffload, sampling::TokenSelectionParams}; /// Callback used to stream decoded token pieces. pub type StreamCallback<'a> = Option<&'a dyn Fn(usize, &[i64])>; @@ -15,6 +15,31 @@ pub struct VisionSettings { pub base_size: u32, pub image_size: u32, pub crop_mode: bool, + /// Enable VRAM swap for vision models (default: true = auto-detect). + /// Set to false to force SAM/CLIP on CPU. + pub vision_swap: bool, + /// Patch batch size for VRAM swap (default: 2). + /// Smaller values use less VRAM but are slower. + pub patches_per_batch: usize, + /// Vision offload strategy (overrides vision_swap when non-Auto). + pub vision_offload: VisionOffload, + /// Number of patches to process on CPU in sequential mode (default: 0 = all on GPU). + /// First `cpu_patches` patches run on CPU alongside global view; the rest use VRAM swap on GPU. + pub cpu_patches: usize, +} + +impl Default for VisionSettings { + fn default() -> Self { + Self { + base_size: 1024, + image_size: 640, + crop_mode: true, + vision_swap: true, + patches_per_batch: 2, + vision_offload: VisionOffload::Auto, + cpu_patches: 0, + } + } } /// Decoding parameters that map directly onto generation options. diff --git a/crates/core/src/lib.rs b/crates/core/src/lib.rs index af92747..9943052 100644 --- a/crates/core/src/lib.rs +++ b/crates/core/src/lib.rs @@ -11,6 +11,7 @@ pub use inference::{ DecodeOutcome, DecodeParameters, DecodeParametersPatch, ModelKind, ModelLoadArgs, OcrEngine, VisionSettings, normalize_text, render_prompt, }; +pub use runtime::{DeviceKind, Precision, VisionOffload, default_dtype_for_device, dtype_from_precision, prepare_device_and_dtype}; // #[cfg(feature = "mkl")] // extern crate intel_mkl_src; diff --git a/crates/core/src/runtime.rs b/crates/core/src/runtime.rs index b3cb367..593470c 100644 --- a/crates/core/src/runtime.rs +++ b/crates/core/src/runtime.rs @@ -19,6 +19,20 @@ pub enum Precision { Bf16, } +#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, ValueEnum, Serialize, Deserialize)] +#[serde(rename_all = "snake_case")] +pub enum VisionOffload { + /// Auto-detect based on VRAM: use Sequential on <6GB, normal on >=6GB. + #[default] + Auto, + /// Hybrid: global view on CPU, patch crops on CUDA (current swap behavior). + Sequential, + /// Full GPU: load SAM then CLIP sequentially on CUDA for all vision. + FullGpu, + /// Force all vision on CPU (no VRAM swap). + Cpu, +} + pub fn prepare_device_and_dtype( device: DeviceKind, precision: Option, diff --git a/crates/core/src/tensor.rs b/crates/core/src/tensor.rs index d4b9005..8467bc2 100644 --- a/crates/core/src/tensor.rs +++ b/crates/core/src/tensor.rs @@ -5,7 +5,16 @@ use candle_core::{DType, Tensor}; /// /// `weight` must be `[vocab, hidden]` and `ids` must be rank-2 `[batch, seq]`. The returned tensor /// has shape `[batch, seq, hidden]`. +/// +/// When `weight` and `ids` reside on different devices, `ids` are temporarily moved to +/// `weight`'s device for the lookup and the result is moved back to `ids`' original device. pub fn gather_token_embeddings(weight: &Tensor, ids: &Tensor) -> Result { + let target_device = ids.device(); + let ids = if !ids.device().same_device(weight.device()) { + ids.to_device(weight.device())? + } else { + ids.clone() + }; ensure!( ids.rank() == 2, "input ids must have shape [batch, seq], got rank {}", @@ -14,12 +23,17 @@ pub fn gather_token_embeddings(weight: &Tensor, ids: &Tensor) -> Result let (_vocab, hidden) = weight.shape().dims2()?; let (batch, seq_len) = ids.shape().dims2()?; let ids = if ids.dtype() == DType::I64 { - ids.clone() + ids } else { ids.to_dtype(DType::I64)? }; let weight = weight.force_contiguous()?; let flat = ids.reshape((batch * seq_len,))?.force_contiguous()?; let gathered = weight.index_select(&flat, 0)?; - Ok(gathered.reshape((batch, seq_len, hidden))?) + let gathered = gathered.reshape((batch, seq_len, hidden))?; + if !gathered.device().same_device(target_device) { + Ok(gathered.to_device(target_device)?) + } else { + Ok(gathered) + } } diff --git a/crates/dsq-cli/src/main.rs b/crates/dsq-cli/src/main.rs index afea188..7cbd2d0 100644 --- a/crates/dsq-cli/src/main.rs +++ b/crates/dsq-cli/src/main.rs @@ -13,7 +13,8 @@ use deepseek_ocr_dsq_models::{ AdapterRegistry, AdapterScope, LinearSpec, ModelAdapter, QuantContext, }; use deepseek_ocr_dsq_writer::{ - encode_bias_values, quantize_q4k, quantize_q6k, quantize_q8_0, DsqWriter, SnapshotMetadata, + encode_bias_values, quantize_q2k, quantize_q3k, quantize_q4k, quantize_q6k, quantize_q8_0, + DsqWriter, SnapshotMetadata, }; use half::{bf16, f16}; use indicatif::{ProgressBar, ProgressDrawTarget, ProgressStyle}; @@ -81,6 +82,10 @@ struct StatsArgs { enum QuantDTypeArg { #[value(name = "Q8_0")] Q8_0, + #[value(name = "Q2_K")] + Q2K, + #[value(name = "Q3_K")] + Q3K, #[value(name = "Q4_K")] Q4K, #[value(name = "Q6_K")] @@ -91,6 +96,8 @@ impl QuantDTypeArg { fn label(self) -> &'static str { match self { Self::Q8_0 => "Q8_0", + Self::Q2K => "Q2_K", + Self::Q3K => "Q3_K", Self::Q4K => "Q4_K", Self::Q6K => "Q6_K", } @@ -99,6 +106,8 @@ impl QuantDTypeArg { fn to_dtype(self) -> DsqTensorDType { match self { Self::Q8_0 => DsqTensorDType::Q8_0, + Self::Q2K => DsqTensorDType::Q2K, + Self::Q3K => DsqTensorDType::Q3K, Self::Q4K => DsqTensorDType::Q4K, Self::Q6K => DsqTensorDType::Q6K, } @@ -617,6 +626,8 @@ fn quantize_spec( } let qbytes = match selection.dtype { DsqTensorDType::Q8_0 => quantize_q8_0(&weights, spec.out_dim, spec.in_dim)?, + DsqTensorDType::Q2K => quantize_q2k(&weights, spec.out_dim, spec.in_dim)?, + DsqTensorDType::Q3K => quantize_q3k(&weights, spec.out_dim, spec.in_dim)?, DsqTensorDType::Q4K => quantize_q4k(&weights, spec.out_dim, spec.in_dim)?, DsqTensorDType::Q6K => quantize_q6k(&weights, spec.out_dim, spec.in_dim)?, other => unreachable!("float dtype {other:?} cannot be selected here"), @@ -991,7 +1002,9 @@ fn select_dtype(primary: DsqTensorDType, in_dim: usize) -> Result Option { match dtype { - DsqTensorDType::Q6K | DsqTensorDType::Q4K => Some(DsqTensorDType::Q8_0), + DsqTensorDType::Q2K | DsqTensorDType::Q3K | DsqTensorDType::Q4K | DsqTensorDType::Q6K => { + Some(DsqTensorDType::Q8_0) + } DsqTensorDType::Q8_0 => None, _ => None, } diff --git a/crates/dsq-models/src/adapters/deepseek_ocr.rs b/crates/dsq-models/src/adapters/deepseek_ocr.rs index 27a1090..fcc5244 100644 --- a/crates/dsq-models/src/adapters/deepseek_ocr.rs +++ b/crates/dsq-models/src/adapters/deepseek_ocr.rs @@ -149,6 +149,14 @@ impl ModelAdapter for DeepSeekOcrAdapter { } match tensor { "lm_head.weight" | "model.projector.layers.weight" => Some(DsqTensorDType::Q8_0), + // gate_proj and up_proj form the gating pathway — very sensitive to + // precision loss. When the primary dtype is Q2_K, promote them to + // Q4_K to preserve the expert routing signal inside each MoE layer. + name if ctx.primary == DsqTensorDType::Q2K + && (name.contains("gate_proj") || name.contains("up_proj")) => + { + Some(DsqTensorDType::Q4K) + } _ => None, } } diff --git a/crates/dsq-runtime/src/lib.rs b/crates/dsq-runtime/src/lib.rs index 72d3436..8c02c4a 100644 --- a/crates/dsq-runtime/src/lib.rs +++ b/crates/dsq-runtime/src/lib.rs @@ -336,7 +336,11 @@ impl QuantizedSnapshot { let qweight_bytes = self.reader.tensor_bytes(record)?; let bias = self.load_bias(record, name, device)?; match record.q_dtype { - DsqTensorDType::Q8_0 | DsqTensorDType::Q4K | DsqTensorDType::Q6K => { + DsqTensorDType::Q8_0 + | DsqTensorDType::Q2K + | DsqTensorDType::Q3K + | DsqTensorDType::Q4K + | DsqTensorDType::Q6K => { let ggml_dtype = ggml_from_snapshot_dtype(record.q_dtype)?; let qtensor = qtensor_from_ggml( ggml_dtype, @@ -580,6 +584,8 @@ impl QuantizedSnapshot { fn ggml_from_snapshot_dtype(dtype: DsqTensorDType) -> Result { match dtype { DsqTensorDType::Q8_0 => Ok(GgmlDType::Q8_0), + DsqTensorDType::Q2K => Ok(GgmlDType::Q2K), + DsqTensorDType::Q3K => Ok(GgmlDType::Q3K), DsqTensorDType::Q4K => Ok(GgmlDType::Q4K), DsqTensorDType::Q6K => Ok(GgmlDType::Q6K), other => bail!("snapshot dtype {:?} does not map to ggml", other), @@ -588,7 +594,11 @@ fn ggml_from_snapshot_dtype(dtype: DsqTensorDType) -> Result { fn ensure_supported_snapshot_dtype(dtype: DsqTensorDType) -> Result<()> { match dtype { - DsqTensorDType::Q8_0 | DsqTensorDType::Q4K | DsqTensorDType::Q6K => Ok(()), + DsqTensorDType::Q8_0 + | DsqTensorDType::Q2K + | DsqTensorDType::Q3K + | DsqTensorDType::Q4K + | DsqTensorDType::Q6K => Ok(()), DsqTensorDType::F16 | DsqTensorDType::BF16 | DsqTensorDType::F32 => Ok(()), } } diff --git a/crates/dsq-writer/src/lib.rs b/crates/dsq-writer/src/lib.rs index 442dc06..00170aa 100644 --- a/crates/dsq-writer/src/lib.rs +++ b/crates/dsq-writer/src/lib.rs @@ -6,7 +6,7 @@ use std::{ slice, }; -use candle_core::quantized::k_quants::{BlockQ4K, BlockQ6K, GgmlType as CandleGgmlType}; +use candle_core::quantized::k_quants::{BlockQ2K, BlockQ3K, BlockQ4K, BlockQ6K, GgmlType as CandleGgmlType}; use deepseek_ocr_dsq::{DsqBiasDType, DsqTensorDType}; use half::{bf16, f16}; @@ -16,6 +16,10 @@ use thiserror::Error; const DSQ_MAGIC: &[u8; 7] = b"DSQSNAP"; const DSQ_VERSION: u32 = 1; const Q8_BLOCK: usize = 32; +const Q2K_BLOCK: usize = 256; +const Q2K_BLOCK_BYTES: usize = mem::size_of::(); +const Q3K_BLOCK: usize = 256; +const Q3K_BLOCK_BYTES: usize = mem::size_of::(); const Q4K_BLOCK: usize = 256; const Q4K_BLOCK_BYTES: usize = mem::size_of::(); // 144 bytes per block (K-scale layout) const Q6K_BLOCK: usize = 256; @@ -167,6 +171,116 @@ impl DsqWriter { ) } + /// Quantize a dense matrix into Q2_K blocks and append it as a tensor record. + pub fn add_q2k_tensor( + &mut self, + name: impl Into, + out_dim: usize, + in_dim: usize, + weights: &[f32], + bias: Option<&[f32]>, + ) -> Result<()> { + let name = name.into(); + if self.records.iter().any(|rec| rec.name == name) { + return Err(DsqWriterError::DuplicateTensor(name.clone())); + } + if !in_dim.is_multiple_of(Q2K_BLOCK) { + return Err(DsqWriterError::InvalidBlock { + name: name.clone(), + in_dim, + block: Q2K_BLOCK, + }); + } + let expected = out_dim + .checked_mul(in_dim) + .ok_or(DsqWriterError::ValueOverflow { + what: "tensor elements", + })?; + if weights.len() != expected { + return Err(DsqWriterError::DimensionMismatch { + name: name.clone(), + expected, + found: weights.len(), + }); + } + if let Some(bias_vals) = bias { + if bias_vals.len() != out_dim { + return Err(DsqWriterError::BiasLengthMismatch { + name: name.clone(), + out_dim, + found: bias_vals.len(), + }); + } + } + let qbytes = quantize_q2k(weights, out_dim, in_dim)?; + let bias_bytes = bias.map(encode_bias_values); + self.add_quantized_tensor_internal( + name, + out_dim, + in_dim, + DsqTensorDType::Q2K, + &qbytes, + bias_bytes + .as_deref() + .map(|slice| (slice, DsqBiasDType::F32)), + ) + } + + /// Quantize a dense matrix into Q3_K blocks and append it as a tensor record. + pub fn add_q3k_tensor( + &mut self, + name: impl Into, + out_dim: usize, + in_dim: usize, + weights: &[f32], + bias: Option<&[f32]>, + ) -> Result<()> { + let name = name.into(); + if self.records.iter().any(|rec| rec.name == name) { + return Err(DsqWriterError::DuplicateTensor(name.clone())); + } + if !in_dim.is_multiple_of(Q3K_BLOCK) { + return Err(DsqWriterError::InvalidBlock { + name: name.clone(), + in_dim, + block: Q3K_BLOCK, + }); + } + let expected = out_dim + .checked_mul(in_dim) + .ok_or(DsqWriterError::ValueOverflow { + what: "tensor elements", + })?; + if weights.len() != expected { + return Err(DsqWriterError::DimensionMismatch { + name: name.clone(), + expected, + found: weights.len(), + }); + } + if let Some(bias_vals) = bias { + if bias_vals.len() != out_dim { + return Err(DsqWriterError::BiasLengthMismatch { + name: name.clone(), + out_dim, + found: bias_vals.len(), + }); + } + } + let qbytes = quantize_q3k(weights, out_dim, in_dim)?; + let bias_bytes = bias.map(encode_bias_values); + self.add_quantized_tensor_internal( + name, + out_dim, + in_dim, + DsqTensorDType::Q3K, + &qbytes, + bias_bytes + .as_deref() + .map(|slice| (slice, DsqBiasDType::F32)), + ) + } + /// Quantize a dense matrix into Q4_K blocks and append it as a tensor record. pub fn add_q4k_tensor( &mut self, @@ -597,6 +711,72 @@ pub fn quantize_q8_0(weights: &[f32], rows: usize, cols: usize) -> Result Result> { + if !cols.is_multiple_of(Q2K_BLOCK) { + return Err(DsqWriterError::InvalidBlock { + name: "quantize_q2k".into(), + in_dim: cols, + block: Q2K_BLOCK, + }); + } + if weights.len() != rows * cols { + return Err(DsqWriterError::DimensionMismatch { + name: "quantize_q2k".into(), + expected: rows * cols, + found: weights.len(), + }); + } + let blocks_per_row = cols / Q2K_BLOCK; + let total_blocks = rows + .checked_mul(blocks_per_row) + .ok_or(DsqWriterError::ValueOverflow { what: "q2k blocks" })?; + let mut result = Vec::with_capacity(total_blocks * Q2K_BLOCK_BYTES); + for row in 0..rows { + let start = row * cols; + let row_slice = &weights[start..start + cols]; + let mut blocks = vec![::zeros(); blocks_per_row]; + ::from_float(row_slice, &mut blocks); + let bytes = unsafe { + slice::from_raw_parts(blocks.as_ptr() as *const u8, blocks.len() * Q2K_BLOCK_BYTES) + }; + result.extend_from_slice(bytes); + } + Ok(result) +} + +pub fn quantize_q3k(weights: &[f32], rows: usize, cols: usize) -> Result> { + if !cols.is_multiple_of(Q3K_BLOCK) { + return Err(DsqWriterError::InvalidBlock { + name: "quantize_q3k".into(), + in_dim: cols, + block: Q3K_BLOCK, + }); + } + if weights.len() != rows * cols { + return Err(DsqWriterError::DimensionMismatch { + name: "quantize_q3k".into(), + expected: rows * cols, + found: weights.len(), + }); + } + let blocks_per_row = cols / Q3K_BLOCK; + let total_blocks = rows + .checked_mul(blocks_per_row) + .ok_or(DsqWriterError::ValueOverflow { what: "q3k blocks" })?; + let mut result = Vec::with_capacity(total_blocks * Q3K_BLOCK_BYTES); + for row in 0..rows { + let start = row * cols; + let row_slice = &weights[start..start + cols]; + let mut blocks = vec![::zeros(); blocks_per_row]; + ::from_float(row_slice, &mut blocks); + let bytes = unsafe { + slice::from_raw_parts(blocks.as_ptr() as *const u8, blocks.len() * Q3K_BLOCK_BYTES) + }; + result.extend_from_slice(bytes); + } + Ok(result) +} + pub fn quantize_q4k(weights: &[f32], rows: usize, cols: usize) -> Result> { if !cols.is_multiple_of(Q4K_BLOCK) { return Err(DsqWriterError::InvalidBlock { @@ -688,6 +868,8 @@ fn expected_qbyte_len( let blocks_per_row = in_dim / block; let per_block = match dtype { DsqTensorDType::Q8_0 => Q8_BLOCK_BYTES, + DsqTensorDType::Q2K => Q2K_BLOCK_BYTES, + DsqTensorDType::Q3K => Q3K_BLOCK_BYTES, DsqTensorDType::Q4K => Q4K_BLOCK_BYTES, DsqTensorDType::Q6K => Q6K_BLOCK_BYTES, other => unreachable!("expected quantized dtype, got {other:?}"), diff --git a/crates/dsq/src/lib.rs b/crates/dsq/src/lib.rs index dc4f3fa..279c85a 100644 --- a/crates/dsq/src/lib.rs +++ b/crates/dsq/src/lib.rs @@ -50,6 +50,8 @@ pub struct DsqHeader { #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum DsqTensorDType { Q8_0, + Q2K, + Q3K, Q4K, Q6K, F16, @@ -61,6 +63,8 @@ impl DsqTensorDType { pub fn as_u32(self) -> u32 { match self { Self::Q8_0 => 8, + Self::Q2K => 10, + Self::Q3K => 11, Self::Q4K => 12, Self::Q6K => 14, Self::F16 => 1, @@ -72,6 +76,8 @@ impl DsqTensorDType { pub fn block_size(self) -> Option { match self { Self::Q8_0 => Some(32), + Self::Q2K => Some(256), + Self::Q3K => Some(256), Self::Q4K => Some(256), Self::Q6K => Some(256), Self::F16 | Self::BF16 | Self::F32 => None, @@ -97,6 +103,8 @@ impl TryFrom for DsqTensorDType { fn try_from(value: u32) -> Result { match value { 8 => Ok(Self::Q8_0), + 10 => Ok(Self::Q2K), + 11 => Ok(Self::Q3K), 12 => Ok(Self::Q4K), 14 => Ok(Self::Q6K), 1 => Ok(Self::F16), @@ -113,6 +121,8 @@ impl fmt::Display for DsqTensorDType { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { Self::Q8_0 => f.write_str("Q8_0"), + Self::Q2K => f.write_str("Q2_K"), + Self::Q3K => f.write_str("Q3_K"), Self::Q4K => f.write_str("Q4_K"), Self::Q6K => f.write_str("Q6_K"), Self::F16 => f.write_str("F16"), diff --git a/crates/infer-deepseek/src/model/mod.rs b/crates/infer-deepseek/src/model/mod.rs index 961ff25..950d2e6 100644 --- a/crates/infer-deepseek/src/model/mod.rs +++ b/crates/infer-deepseek/src/model/mod.rs @@ -1,4 +1,5 @@ use std::{ + cell::UnsafeCell, convert::TryFrom, path::{Path, PathBuf}, sync::Arc, @@ -13,6 +14,9 @@ use rayon::prelude::*; use tokenizers::Tokenizer; use tracing::{info, trace}; +mod swap; +use swap::{SequentialVramSwap, should_use_vram_swap}; + use crate::{ config::{DeepseekOcrConfig, ProjectorConfig, load_ocr_config}, quant_snapshot::{LinearSpec, QuantizedSnapshot, SnapshotLinear, SnapshotLoadPlan}, @@ -35,6 +39,7 @@ use deepseek_ocr_core::{ DecodeOutcome, DecodeParameters, ModelKind, ModelLoadArgs, OcrEngine, VisionSettings, normalize_text, }, + runtime::VisionOffload, sampling::{TokenSelectionParams, init_rng, select_token_id}, }; @@ -485,7 +490,7 @@ impl ImageProjector { /// language model. For now it wires the language stack so we can exercise text-only inference. pub struct DeepseekOcrModel { cfg: Arc, - language: DeepseekLanguageModel, + language: UnsafeCell, projector_cfg: Arc, projector: ImageProjector, projector_f32: Option, @@ -525,8 +530,10 @@ struct VisionModules { struct VisionContext<'a> { projector: &'a ImageProjector, - vision: &'a VisionModules, + sam: &'a SamBackbone, + clip: &'a ClipVisionModel, device: &'a Device, + vision_device: Device, dtype: DType, parallel: bool, } @@ -557,20 +564,49 @@ fn prepare_image_tensor_for_device( impl<'a> VisionContext<'a> { fn new_with_dtype( model: &'a DeepseekOcrModel, - vision: &'a VisionModules, + sam: &'a SamBackbone, + clip: &'a ClipVisionModel, projector: &'a ImageProjector, dtype: DType, ) -> Self { - let parallel = matches!(model.device(), Device::Cpu); + let device = model.device(); + let parallel = matches!(device, Device::Cpu); + let vision_device = if device.is_cuda() { + Device::Cpu + } else { + device.clone() + }; Self { projector, - vision, - device: model.device(), + sam, + clip, + device, + vision_device, dtype, parallel, } } + /// Create a VisionContext with an explicit vision device (e.g. CUDA for swapped models). + fn new_with_explicit_device( + model: &'a DeepseekOcrModel, + sam: &'a SamBackbone, + clip: &'a ClipVisionModel, + projector: &'a ImageProjector, + dtype: DType, + vision_device: Device, + ) -> Self { + Self { + projector, + sam, + clip, + device: model.device(), + vision_device, + dtype, + parallel: false, + } + } + fn hidden_size(&self) -> usize { self.projector.hidden_size() } @@ -584,7 +620,7 @@ impl<'a> VisionContext<'a> { } fn prepare_image_tensor(&self, tensor: &Tensor) -> Result { - prepare_image_tensor_for_device(tensor, self.device, self.dtype) + prepare_image_tensor_for_device(tensor, &self.vision_device, self.dtype) } fn append_row_breaks(&self, grid: Tensor, newline: &Tensor) -> Result { @@ -758,12 +794,10 @@ impl<'a> VisionContext<'a> { .prepare_image_tensor(input.global) .context("invalid global image tensor")?; let sam_global = self - .vision .sam .forward(&global) .context("sam forward (global)")?; let clip_global = self - .vision .clip .forward(&global, Some(&sam_global)) .context("clip forward (global)")?; @@ -772,6 +806,11 @@ impl<'a> VisionContext<'a> { .context("concat global clip+sam tokens")? .contiguous() .context("global pre tokens not contiguous")?; + let global_pre = if !global_pre.device().same_device(self.device) { + global_pre.to_device(self.device)? + } else { + global_pre + }; let global_post = self .projector .project(&global_pre) @@ -851,22 +890,29 @@ impl<'a> VisionContext<'a> { self.process_patch_batch(&chunk) } - fn process_patch_batch(&self, batch: &Tensor) -> Result<(Tensor, Tensor)> { + /// Process a single image through SAM → CLIP → projector. + /// Used by `process_patch_batch` to avoid batching multiple images through + /// SAM at once, which would multiply activation memory and cause OOM on + /// low-VRAM GPUs. + fn process_single_image(&self, image: &Tensor) -> Result<(Tensor, Tensor)> { let sam_local = self - .vision .sam - .forward(batch) + .forward(image) .context("sam forward (local)")?; let clip_local = self - .vision .clip - .forward(batch, Some(&sam_local)) + .forward(image, Some(&sam_local)) .context("clip forward (local)")?; let local_pre = self .build_clip_sam_tokens(&clip_local, &sam_local) .context("concat local clip+sam tokens")? .contiguous() .context("local pre tokens not contiguous")?; + let local_pre = if !local_pre.device().same_device(self.device) { + local_pre.to_device(self.device)? + } else { + local_pre + }; let local_post = self .projector .project(&local_pre) @@ -876,6 +922,31 @@ impl<'a> VisionContext<'a> { Ok((local_pre, local_post)) } + fn process_patch_batch(&self, batch: &Tensor) -> Result<(Tensor, Tensor)> { + let batch_size = batch.shape().dims4().map(|d| d.0).unwrap_or(0); + if batch_size <= 1 { + return self.process_single_image(batch); + } + // Process each patch individually to limit peak SAM activation memory + let chunks = batch.chunk(batch_size, 0)?; + let results: Result> = chunks + .into_iter() + .map(|chunk| self.process_single_image(&chunk)) + .collect(); + let (pre_list, post_list): (Vec<_>, Vec<_>) = results? + .into_iter() + .unzip(); + let pre_refs: Vec<_> = pre_list.iter().collect(); + let post_refs: Vec<_> = post_list.iter().collect(); + let local_pre = Tensor::cat(&pre_refs, 0)? + .contiguous() + .context("batched local pre tokens not contiguous")?; + let local_post = Tensor::cat(&post_refs, 0)? + .contiguous() + .context("batched local post tokens not contiguous")?; + Ok((local_pre, local_post)) + } + fn assemble_artifacts( &self, global_pre: Tensor, @@ -990,7 +1061,7 @@ impl DeepseekOcrModel { .context("failed to load language model")?; let low_precision = matches!(dtype, DType::F16 | DType::BF16); - if low_precision { + if low_precision && !device.is_cuda() { let vb_f32_lang = unsafe { VarBuilder::from_mmaped_safetensors( &[resolved_weights.as_path()], @@ -1036,7 +1107,7 @@ impl DeepseekOcrModel { let projector = ImageProjector::load(&vb, projector_cfg.as_ref(), snapshot.as_deref()) .context("failed to load image projector")?; let low_precision = matches!(dtype, DType::F16 | DType::BF16); - let (projector_f32, vision_f32, vision_ocr2_f32) = if low_precision { + let (projector_f32, vision_f32, vision_ocr2_f32) = if low_precision && !device.is_cuda() { let vb_f32 = unsafe { VarBuilder::from_mmaped_safetensors( &[resolved_weights.as_path()], @@ -1074,10 +1145,34 @@ impl DeepseekOcrModel { }; let vision = match variant { OcrVariant::Ocr1 => { - let sam = SamBackbone::new(cfg.as_ref(), &vb.pp("model").pp("sam_model")) - .context("failed to load SAM backbone")?; - let clip = ClipVisionModel::load(cfg.as_ref(), &vb.pp("model").pp("vision_model")) - .context("failed to load CLIP vision model")?; + let (sam, clip) = if device.is_cuda() { + let vb_cpu = unsafe { + VarBuilder::from_mmaped_safetensors( + &[resolved_weights.as_path()], + dtype, + &Device::Cpu, + ) + } + .with_context(|| { + format!( + "failed to mmap cpu weights for vision at {}", + resolved_weights.display() + ) + })?; + let sam = SamBackbone::new(cfg.as_ref(), &vb_cpu.pp("model").pp("sam_model")) + .context("failed to load SAM backbone on CPU")?; + let clip = + ClipVisionModel::load(cfg.as_ref(), &vb_cpu.pp("model").pp("vision_model")) + .context("failed to load CLIP vision model on CPU")?; + (sam, clip) + } else { + let sam = SamBackbone::new(cfg.as_ref(), &vb.pp("model").pp("sam_model")) + .context("failed to load SAM backbone")?; + let clip = + ClipVisionModel::load(cfg.as_ref(), &vb.pp("model").pp("vision_model")) + .context("failed to load CLIP vision model")?; + (sam, clip) + }; VisionBackend::Ocr1(Box::new(VisionModules { sam, clip })) } OcrVariant::Ocr2 => VisionBackend::Ocr2(Box::new( @@ -1088,9 +1183,16 @@ impl DeepseekOcrModel { // Log quantization summary after all quantizable modules (language + projector) are loaded. QuantizationState::global().log_summary(&device); + // On CUDA, keep the token embedding on CPU to free VRAM for inference activations. + if device.is_cuda() { + language + .move_token_embedding_to(&Device::Cpu) + .context("failed to move token embedding to CPU")?; + } + Ok(Self { cfg, - language, + language: UnsafeCell::new(language), projector_cfg, projector, projector_f32, @@ -1126,12 +1228,23 @@ impl DeepseekOcrModel { /// Borrow the language-only component. pub fn language_model(&self) -> &DeepseekLanguageModel { - &self.language + // SAFETY: language is only mutated during controlled swap operations + // (swap_language_for_vision/restore_language) which are single-threaded + // and never overlap with read accesses. + unsafe { &*self.language.get() } + } + + /// Mutably borrow the language model (for device swap during FullGpu vision). + /// # Safety + /// Caller must ensure no concurrent access via language_model(). + pub unsafe fn language_model_mut(&self) -> &mut DeepseekLanguageModel { + // SAFETY: caller must ensure no concurrent mutable access + unsafe { &mut *self.language.get() } } /// Whether flash attention is enabled for the underlying decoder. pub fn flash_attention_enabled(&self) -> bool { - self.language.flash_attention_enabled() + self.language_model().flash_attention_enabled() } /// Access the projector configuration. @@ -1141,21 +1254,22 @@ impl DeepseekOcrModel { /// Construct a fresh dynamic cache sized for this model. pub fn new_cache(&self) -> DynamicCache { - let layers = self.language.transformer_weights().layers.len(); + let layers = self.language_model().transformer_weights().layers.len(); DynamicCache::with_num_layers(layers) } /// Construct a fresh dynamic cache sized for this model, matching the model dtype. pub fn new_cache_for_dtype(&self, dtype: DType) -> Result { - let layers = self.language.transformer_weights().layers.len(); + let lm = self.language_model(); + let layers = lm.transformer_weights().layers.len(); let mut cache = DynamicCache::with_num_layers(layers); // Pre-seed a zero-length cache entry per layer so cache dtype is deterministic. // Low-precision models keep cache storage in f32 to reduce accumulation drift. let store_dtype = cache_store_dtype(self.dtype, dtype); for layer in 0..layers { - let heads = self.language.config().num_attention_heads; - let head_dim = self.language.config().hidden_size / heads; - let v_head_dim = self.language.config().v_head_dim.unwrap_or(head_dim); + let heads = lm.config().num_attention_heads; + let head_dim = lm.config().hidden_size / heads; + let v_head_dim = lm.config().v_head_dim.unwrap_or(head_dim); let device = self.device.clone(); let key_t = Tensor::zeros((1, heads, head_dim, 0), store_dtype, &device)?.contiguous()?; @@ -1168,7 +1282,23 @@ impl DeepseekOcrModel { /// Helper to guard prompt-scoped cache state. pub fn prompt_guard<'a>(&'a self, cache: &'a mut DynamicCache) -> PromptCacheGuard<'a> { - self.language.prompt_guard(cache) + self.language_model().prompt_guard(cache) + } + + /// Move the language model to `target` device in-place, freeing its old device memory. + /// + /// # Safety + /// + /// No code may access `language_model()` during or between calls to this method. + /// Typically called immediately before and after vision processing on a different device. + pub unsafe fn move_language_to_device(&self, target: &Device) -> Result<()> { + // SAFETY: caller must ensure no concurrent access to self.language + let old = unsafe { std::ptr::read(self.language.get()) }; + let moved = old.to_device(target)?; + // `old` is implicitly freed here as `read` gives us ownership. + // Overwrite the stale bytes with the new value. + unsafe { std::ptr::write(self.language.get(), moved) }; + Ok(()) } #[doc(hidden)] @@ -1209,7 +1339,7 @@ impl DeepseekOcrModel { Some(t) => t.clone(), None => { let ids = input_ids.expect("input_ids validity checked above"); - self.language.embed_tokens(ids)? + self.language_model().embed_tokens(ids)? } }; @@ -1238,7 +1368,7 @@ impl DeepseekOcrModel { "failed to cast language input embeddings", )?; - let lm_out = self.language.forward( + let lm_out = self.language_model().forward( None, Some(&embeddings), attention_mask, @@ -1273,44 +1403,41 @@ impl DeepseekOcrModel { ) } + /// Compute image embeddings using the model's stored vision modules. + /// Uses auto-detection for VRAM swap (same as default VisionSettings). pub fn compute_image_embeddings( &self, inputs: &[Option>], ) -> Result> { + self.compute_image_embeddings_impl(inputs, &VisionSettings::default()) + } + + /// Internal implementation with explicit VisionSettings. + fn compute_image_embeddings_impl( + &self, + inputs: &[Option>], + vision: &VisionSettings, + ) -> Result> { + let vision_swap = match vision.vision_offload { + VisionOffload::Auto => vision.vision_swap, + VisionOffload::Sequential | VisionOffload::FullGpu => true, + VisionOffload::Cpu => false, + }; match &self.vision { VisionBackend::Ocr1(_vision) => { + if vision.vision_offload == VisionOffload::FullGpu { + info!("Full GPU mode for vision — loading SAM then CLIP sequentially on CUDA"); + return self.compute_image_embeddings_full_gpu(inputs, vision.patches_per_batch); + } + if vision_swap && should_use_vram_swap(self.device()) { + info!("Low VRAM device — sequential swap mode for vision"); + return self.compute_image_embeddings_with_swap(inputs, vision.cpu_patches, vision.patches_per_batch); + } + let vision = self.vision_modules().context("vision modules missing")?; let compute_dtype = low_precision_compute_dtype(self.dtype); - let vision_native = self.vision_modules().context("vision modules missing")?; - let vision = select_f32(compute_dtype, vision_native, self.vision_modules_f32()); + let vision = select_f32(compute_dtype, vision, self.vision_modules_f32()); let projector = self.projector_for_dtype(compute_dtype); - let ctx = VisionContext::new_with_dtype(self, vision, projector, compute_dtype); - let hidden = ctx.hidden_size(); - let device = ctx.device(); - if ctx.parallel_enabled() { - inputs - .par_iter() - .map(|input| { - if let Some(vision_input) = input { - ctx.process_input(vision_input) - } else { - Tensor::zeros((0, hidden), compute_dtype, device) - .map_err(Into::into) - } - }) - .collect::>>() - } else { - inputs - .iter() - .map(|input| { - if let Some(vision_input) = input { - ctx.process_input(vision_input) - } else { - Tensor::zeros((0, hidden), compute_dtype, device) - .map_err(Into::into) - } - }) - .collect::>>() - } + self.compute_image_embeddings_with(inputs, &vision.sam, &vision.clip, projector, &Device::Cpu) } VisionBackend::Ocr2(vision) => { let hidden = self.projector.hidden_size(); @@ -1376,6 +1503,455 @@ impl DeepseekOcrModel { } } + /// Compute image embeddings using an explicit vision module and vision device. + fn compute_image_embeddings_with( + &self, + inputs: &[Option>], + sam: &SamBackbone, + clip: &ClipVisionModel, + projector: &ImageProjector, + vision_device: &Device, + ) -> Result> { + let compute_dtype = low_precision_compute_dtype(self.dtype); + let ctx = VisionContext::new_with_explicit_device(self, sam, clip, projector, compute_dtype, vision_device.clone()); + let hidden = ctx.hidden_size(); + let device = ctx.device(); + if ctx.parallel_enabled() { + inputs + .par_iter() + .map(|input| { + if let Some(vision_input) = input { + ctx.process_input(vision_input) + } else { + Tensor::zeros((0, hidden), compute_dtype, device) + .map_err(Into::into) + } + }) + .collect::>>() + } else { + inputs + .iter() + .map(|input| { + if let Some(vision_input) = input { + ctx.process_input(vision_input) + } else { + Tensor::zeros((0, hidden), compute_dtype, device) + .map_err(Into::into) + } + }) + .collect::>>() + } + } + + /// Compute image embeddings using hybrid swap: global on CPU, patches on CUDA. + /// + /// The global view runs on CPU (identical to CPU path); patch crops run on + /// CUDA via SequentialVramSwap. Both execute in parallel via scoped threads + /// since they use separate hardware (CPU vs GPU). + fn compute_image_embeddings_with_swap( + &self, + inputs: &[Option>], + cpu_patches: usize, + chunk_size: usize, + ) -> Result> { + let vision = self.vision_modules().context("vision modules missing")?; + let compute_dtype = low_precision_compute_dtype(self.dtype); + let projector = self.projector_for_dtype(compute_dtype); + let device = self.device(); + let hidden_size = projector.hidden_size(); + let newline = projector.image_newline_token(compute_dtype, device)?; + let view_sep = projector.view_separator_token(compute_dtype, device)?; + + let mut results = Vec::with_capacity(inputs.len()); + + for input in inputs { + let result = match input { + Some(vi) => { + let global = prepare_image_tensor_for_device( + vi.global, &Device::Cpu, compute_dtype, + )?; + let patches_data = vi.patches.map(|p| { + prepare_image_tensor_for_device(p, &Device::Cpu, compute_dtype) + }).transpose()?; + + let wp = self.weights_path.clone(); + let cfg = self.cfg.clone(); + let dt = self.dtype; + + // Split patches: first cpu_patches on CPU, rest on GPU + let patches_split = patches_data.as_ref().map(|p| -> Result<_> { + let (batch, _, _, _) = p.shape().dims4()?; + let cpu_n = cpu_patches.min(batch); + Ok((cpu_n, batch - cpu_n)) + }).transpose()?; + + // Parallelize CPU tasks (global + cpu patches) and GPU patches via scoped threads. + let (combined_global_cpu, cpu_pre_tokens, gpu_projected) = std::thread::scope(|s| { + // Thread 1 (CPU): global SAM+CLIP + cpu_patches SAM+CLIP + let cpu_handle = s.spawn(|| -> Result<(Tensor, Vec)> { + let sg = vision + .sam + .forward(&global) + .context("global SAM forward (CPU)")?; + let cg = vision + .clip + .forward(&global, Some(&sg)) + .context("global CLIP forward (CPU)")?; + let (_, clip_seq, _) = cg.shape().dims3()?; + let ct = cg + .narrow(D::Minus2, 1, clip_seq - 1)? + .contiguous()?; + let (_, sc, sh, sw) = sg.shape().dims4()?; + let st = sg + .reshape((1, sc, sh * sw))? + .transpose(1, 2)? + .contiguous()?; + let combined_global = Tensor::cat(&[ct, st], D::Minus1) + .context("cat global combined")?; + + // Process cpu_patches on CPU + let mut cpu_pres = Vec::new(); + if let Some((cpu_n, _)) = patches_split { + if cpu_n > 0 { + let p = patches_data.as_ref().unwrap(); + let cpu_p = p.narrow(0, 0, cpu_n)?; + for i in 0..cpu_n { + let patch = cpu_p.narrow(0, i, 1)?.contiguous()?; + let ss = vision.sam.forward(&patch) + .context("cpu patch SAM forward")?; + let cc = vision.clip.forward(&patch, Some(&ss)) + .context("cpu patch CLIP forward")?; + let (_, cs, _) = cc.shape().dims3()?; + let ct = cc.narrow(D::Minus2, 1, cs - 1)?.contiguous()?; + let (_, sn, sh, sw) = ss.shape().dims4()?; + let st = ss + .reshape((1, sn, sh * sw))? + .transpose(1, 2)? + .contiguous()?; + cpu_pres.push(Tensor::cat(&[ct, st], D::Minus1)?); + } + } + } + Ok((combined_global, cpu_pres)) + }); + + // Thread 2 (GPU): gpu_patches via SequentialVramSwap → projected + let gpu_handle = s.spawn(|| -> Result>> { + let Some((cpu_n, gpu_n)) = patches_split else { + return Ok(None); + }; + if gpu_n == 0 { + return Ok(None); + } + let patches = patches_data.as_ref().unwrap(); + let gpu_p = patches.narrow(0, cpu_n, gpu_n)?; + let gpu_batch = gpu_n; + let swap = SequentialVramSwap::new(&wp, cfg, dt, device); + let num = (gpu_batch + chunk_size - 1) / chunk_size; + let pcs = gpu_p.chunk(num, 0)?; + + let sam_cuda = swap + .load_sam_on_cuda() + .context("load SAM on CUDA for patches")?; + let mut sam_cpu = Vec::with_capacity(num); + for c in &pcs { + let cc = c.to_device(device)?; + let o = sam_cuda + .forward(&cc) + .context("patch SAM forward (CUDA)")?; + sam_cpu.push(o.to_device(&Device::Cpu)?.contiguous()?); + } + drop(sam_cuda); + + let clip_cuda = swap + .load_clip_on_cuda() + .context("load CLIP on CUDA for patches")?; + let mut all_proj = Vec::with_capacity(num); + for (pc, sc) in pcs.iter().zip(sam_cpu.iter()) { + let cc = pc.to_device(device)?; + let st = sc.to_device(device)?; + let co = clip_cuda + .forward(&cc, Some(&st)) + .context("patch CLIP forward (CUDA)")?; + let (_, cseq, _) = co.shape().dims3()?; + let ct = co + .narrow(D::Minus2, 1, cseq - 1)? + .contiguous()?; + let (_, scn, sh, sw) = st.shape().dims4()?; + let sts = st + .reshape((pc.dims()[0], scn, sh * sw))? + .transpose(1, 2)? + .contiguous()?; + let cb = Tensor::cat(&[ct, sts], D::Minus1)?; + all_proj.push( + projector.project(&cb).context("proj forward (patch)")?, + ); + } + drop(clip_cuda); + Ok(Some(all_proj)) + }); + + let (combined, cpu_pres) = cpu_handle.join().unwrap()?; + let gpu_res = gpu_handle.join().unwrap()?; + Ok::<_, anyhow::Error>((combined, cpu_pres, gpu_res)) + })?; + device.synchronize()?; + + // Project cpu_pres on CUDA and merge with gpu projected tokens + let mut all_projected: Vec = Vec::new(); + for cp in &cpu_pre_tokens { + let cp_cuda = cp.to_device(device)?; + all_projected.push( + projector.project(&cp_cuda).context("proj forward (cpu patch)")?, + ); + } + if let Some(gpu_proj) = gpu_projected { + all_projected.extend(gpu_proj); + } + + // Format local grid if any patches were processed + let local_tokens_opt = if !all_projected.is_empty() { + let refs: Vec<&Tensor> = all_projected.iter().collect(); + let proj = Tensor::cat(&refs, 0)?; + let total_batch = if let Some((cpu_n, gpu_n)) = patches_split { + cpu_n + gpu_n + } else { + 0 + }; + let cs = vi.crop_shape.unwrap_or((1, total_batch)); + let (wc, hc) = cs; + let nl = cast_dtype(&newline, proj.dtype(), "local nl")?; + let (np, seq, hd) = proj.shape().dims3()?; + ensure!(np == wc * hc); + let side = (seq as f64).sqrt() as usize; + ensure!(side * side == seq); + let grid = proj + .reshape((hc, wc, side, side, hd))? + .permute((0, 2, 1, 3, 4))? + .reshape((hc * side, wc * side, hd))? + .contiguous()?; + let (rows, cols, _) = grid.shape().dims3()?; + let nle = nl + .reshape((1, 1, hd))? + .expand((rows, 1, hd))? + .contiguous()?; + let wb = Tensor::cat(&[grid, nle], 1)?; + Some(wb.reshape((rows * (cols + 1), hd))?) + } else { + None + }; + + // Project + format global + let combined_cuda = combined_global_cpu.to_device(device)?; + let projected_global = projector + .project(&combined_cuda) + .context("projector forward (global)")?; + let global_tokens = { + let nl = cast_dtype(&newline, projected_global.dtype(), "nl")?; + let (bs, seq, hd) = projected_global.shape().dims3()?; + ensure!(bs == 1, "global batch must be 1, got {bs}"); + let side = (seq as f64).sqrt() as usize; + ensure!(side * side == seq, "global tokens {seq} not square"); + let grid = projected_global + .get(0)? + .reshape((side, side, hd))? + .contiguous()?; + let nle = nl + .reshape((1, 1, hd))? + .expand((side, 1, hd))? + .contiguous()?; + Tensor::cat(&[grid, nle], 1)? + .reshape((side * (side + 1), hd))? + }; + + // Assemble final token sequence + let target_dtype = global_tokens.dtype(); + let mut segments = Vec::new(); + if let Some(lt) = local_tokens_opt { + segments.push(cast_dtype_owned( + lt, target_dtype, "local dtype cast", + )?); + } + segments.push(global_tokens); + let vst = view_sep.reshape((1, hidden_size))?.contiguous()?; + segments.push(cast_dtype_owned(vst, target_dtype, "sep cast")?); + Tensor::cat(&segments, 0)? + } + None => Tensor::zeros((0, hidden_size), compute_dtype, device)?, + }; + results.push(result); + } + + info!("Image embeddings computed via parallel global CPU + patch CUDA"); + Ok(results) + } + + /// Compute image embeddings entirely on GPU (no CPU offload). + /// + /// Loads SAM on CUDA, runs global + patches, drops SAM. + /// Loads CLIP on CUDA, runs global + patches (with cached SAM outputs), drops CLIP. + /// All processing (projector, formatting, assembly) stays on CUDA. + /// Uses SequentialVramSwap internally to load one vision model at a time. + fn compute_image_embeddings_full_gpu( + &self, + inputs: &[Option>], + chunk_size: usize, + ) -> Result> { + // LM stays on CUDA (~950MB). Global SAM+CLIP on CPU. Patches on CUDA in + // mini-batches via SequentialVramSwap (~200MB SAM + ~200MB CLIP temporary). + // Peak VRAM: ~1.6GB — fits on 4GB without eviction. + let compute_dtype = self.dtype; + let projector = self.projector_for_dtype(compute_dtype); + let device = self.device(); + let hidden_size = projector.hidden_size(); + let newline = projector.image_newline_token(compute_dtype, device)?; + let view_sep = projector.view_separator_token(compute_dtype, device)?; + let cpu_vision = self.vision_modules().context("vision modules missing")?; + let wp = self.weights_path.clone(); + let cfg = self.cfg.clone(); + let dt = self.dtype; + + let mut results = Vec::with_capacity(inputs.len()); + + for mb_input in inputs { + let result = match mb_input { + Some(vi) => { + // === Global view: SAM + CLIP on CPU (avoids large attention score on CUDA) === + let global_cpu = prepare_image_tensor_for_device( + vi.global, &Device::Cpu, compute_dtype, + )?; + let sg = cpu_vision + .sam + .forward(&global_cpu) + .context("global SAM forward (CPU)")?; + let cg = cpu_vision + .clip + .forward(&global_cpu, Some(&sg)) + .context("global CLIP forward (CPU)")?; + let (_, clip_seq, _) = cg.shape().dims3()?; + let ct = cg + .narrow(D::Minus2, 1, clip_seq - 1)? + .contiguous()?; + let (_, sc, sh, sw) = sg.shape().dims4()?; + let st = sg + .reshape((1, sc, sh * sw))? + .transpose(1, 2)? + .contiguous()?; + let combined_global = Tensor::cat(&[ct, st], D::Minus1) + .context("cat global combined")?; + // Move to CUDA for projector + let combined_global_gpu = combined_global.to_device(device)?; + + // === Patches: SAM + CLIP on CUDA via sequential swap === + let local_tokens_opt: Option = vi + .patches + .as_ref() + .map(|patches| -> Result { + let patches_cpu = prepare_image_tensor_for_device( + patches, &Device::Cpu, compute_dtype, + )?; + let (batch, _, _, _) = patches_cpu.shape().dims4()?; + let swap = + SequentialVramSwap::new(&wp, cfg.clone(), dt, device); + let num = (batch + chunk_size - 1) / chunk_size; + let pcs = patches_cpu.chunk(num, 0)?; + + let sam_cuda = swap + .load_sam_on_cuda() + .context("load SAM on CUDA for patches")?; + let mut sam_cpu = Vec::with_capacity(num); + for c in &pcs { + let cc = c.to_device(device)?; + let o = sam_cuda + .forward(&cc) + .context("patch SAM forward (CUDA)")?; + sam_cpu.push(o.to_device(&Device::Cpu)?.contiguous()?); + } + drop(sam_cuda); + + let clip_cuda = swap + .load_clip_on_cuda() + .context("load CLIP on CUDA for patches")?; + let mut all_proj = Vec::with_capacity(num); + for (pc, sc) in pcs.iter().zip(sam_cpu.iter()) { + let cc = pc.to_device(device)?; + let st_cuda = sc.to_device(device)?; + let co = clip_cuda + .forward(&cc, Some(&st_cuda)) + .context("patch CLIP forward (CUDA)")?; + let (_, cseq, _) = co.shape().dims3()?; + let pct = co + .narrow(D::Minus2, 1, cseq - 1)? + .contiguous()?; + let (_, scn, sh, sw) = st_cuda.shape().dims4()?; + let pst = st_cuda + .reshape((cc.dims()[0], scn, sh * sw))? + .transpose(1, 2)? + .contiguous()?; + let cb = Tensor::cat(&[pct, pst], D::Minus1)?; + all_proj.push(projector.project(&cb)?); + } + drop(clip_cuda); + + let refs: Vec<&Tensor> = all_proj.iter().collect(); + let proj = Tensor::cat(&refs, 0)?; + let cs = vi.crop_shape.unwrap_or((1, batch)); + let (wc, hc) = cs; + let (_, seq, hd) = proj.shape().dims3()?; + let side = (seq as f64).sqrt() as usize; + let grid = proj + .reshape((hc, wc, side, side, hd))? + .permute((0, 2, 1, 3, 4))? + .reshape((hc * side, wc * side, hd))? + .contiguous()?; + let (rows, cols, _) = grid.shape().dims3()?; + let nle = newline + .reshape((1, 1, hd))? + .expand((rows, 1, hd))? + .contiguous()?; + Ok(Tensor::cat(&[grid, nle], 1)? + .reshape((rows * (cols + 1), hd))?) + }) + .transpose()?; + + // === Project global + format on CUDA === + let projected_global = projector.project(&combined_global_gpu)?; + let nl = cast_dtype(&newline, projected_global.dtype(), "nl")?; + let (_, gseq, ghd) = projected_global.shape().dims3()?; + let side = (gseq as f64).sqrt() as usize; + let grid = projected_global + .get(0)? + .reshape((side, side, ghd))? + .contiguous()?; + let nle = nl + .reshape((1, 1, ghd))? + .expand((side, 1, ghd))? + .contiguous()?; + let global_tokens = Tensor::cat(&[grid, nle], 1)? + .reshape((side * (side + 1), ghd))?; + + // === Assemble === + let target_dtype = global_tokens.dtype(); + let mut segments = Vec::new(); + if let Some(lt) = local_tokens_opt { + segments.push( + cast_dtype_owned(lt, target_dtype, "local dtype cast")?, + ); + } + segments.push(global_tokens); + let vst = view_sep.reshape((1, hidden_size))?.contiguous()?; + segments.push(cast_dtype_owned(vst, target_dtype, "sep cast")?); + Tensor::cat(&segments, 0)? + } + None => Tensor::zeros((0, hidden_size), compute_dtype, device)?, + }; + results.push(result); + } + + info!("Image embeddings computed (global on CPU, patches on CUDA via swap)"); + Ok(results) + } + pub fn compute_vision_projection( &self, input: &VisionInput<'_>, @@ -1690,7 +2266,7 @@ impl DeepseekOcrModel { let dtype = low_precision_compute_dtype(self.dtype); let vision = select_f32(dtype, vision.as_ref(), self.vision_modules_f32()); let projector = self.projector_for_dtype(dtype); - VisionContext::new_with_dtype(self, vision, projector, dtype) + VisionContext::new_with_dtype(self, &vision.sam, &vision.clip, projector, dtype) .process_input_full(input) } VisionBackend::Ocr2(_) => { @@ -1986,9 +2562,10 @@ impl DeepseekOcrModel { let token_index = usize::try_from(current) .context("token id out of range while preparing decode embedding")?; let mut decode_inputs = self - .language + .language_model() .token_embedding_for_id(token_index) .context("failed to gather embedding for decode token")? + .to_device(self.device())? .unsqueeze(0)? .unsqueeze(0)?; decode_inputs = cast_dtype_owned( @@ -2384,7 +2961,7 @@ impl OcrEngine for DeepseekOcrModel { vision.crop_mode, ) .with_context(|| "vision input failed")?; - let embeddings = compute_image_embeddings(self, &owned_inputs) + let embeddings = compute_image_embeddings(self, &owned_inputs, vision) .with_context(|| "image embedding failed")?; let (input_ids_vec, mask_vec) = build_prompt_tokens( tokenizer, @@ -2434,6 +3011,8 @@ impl OcrEngine for DeepseekOcrModel { .into_iter() .next() .unwrap_or_default(); + let gen_first: Vec = generated_tokens.iter().take(20).copied().collect(); + trace!("decode: first 20 generated tokens = {gen_first:?}"); let decoded = tokenizer .decode( &generated_tokens @@ -2443,6 +3022,7 @@ impl OcrEngine for DeepseekOcrModel { true, ) .unwrap_or_default(); + trace!("decode: decoded text length = {} bytes", decoded.len()); let normalized = normalize_text(&decoded); Ok(DecodeOutcome { @@ -2494,6 +3074,7 @@ fn prepare_vision_inputs( fn compute_image_embeddings( model: &DeepseekOcrModel, owned_inputs: &[OwnedVisionInput], + vision: VisionSettings, ) -> Result> { let timer = Timer::new("vision.compute_embeddings"); if owned_inputs.is_empty() { @@ -2507,7 +3088,7 @@ fn compute_image_embeddings( .map(|owned| Some(owned.as_ref())) .collect(); trace!("Computing image embeddings for {} image(s)...", refs.len()); - let outputs = model.compute_image_embeddings(&refs); + let outputs = model.compute_image_embeddings_impl(&refs, &vision); match &outputs { Ok(values) => { let tokens_total: u64 = values diff --git a/crates/infer-deepseek/src/model/swap.rs b/crates/infer-deepseek/src/model/swap.rs new file mode 100644 index 0000000..753c205 --- /dev/null +++ b/crates/infer-deepseek/src/model/swap.rs @@ -0,0 +1,132 @@ +use std::sync::Arc; +use std::path::Path; +use std::time::Instant; + +use anyhow::{Context, Result}; +use candle_core::{DType, Device}; +use candle_nn::VarBuilder; +use tracing::info; + +use crate::config::DeepseekOcrConfig; +use crate::vision::{ClipVisionModel, SamBackbone}; + +/// Detects total VRAM in bytes for the given CUDA device. +/// Tries nvidia-smi first, then falls back to `DEEPSEEK_OCR_VRAM_MB` env var. +pub(crate) fn get_vram_bytes(device: &Device) -> Option { + if !device.is_cuda() { + return None; + } + if let Ok(output) = std::process::Command::new("nvidia-smi") + .args(["--query-gpu=memory.total", "--format=csv,noheader,nounits"]) + .output() + { + if let Ok(s) = String::from_utf8(output.stdout) { + if let Ok(mb) = s.trim().parse::() { + return Some(mb * 1024 * 1024); + } + } + } + if let Ok(vram_mb) = std::env::var("DEEPSEEK_OCR_VRAM_MB") { + if let Ok(mb) = vram_mb.parse::() { + return Some(mb * 1024 * 1024); + } + } + None +} + +/// Whether to use sequential VRAM swap (low-VRAM devices < 6GB). +pub(crate) fn should_use_vram_swap(device: &Device) -> bool { + match get_vram_bytes(device) { + Some(bytes) => bytes < 6 * 1024 * 1024 * 1024, + None => false, + } +} + +/// Manages sequential VRAM swap for low-VRAM CUDA devices. +/// +/// Never holds more than one heavy model in VRAM at a time: +/// Phase 1: SAM on CUDA (~1.26 GB + activations ≈ 1.6 GB peak) +/// Phase 2: CLIP on CUDA (~0.86 GB + activations ≈ 1.1 GB peak) +/// +/// The LM (Q4K, ~950 MB) stays on CUDA permanently because QMatMul +/// (quantized matrix multiply) does not support device transfer. +/// +/// High-VRAM devices (>6 GB) skip this entirely and run normally. +pub(crate) struct SequentialVramSwap { + weights_path: std::path::PathBuf, + cfg: Arc, + dtype: DType, + cuda_device: Device, +} + +impl SequentialVramSwap { + pub(crate) fn new( + weights_path: &Path, + cfg: Arc, + dtype: DType, + cuda_device: &Device, + ) -> Self { + info!("SequentialVramSwap initialized — one vision model at a time on CUDA"); + Self { + weights_path: weights_path.to_path_buf(), + cfg, + dtype, + cuda_device: cuda_device.clone(), + } + } + + /// Load SAM backbone on CUDA from the safetensor file. + /// Caller MUST `drop()` the result before calling `load_clip_on_cuda()` + /// or running LM generation. + pub(crate) fn load_sam_on_cuda(&self) -> Result { + let t0 = Instant::now(); + let vb = unsafe { + VarBuilder::from_mmaped_safetensors( + &[self.weights_path.as_path()], + self.dtype, + &self.cuda_device, + ) + } + .with_context(|| { + format!( + "failed to mmap SAM weights on CUDA from {}", + self.weights_path.display() + ) + })?; + let sam = SamBackbone::new(self.cfg.as_ref(), &vb.pp("model").pp("sam_model")) + .context("failed to load SAM backbone on CUDA")?; + info!( + elapsed = %format!("{:.2}s", t0.elapsed().as_secs_f32()), + "SAM loaded on CUDA" + ); + Ok(sam) + } + + /// Load CLIP vision model on CUDA from the safetensor file. + /// Caller MUST `drop()` the result before calling `load_sam_on_cuda()` + /// or running LM generation. + pub(crate) fn load_clip_on_cuda(&self) -> Result { + let t0 = Instant::now(); + let vb = unsafe { + VarBuilder::from_mmaped_safetensors( + &[self.weights_path.as_path()], + self.dtype, + &self.cuda_device, + ) + } + .with_context(|| { + format!( + "failed to mmap CLIP weights on CUDA from {}", + self.weights_path.display() + ) + })?; + let clip = ClipVisionModel::load(self.cfg.as_ref(), &vb.pp("model").pp("vision_model")) + .context("failed to load CLIP vision model on CUDA")?; + info!( + elapsed = %format!("{:.2}s", t0.elapsed().as_secs_f32()), + "CLIP loaded on CUDA" + ); + Ok(clip) + } + +} diff --git a/crates/infer-deepseek/src/transformer/model.rs b/crates/infer-deepseek/src/transformer/model.rs index 1c6ecc1..949006a 100644 --- a/crates/infer-deepseek/src/transformer/model.rs +++ b/crates/infer-deepseek/src/transformer/model.rs @@ -1,7 +1,7 @@ use std::sync::Arc; use anyhow::{Context, Result, ensure}; -use candle_core::{DType, IndexOp, Tensor, quantized::QMatMul}; +use candle_core::{DType, Device, IndexOp, Tensor, quantized::QMatMul}; use candle_nn::ops::{rms_norm, rms_norm_slow}; use deepseek_ocr_core::tensor::gather_token_embeddings; @@ -12,7 +12,7 @@ use crate::{ transformer::{ cache::{DynamicCache, PromptCacheGuard}, decoder::TransformerDecoder, - weights::{DeepseekLanguageModelWeights, TransformerWeights}, + weights::{DeepseekLanguageModelWeights, RmsNormWeights, TransformerWeights, qmatmul_to_device}, }, }; @@ -112,6 +112,13 @@ impl DeepseekLanguageModel { self.decoder.flash_attention_enabled() } + /// Move the token embedding tensor to the given device. + /// Useful on CUDA to keep the embedding on CPU and save VRAM for activations. + pub fn move_token_embedding_to(&mut self, device: &Device) -> Result<()> { + self.token_embedding = self.token_embedding.to_device(device)?; + Ok(()) + } + /// Lookup token embeddings for the provided input ids. pub fn embed_tokens(&self, input_ids: &Tensor) -> Result { let ids = if input_ids.dtype() == DType::I64 { @@ -276,4 +283,41 @@ impl DeepseekLanguageModel { aux_loss: decoder_out.aux_loss, }) } + + /// Reconstruct this language model with all tensors moved to `device`. + /// Creates a fresh decoder (with empty RoPE cache) on the target device. + pub fn to_device(&self, device: &Device) -> Result { + let weights = DeepseekLanguageModelWeights { + token_embedding: self.token_embedding.to_device(device)?, + transformer: self.transformer_weights.to_device(device)?, + final_layernorm: RmsNormWeights { + weight: self.final_layernorm.to_device(device)?, + }, + lm_head_weight: self + .lm_head_weight + .as_ref() + .map(|w| w.to_device(device)) + .transpose()?, + lm_head_q: self + .lm_head_q + .as_ref() + .map(|qm| Ok::<_, anyhow::Error>(Arc::new(qmatmul_to_device(qm, device)?))) + .transpose()?, + lm_out_dim: self.lm_out_dim, + lm_in_dim: self.lm_in_dim, + lm_head_label: self.lm_head_label.clone(), + }; + let mut model = Self::from_weights(self.cfg.clone(), weights); + model.final_layernorm_f32 = self + .final_layernorm_f32 + .as_ref() + .map(|t| t.to_device(device)) + .transpose()?; + model.lm_head_weight_f32 = self + .lm_head_weight_f32 + .as_ref() + .map(|t| t.to_device(device)) + .transpose()?; + Ok(model) + } } diff --git a/crates/infer-deepseek/src/transformer/weights.rs b/crates/infer-deepseek/src/transformer/weights.rs index c097c81..d371c6c 100644 --- a/crates/infer-deepseek/src/transformer/weights.rs +++ b/crates/infer-deepseek/src/transformer/weights.rs @@ -3,6 +3,11 @@ use std::{ sync::Arc, }; +use anyhow::{Context, Result, ensure}; +use candle_core::{DType, Device, Tensor, quantized::{QMatMul, QStorage, QTensor}}; +use candle_nn::VarBuilder; +use tracing::trace; + use crate::{ config::DeepseekV2Config, quant_snapshot::{ @@ -12,10 +17,6 @@ use crate::{ LinearLayerGroup, QuantModule, QuantizationOutcome, QuantizationState, backend_label, }, }; -use anyhow::{Context, Result, ensure}; -use candle_core::{DType, Tensor, quantized::QMatMul}; -use candle_nn::VarBuilder; -use tracing::trace; /// Fully connected layer weights captured directly from safetensors via [`VarBuilder`]. #[derive(Clone)] @@ -64,7 +65,7 @@ impl LinearWeights { .with_context(|| format!("missing linear weight `{label}`"))? .contiguous()?; let mut weight = Some(weight_init.clone()); - let weight_f32 = if matches!(weight_init.dtype(), DType::F16 | DType::BF16) { + let mut weight_f32 = if matches!(weight_init.dtype(), DType::F16 | DType::BF16) { Some(weight_init.to_dtype(DType::F32)?.contiguous()?) } else { None @@ -101,6 +102,7 @@ impl LinearWeights { bias_tensor = bias; qmatmul = Some(qm); weight = None; + weight_f32 = None; } SnapshotLinear::Float { weight: snapshot_weight, @@ -121,6 +123,7 @@ impl LinearWeights { quant.record_attempt(module, QuantizationOutcome::Fallback); bias_tensor = bias; weight = Some(snapshot_weight); + weight_f32 = None; } } } @@ -637,3 +640,163 @@ pub(crate) fn qualified_name(vb: &VarBuilder, tensor: &str) -> String { } // Runtime quantization path removed: no `maybe_quantize_linear` fallback. + +// --------------------------------------------------------------------------- +// Device transfer: used by the VRAM-swap path to move LM weights between CPU +// and GPU while preserving quantization (QMatMul::QTensor reconstructed via +// raw byte transfer). +// --------------------------------------------------------------------------- + +/// Copy a [`QMatMul`] to another device, preserving its quantized form when +/// possible (the `QTensor` variant gets reconstructed from raw bytes). +pub(crate) fn qmatmul_to_device(qm: &QMatMul, device: &Device) -> Result { + match qm { + QMatMul::QTensor(qt) => { + let data = qt.data()?; + let dtype = qt.dtype(); + let shape = qt.shape().clone(); + let storage = QStorage::from_data(data, device, dtype)?; + let qt_new = QTensor::new(storage, shape)?; + Ok(QMatMul::QTensor(Arc::new(qt_new))) + } + QMatMul::Tensor(t) => Ok(QMatMul::Tensor(t.to_device(device)?)), + QMatMul::TensorF16(t) => Ok(QMatMul::TensorF16(t.to_device(device)?)), + } +} + +impl LinearWeights { + pub fn to_device(&self, device: &Device) -> Result { + Ok(Self { + weight: self + .weight + .as_ref() + .map(|w| w.to_device(device)) + .transpose()?, + weight_f32: self + .weight_f32 + .as_ref() + .map(|w| w.to_device(device)) + .transpose()?, + bias: self + .bias + .as_ref() + .map(|b| b.to_device(device)) + .transpose()?, + qmatmul: self + .qmatmul + .as_ref() + .map(|qm| Ok::<_, anyhow::Error>(Arc::new(qmatmul_to_device(qm, device)?))) + .transpose()?, + out_dim: self.out_dim, + in_dim: self.in_dim, + label: self.label.clone(), + }) + } +} + +impl RmsNormWeights { + pub fn to_device(&self, device: &Device) -> Result { + Ok(Self { + weight: self.weight.to_device(device)?, + }) + } +} + +impl AttentionWeights { + pub fn to_device(&self, device: &Device) -> Result { + Ok(Self { + q_proj: self.q_proj.to_device(device)?, + k_proj: self.k_proj.to_device(device)?, + v_proj: self.v_proj.to_device(device)?, + o_proj: self.o_proj.to_device(device)?, + }) + } +} + +impl DenseMlpWeights { + pub fn to_device(&self, device: &Device) -> Result { + Ok(Self { + gate_proj: self.gate_proj.to_device(device)?, + up_proj: self.up_proj.to_device(device)?, + down_proj: self.down_proj.to_device(device)?, + }) + } +} + +impl MoeWeights { + pub fn to_device(&self, device: &Device) -> Result { + Ok(Self { + gate_weight: self.gate_weight.to_device(device)?, + experts: self + .experts + .iter() + .map(|e| e.to_device(device)) + .collect::>>()?, + shared_experts: self + .shared_experts + .as_ref() + .map(|s| s.to_device(device)) + .transpose()?, + aux_bias: self + .aux_bias + .as_ref() + .map(|b| b.to_device(device)) + .transpose()?, + }) + } +} + +impl MlpWeights { + pub fn to_device(&self, device: &Device) -> Result { + match self { + Self::Dense(d) => Ok(Self::Dense(d.to_device(device)?)), + Self::Moe(m) => Ok(Self::Moe(m.to_device(device)?)), + } + } +} + +impl TransformerBlockWeights { + pub fn to_device(&self, device: &Device) -> Result { + Ok(Self { + attention: self.attention.to_device(device)?, + mlp: self.mlp.to_device(device)?, + input_layernorm: self.input_layernorm.to_device(device)?, + post_attention_layernorm: self.post_attention_layernorm.to_device(device)?, + }) + } +} + +impl TransformerWeights { + pub fn to_device(&self, device: &Device) -> Result { + Ok(Self { + layers: self + .layers + .iter() + .map(|l| l.to_device(device)) + .collect::>>()?, + }) + } +} + +impl DeepseekLanguageModelWeights { + pub fn to_device(&self, device: &Device) -> Result { + Ok(Self { + token_embedding: self.token_embedding.to_device(device)?, + transformer: self.transformer.to_device(device)?, + final_layernorm: self.final_layernorm.to_device(device)?, + lm_head_weight: self + .lm_head_weight + .as_ref() + .map(|w| w.to_device(device)) + .transpose()?, + lm_head_q: self + .lm_head_q + .as_ref() + .map(|qm| Ok::<_, anyhow::Error>(Arc::new(qmatmul_to_device(qm, device)?))) + .transpose()?, + lm_out_dim: self.lm_out_dim, + lm_in_dim: self.lm_in_dim, + lm_head_label: self.lm_head_label.clone(), + }) + } +} diff --git a/crates/infer-deepseek/tests/long_generation_baseline.rs b/crates/infer-deepseek/tests/long_generation_baseline.rs index c111ed3..1e21f5d 100644 --- a/crates/infer-deepseek/tests/long_generation_baseline.rs +++ b/crates/infer-deepseek/tests/long_generation_baseline.rs @@ -253,6 +253,7 @@ fn run_one_baseline(baseline_dir: &Path) -> Result<()> { base_size, image_size, crop_mode, + ..Default::default() }; let outcome = model.decode(