diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3dcad74825..9499cbec8e 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -643,10 +643,12 @@ if(NOT BUILD_CPU_ONLY) src/neighbors/tiered_index.cu src/neighbors/sparse_brute_force.cu src/neighbors/vamana_build_float.cu + src/neighbors/vamana_build_half.cu src/neighbors/vamana_build_uint8.cu src/neighbors/vamana_build_int8.cu src/neighbors/vamana_codebooks_float.cu src/neighbors/vamana_serialize_float.cu + src/neighbors/vamana_serialize_half.cu src/neighbors/vamana_serialize_uint8.cu src/neighbors/vamana_serialize_int8.cu src/preprocessing/quantize/scalar.cu diff --git a/cpp/include/cuvs/neighbors/vamana.hpp b/cpp/include/cuvs/neighbors/vamana.hpp index c3ba86d5b6..a56292c59d 100644 --- a/cpp/include/cuvs/neighbors/vamana.hpp +++ b/cpp/include/cuvs/neighbors/vamana.hpp @@ -477,6 +477,74 @@ auto build(raft::resources const& res, raft::host_matrix_view dataset) -> cuvs::neighbors::vamana::index; +/** + * @brief Build the index from the dataset for efficient DiskANN search. + * + * The build utilizes the Vamana insertion-based algorithm to create the graph. The algorithm + * starts with an empty graph and iteratively inserts batches of nodes. Each batch involves + * performing a greedy search for each vector to be inserted, and inserting it with edges to + * all nodes traversed during the search. Reverse edges are also inserted and robustPrune is applied + * to improve graph quality. The index_params struct controls the degree of the final graph.
+ * + * The following distance metrics are supported: + * - L2 + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters; + * vamana::index_params index_params; + * // create and fill index from a [N, D] dataset; + * auto index = vamana::build(res, index_params, dataset); + * // write index to file for CPU-based DiskANN search (cuVS does not yet support search) + * vamana::serialize(res, filename, index); + * @endcode + * + * @param[in] res + * @param[in] params parameters for building the index + * @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, dim] + * + * @return the constructed vamana index + */ +auto build(raft::resources const& res, + const cuvs::neighbors::vamana::index_params& params, + raft::device_matrix_view dataset) + -> cuvs::neighbors::vamana::index; + +/** + * @brief Build the index from the dataset for efficient DiskANN search. + * + * The build utilizes the Vamana insertion-based algorithm to create the graph. The algorithm + * starts with an empty graph and iteratively inserts batches of nodes. Each batch involves + * performing a greedy search for each vector to be inserted, and inserting it with edges to + * all nodes traversed during the search. Reverse edges are also inserted and robustPrune is applied + * to improve graph quality. The index_params struct controls the degree of the final graph.
+ * + * The following distance metrics are supported: + * - L2 + * + * Usage example: + * @code{.cpp} + * using namespace cuvs::neighbors; + * // use default index parameters; + * vamana::index_params index_params; + * // create and fill index from a [N, D] dataset; + * auto index = vamana::build(res, index_params, dataset); + * // write index to file for CPU-based DiskANN search (cuVS does not yet support search) + * vamana::serialize(res, filename, index); + * @endcode + * + * @param[in] res + * @param[in] params parameters for building the index + * @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim] + * + * @return the constructed vamana index + */ +auto build(raft::resources const& res, + const cuvs::neighbors::vamana::index_params& params, + raft::host_matrix_view dataset) + -> cuvs::neighbors::vamana::index; + /** * @} */ @@ -577,6 +645,42 @@ void serialize(raft::resources const& handle, bool include_dataset = true, bool sector_aligned = false); +/** + * Save the index to file. + * + * Matches the file format used by the DiskANN open-source repository, allowing cross-compatibility. + * + * @warning Serializing a half-precision (float16) dataset with `include_dataset=true` produces a + * `.data` file containing raw half values. DiskANN only supports float, int8, and uint8 data types, + * so this file cannot be used with DiskANN search. Similarly, sector-aligned serialization embeds + * half-precision vectors directly into the disk index, which DiskANN search cannot interpret. + * Set both `include_dataset` and `sector_aligned` to false if DiskANN compatibility is required.
+ * + * @code{.cpp} + * #include + * #include + * + * raft::resources handle; + * + * // create a string with a filepath + * std::string file_prefix("/path/to/index/prefix"); + * // create an index with `auto index = cuvs::neighbors::vamana::build(...);` + * cuvs::neighbors::vamana::serialize(handle, file_prefix, index); + * @endcode + * + * @param[in] handle the raft handle + * @param[in] file_prefix prefix of path and name of index files + * @param[in] index Vamana index + * @param[in] include_dataset whether or not to serialize the dataset + * @param[in] sector_aligned whether output file should be aligned to disk sectors of 4096 bytes + * + */ +void serialize(raft::resources const& handle, + const std::string& file_prefix, + const cuvs::neighbors::vamana::index& index, + bool include_dataset = true, + bool sector_aligned = false); + /** * @} */ diff --git a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh index 887c9eb448..fa8af7c745 100644 --- a/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh +++ b/cpp/src/neighbors/detail/vamana/vamana_serialize.cuh @@ -310,6 +310,21 @@ void serialize(raft::resources const& res, bool include_dataset, bool sector_aligned) { + if constexpr (std::is_same_v) { + if (include_dataset) { + RAFT_LOG_WARN( + "Serializing a half-precision (float16) dataset is not compatible with DiskANN search. " + "The serialized .data file uses raw half values with no type metadata, so DiskANN (which " + "only supports float, int8, and uint8) will misinterpret it. " + "Set include_dataset to false if cross-compatibility with DiskANN is required."); + } + if (sector_aligned) { + RAFT_LOG_WARN( + "Sector-aligned serialization embeds half-precision vectors directly into the disk index. 
" + "DiskANN search does not support half-precision data and will misinterpret these vectors."); + } + } + auto d_graph = index_.graph(); auto h_graph = raft::make_host_matrix(d_graph.extent(0), d_graph.extent(1)); raft::copy(res, h_graph.view(), d_graph); diff --git a/cpp/src/neighbors/vamana_build_half.cu b/cpp/src/neighbors/vamana_build_half.cu new file mode 100644 index 0000000000..817f8c0488 --- /dev/null +++ b/cpp/src/neighbors/vamana_build_half.cu @@ -0,0 +1,33 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include "vamana.cuh" +#include +#include + +namespace cuvs::neighbors::vamana { + +#define RAFT_INST_VAMANA_BUILD(T, IdxT) \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::vamana::index_params& params, \ + raft::device_matrix_view dataset) \ + -> cuvs::neighbors::vamana::index \ + { \ + return cuvs::neighbors::vamana::build(handle, params, dataset); \ + } \ + \ + auto build(raft::resources const& handle, \ + const cuvs::neighbors::vamana::index_params& params, \ + raft::host_matrix_view dataset) \ + -> cuvs::neighbors::vamana::index \ + { \ + return cuvs::neighbors::vamana::build(handle, params, dataset); \ + } + +RAFT_INST_VAMANA_BUILD(half, uint32_t); + +#undef RAFT_INST_VAMANA_BUILD + +} // namespace cuvs::neighbors::vamana diff --git a/cpp/src/neighbors/vamana_serialize_half.cu b/cpp/src/neighbors/vamana_serialize_half.cu new file mode 100644 index 0000000000..b9af307841 --- /dev/null +++ b/cpp/src/neighbors/vamana_serialize_half.cu @@ -0,0 +1,14 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. 
+ * SPDX-License-Identifier: Apache-2.0 + */ + +#include "vamana_serialize.cuh" + +#include + +namespace cuvs::neighbors::vamana { + +CUVS_INST_VAMANA_SERIALIZE(half); + +} // namespace cuvs::neighbors::vamana diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index bbddef87e5..8c4b355683 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -211,8 +211,8 @@ ConfigureTest( ConfigureTest( NAME NEIGHBORS_ANN_VAMANA_TEST - PATH neighbors/ann_vamana/test_float_uint32_t.cu neighbors/ann_vamana/test_int8_t_uint32_t.cu - neighbors/ann_vamana/test_uint8_t_uint32_t.cu + PATH neighbors/ann_vamana/test_float_uint32_t.cu neighbors/ann_vamana/test_half_uint32_t.cu + neighbors/ann_vamana/test_int8_t_uint32_t.cu neighbors/ann_vamana/test_uint8_t_uint32_t.cu GPUS 1 PERCENT 100 FETCH_CODEBOOKS diff --git a/cpp/tests/neighbors/ann_vamana.cuh b/cpp/tests/neighbors/ann_vamana.cuh index 0397c74e1c..266f53d032 100644 --- a/cpp/tests/neighbors/ann_vamana.cuh +++ b/cpp/tests/neighbors/ann_vamana.cuh @@ -1,5 +1,5 @@ /* - * SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION. + * SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION. 
* SPDX-License-Identifier: Apache-2.0 */ @@ -11,6 +11,7 @@ #include "naive_knn.cuh" +#include #include #include #include @@ -19,7 +20,8 @@ #include #include #include -#include +#include +#include #include #include @@ -27,8 +29,6 @@ #include -#include - #include #include #include @@ -251,7 +251,22 @@ class AnnVamanaTest : public ::testing::TestWithParam { database.resize(((size_t)ps.n_rows) * ps.dim, stream_); search_queries.resize(((size_t)ps.n_queries) * ps.dim, stream_); raft::random::RngState r(1234ULL); - if constexpr (std::is_same{}) { + if constexpr (std::is_same_v) { + rmm::device_uvector database_f(ps.n_rows * ps.dim, stream_); + rmm::device_uvector queries_f(ps.n_queries * ps.dim, stream_); + raft::random::normal(handle_, r, database_f.data(), ps.n_rows * ps.dim, 0.1f, 2.0f); + raft::random::normal(handle_, r, queries_f.data(), ps.n_queries * ps.dim, 0.1f, 2.0f); + auto database_f_view = + raft::make_device_vector_view(database_f.data(), database_f.size()); + auto database_h_view = + raft::make_device_vector_view(database.data(), database.size()); + raft::linalg::map(handle_, database_h_view, raft::cast_op{}, database_f_view); + auto queries_f_view = + raft::make_device_vector_view(queries_f.data(), queries_f.size()); + auto queries_h_view = + raft::make_device_vector_view(search_queries.data(), search_queries.size()); + raft::linalg::map(handle_, queries_h_view, raft::cast_op{}, queries_f_view); + } else if constexpr (std::is_same_v) { raft::random::normal(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0)); raft::random::normal( handle_, r, search_queries.data(), ps.n_queries * ps.dim, DataT(0.1), DataT(2.0)); diff --git a/cpp/tests/neighbors/ann_vamana/test_half_uint32_t.cu b/cpp/tests/neighbors/ann_vamana/test_half_uint32_t.cu new file mode 100644 index 0000000000..d03518e9fc --- /dev/null +++ b/cpp/tests/neighbors/ann_vamana/test_half_uint32_t.cu @@ -0,0 +1,17 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA 
CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +#include + +#include "../ann_vamana.cuh" + +namespace cuvs::neighbors::vamana { + +typedef AnnVamanaTest AnnVamanaTestF16_U32; +TEST_P(AnnVamanaTestF16_U32, AnnVamana) { this->testVamana(); } + +INSTANTIATE_TEST_CASE_P(AnnVamanaTest, AnnVamanaTestF16_U32, ::testing::ValuesIn(inputs)); + +} // namespace cuvs::neighbors::vamana