Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -643,10 +643,12 @@ if(NOT BUILD_CPU_ONLY)
src/neighbors/tiered_index.cu
src/neighbors/sparse_brute_force.cu
src/neighbors/vamana_build_float.cu
src/neighbors/vamana_build_half.cu
src/neighbors/vamana_build_uint8.cu
src/neighbors/vamana_build_int8.cu
src/neighbors/vamana_codebooks_float.cu
src/neighbors/vamana_serialize_float.cu
src/neighbors/vamana_serialize_half.cu
src/neighbors/vamana_serialize_uint8.cu
src/neighbors/vamana_serialize_int8.cu
src/preprocessing/quantize/scalar.cu
Expand Down
104 changes: 104 additions & 0 deletions cpp/include/cuvs/neighbors/vamana.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,74 @@ auto build(raft::resources const& res,
raft::host_matrix_view<const uint8_t, int64_t, raft::row_major> dataset)
-> cuvs::neighbors::vamana::index<uint8_t, uint32_t>;

/**
* @brief Build the index from the dataset for efficient DiskANN search.
*
* The build utilizes the Vamana insertion-based algorithm to create the graph. The algorithm
* starts with an empty graph and iteratively inserts batches of nodes. Each batch involves
* performing a greedy search for each vector to be inserted, and inserting it with edges to
* all nodes traversed during the search. Reverse edges are also inserted and robustPrune is applied
* to improve graph quality. The index_params struct controls the degree of the final graph.
*
* The following distance metrics are supported:
* - L2
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters;
* vamana::index_params index_params;
* // create and fill index from a [N, D] dataset;
* auto index = vamana::build(res, index_params, dataset);
* // write index to file to be used by CPU-based DiskANN search
* // (cuVS does not yet support search)
* vamana::serialize(res, filename, index);
* @endcode
*
* @note This overload takes half-precision (float16) data. The half overload of `serialize`
* warns that DiskANN only supports float, int8, and uint8 data, so a serialized half dataset
* cannot be used with DiskANN search.
*
* @param[in] res raft resources handle
* @param[in] params parameters for building the index
* @param[in] dataset a matrix view (device) to a row-major matrix [n_rows, dim]
*
* @return the constructed vamana index
*/
auto build(raft::resources const& res,
const cuvs::neighbors::vamana::index_params& params,
raft::device_matrix_view<const half, int64_t, raft::row_major> dataset)
-> cuvs::neighbors::vamana::index<half, uint32_t>;

/**
* @brief Build the index from the dataset for efficient DiskANN search.
*
* The build utilizes the Vamana insertion-based algorithm to create the graph. The algorithm
* starts with an empty graph and iteratively inserts batches of nodes. Each batch involves
* performing a greedy search for each vector to be inserted, and inserting it with edges to
* all nodes traversed during the search. Reverse edges are also inserted and robustPrune is applied
* to improve graph quality. The index_params struct controls the degree of the final graph.
*
* The following distance metrics are supported:
* - L2
*
* Usage example:
* @code{.cpp}
* using namespace cuvs::neighbors;
* // use default index parameters;
* vamana::index_params index_params;
* // create and fill index from a [N, D] dataset;
* auto index = vamana::build(res, index_params, dataset);
* // write index to file to be used by CPU-based DiskANN search
* // (cuVS does not yet support search)
* vamana::serialize(res, filename, index);
* @endcode
*
* @note This overload takes half-precision (float16) data. The half overload of `serialize`
* warns that DiskANN only supports float, int8, and uint8 data, so a serialized half dataset
* cannot be used with DiskANN search.
*
* @param[in] res raft resources handle
* @param[in] params parameters for building the index
* @param[in] dataset a matrix view (host) to a row-major matrix [n_rows, dim]
*
* @return the constructed vamana index
*/
auto build(raft::resources const& res,
const cuvs::neighbors::vamana::index_params& params,
raft::host_matrix_view<const half, int64_t, raft::row_major> dataset)
-> cuvs::neighbors::vamana::index<half, uint32_t>;

/**
* @}
*/
Expand Down Expand Up @@ -577,6 +645,42 @@ void serialize(raft::resources const& handle,
bool include_dataset = true,
bool sector_aligned = false);

/**
* Save the index to file.
*
* Matches the file format used by the DiskANN open-source repository, allowing cross-compatibility.
*
* @warning Serializing a half-precision (float16) dataset with `include_dataset=true` (the
* default) produces a `.data` file containing raw half values. DiskANN only supports float, int8,
* and uint8 data types, so this file cannot be used with DiskANN search. Similarly, sector-aligned
* serialization embeds half-precision vectors directly into the disk index, which DiskANN search
* cannot interpret. Set `include_dataset` to false and keep `sector_aligned` false if
* cross-compatibility with DiskANN is required.
*
* @code{.cpp}
* #include <raft/core/resources.hpp>
* #include <cuvs/neighbors/vamana.hpp>
*
* raft::resources handle;
*
* // create a string with a filepath
* std::string file_prefix("/path/to/index/prefix");
* // create an index with `auto index = cuvs::neighbors::vamana::build(...);`
* cuvs::neighbors::vamana::serialize(handle, file_prefix, index);
* @endcode
*
* @param[in] handle the raft handle
* @param[in] file_prefix prefix of path and name of index files
* @param[in] index Vamana index built from half-precision (float16) data
* @param[in] include_dataset whether or not to serialize the dataset (defaults to true)
* @param[in] sector_aligned whether output file should be aligned to disk sectors of 4096 bytes
* (defaults to false)
*/
void serialize(raft::resources const& handle,
const std::string& file_prefix,
const cuvs::neighbors::vamana::index<half, uint32_t>& index,
bool include_dataset = true,
bool sector_aligned = false);

/**
* @}
*/
Expand Down
15 changes: 15 additions & 0 deletions cpp/src/neighbors/detail/vamana/vamana_serialize.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,21 @@ void serialize(raft::resources const& res,
bool include_dataset,
bool sector_aligned)
{
if constexpr (std::is_same_v<T, half>) {
if (include_dataset) {
RAFT_LOG_WARN(
"Serializing a half-precision (float16) dataset is not compatible with DiskANN search. "
"The serialized .data file uses raw half values with no type metadata, so DiskANN (which "
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are you sure there is no type metadata? Does DiskANN break with a "half" .data file or does it give garbage results? Just want to make sure that the wording of the warning is accurate.

"only supports float, int8, and uint8) will misinterpret it. "
"Set include_dataset to false if cross-compatibility with DiskANN is required.");
}
if (sector_aligned) {
RAFT_LOG_WARN(
"Sector-aligned serialization embeds half-precision vectors directly into the disk index. "
"DiskANN search does not support half-precision data and will misinterpret these vectors.");
}
}

auto d_graph = index_.graph();
auto h_graph = raft::make_host_matrix<IdxT, int64_t>(d_graph.extent(0), d_graph.extent(1));
raft::copy(res, h_graph.view(), d_graph);
Expand Down
33 changes: 33 additions & 0 deletions cpp/src/neighbors/vamana_build_half.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#include "vamana.cuh"
#include <cuda_fp16.h>
#include <cuvs/neighbors/vamana.hpp>

namespace cuvs::neighbors::vamana {

/**
 * Public (non-template) Vamana build entry point for half-precision data supplied through a
 * device matrix view. Forwards unchanged to the templated `build<half, uint32_t>`.
 */
auto build(raft::resources const& handle,
           const cuvs::neighbors::vamana::index_params& params,
           raft::device_matrix_view<const half, int64_t, raft::row_major> dataset)
  -> cuvs::neighbors::vamana::index<half, uint32_t>
{
  return cuvs::neighbors::vamana::build<half, uint32_t>(handle, params, dataset);
}

/**
 * Public (non-template) Vamana build entry point for half-precision data supplied through a
 * host matrix view. Forwards unchanged to the templated `build<half, uint32_t>`.
 */
auto build(raft::resources const& handle,
           const cuvs::neighbors::vamana::index_params& params,
           raft::host_matrix_view<const half, int64_t, raft::row_major> dataset)
  -> cuvs::neighbors::vamana::index<half, uint32_t>
{
  return cuvs::neighbors::vamana::build<half, uint32_t>(handle, params, dataset);
}

}  // namespace cuvs::neighbors::vamana
14 changes: 14 additions & 0 deletions cpp/src/neighbors/vamana_serialize_half.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#include "vamana_serialize.cuh"

#include <cuda_fp16.h>

namespace cuvs::neighbors::vamana {

// Instantiate Vamana index serialization for half-precision (float16) data.
// NOTE(review): CUVS_INST_VAMANA_SERIALIZE is not defined in this file; presumably it comes from
// vamana_serialize.cuh and emits the serialize() overload declared in vamana.hpp -- confirm.
CUVS_INST_VAMANA_SERIALIZE(half);

} // namespace cuvs::neighbors::vamana
4 changes: 2 additions & 2 deletions cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -211,8 +211,8 @@ ConfigureTest(

ConfigureTest(
NAME NEIGHBORS_ANN_VAMANA_TEST
PATH neighbors/ann_vamana/test_float_uint32_t.cu neighbors/ann_vamana/test_int8_t_uint32_t.cu
neighbors/ann_vamana/test_uint8_t_uint32_t.cu
PATH neighbors/ann_vamana/test_float_uint32_t.cu neighbors/ann_vamana/test_half_uint32_t.cu
neighbors/ann_vamana/test_int8_t_uint32_t.cu neighbors/ann_vamana/test_uint8_t_uint32_t.cu
GPUS 1
PERCENT 100
FETCH_CODEBOOKS
Expand Down
25 changes: 20 additions & 5 deletions cpp/tests/neighbors/ann_vamana.cuh
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2024-2025, NVIDIA CORPORATION.
* SPDX-FileCopyrightText: Copyright (c) 2024-2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

Expand All @@ -11,6 +11,7 @@

#include "naive_knn.cuh"

#include <cuda_fp16.h>
#include <cuvs/distance/distance.hpp>
#include <cuvs/neighbors/cagra.hpp>
#include <cuvs/neighbors/vamana.hpp>
Expand All @@ -19,16 +20,15 @@
#include <raft/core/host_mdarray.hpp>
#include <raft/core/host_mdspan.hpp>
#include <raft/core/logger.hpp>
#include <raft/linalg/add.cuh>
#include <raft/core/operators.hpp>
#include <raft/linalg/map.cuh>
#include <raft/random/rng.cuh>
#include <raft/util/itertools.hpp>

#include <rmm/device_buffer.hpp>

#include <gtest/gtest.h>

#include <thrust/sequence.h>

#include <cstddef>
#include <filesystem>
#include <iostream>
Expand Down Expand Up @@ -251,7 +251,22 @@ class AnnVamanaTest : public ::testing::TestWithParam<AnnVamanaInputs> {
database.resize(((size_t)ps.n_rows) * ps.dim, stream_);
search_queries.resize(((size_t)ps.n_queries) * ps.dim, stream_);
raft::random::RngState r(1234ULL);
if constexpr (std::is_same<DataT, float>{}) {
if constexpr (std::is_same_v<DataT, half>) {
rmm::device_uvector<float> database_f(ps.n_rows * ps.dim, stream_);
rmm::device_uvector<float> queries_f(ps.n_queries * ps.dim, stream_);
raft::random::normal(handle_, r, database_f.data(), ps.n_rows * ps.dim, 0.1f, 2.0f);
raft::random::normal(handle_, r, queries_f.data(), ps.n_queries * ps.dim, 0.1f, 2.0f);
auto database_f_view =
raft::make_device_vector_view<const float, int64_t>(database_f.data(), database_f.size());
auto database_h_view =
raft::make_device_vector_view<half, int64_t>(database.data(), database.size());
raft::linalg::map(handle_, database_h_view, raft::cast_op<half>{}, database_f_view);
auto queries_f_view =
raft::make_device_vector_view<const float, int64_t>(queries_f.data(), queries_f.size());
auto queries_h_view =
raft::make_device_vector_view<half, int64_t>(search_queries.data(), search_queries.size());
raft::linalg::map(handle_, queries_h_view, raft::cast_op<half>{}, queries_f_view);
} else if constexpr (std::is_same_v<DataT, float>) {
raft::random::normal(handle_, r, database.data(), ps.n_rows * ps.dim, DataT(0.1), DataT(2.0));
raft::random::normal(
handle_, r, search_queries.data(), ps.n_queries * ps.dim, DataT(0.1), DataT(2.0));
Expand Down
17 changes: 17 additions & 0 deletions cpp/tests/neighbors/ann_vamana/test_half_uint32_t.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION.
* SPDX-License-Identifier: Apache-2.0
*/

#include <gtest/gtest.h>

#include "../ann_vamana.cuh"

namespace cuvs::neighbors::vamana {

// Vamana test fixture specialised for half-precision data with uint32_t indices.
// NOTE(review): template argument order is presumably <DistanceT, DataT, IdxT>; confirm against
// ann_vamana.cuh.
using AnnVamanaTestF16_U32 = AnnVamanaTest<float, half, std::uint32_t>;

// Run the shared Vamana test routine once per parameter set.
TEST_P(AnnVamanaTestF16_U32, AnnVamana)
{
  this->testVamana();
}

INSTANTIATE_TEST_CASE_P(AnnVamanaTest, AnnVamanaTestF16_U32, ::testing::ValuesIn(inputs));

}  // namespace cuvs::neighbors::vamana
Loading