Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
54 commits
Select commit Hold shift + click to select a range
66d7fd3
combine impls
tarang-jain Apr 10, 2026
0a09e6f
rm inertia_check
tarang-jain Apr 13, 2026
99a5730
change to warning
tarang-jain Apr 13, 2026
a077406
style
tarang-jain Apr 13, 2026
d659875
add init_size param
tarang-jain Apr 13, 2026
ec2e8b7
Merge branch 'main' into combine-batch
tarang-jain Apr 13, 2026
03a6473
docs
tarang-jain Apr 13, 2026
42a8d9d
Merge branch 'combine-batch' of https://github.com/tarang-jain/cuvs i…
tarang-jain Apr 13, 2026
86af2fa
rm direct cuda api calls
tarang-jain Apr 13, 2026
d4e4e2c
std::swap instead of raft::copy
tarang-jain Apr 14, 2026
0819af5
cache batch norms
tarang-jain Apr 14, 2026
e0f079c
centroid norms can also be cached per iteration
tarang-jain Apr 14, 2026
c2f7390
mg n_iter
tarang-jain Apr 14, 2026
b9c3102
pre-commit
tarang-jain Apr 14, 2026
e3956c1
do not break c abi
tarang-jain Apr 14, 2026
986d78a
Merge branch 'main' into combine-batch
tarang-jain Apr 14, 2026
a8e1d26
Merge branch 'main' into combine-batch
tarang-jain Apr 16, 2026
384d054
fix checkWeight
tarang-jain Apr 21, 2026
455b286
merge upstream:
tarang-jain Apr 21, 2026
5462809
Merge branch 'combine-batch' of https://github.com/tarang-jain/cuvs i…
tarang-jain Apr 21, 2026
6ba759c
fix compilation
tarang-jain Apr 21, 2026
e76eaac
rel_tol
tarang-jain Apr 22, 2026
afbefdf
pass workspace
tarang-jain Apr 22, 2026
e62a63c
Merge branch 'combine-batch' of https://github.com/tarang-jain/cuvs i…
tarang-jain Apr 22, 2026
e4f08bf
style
tarang-jain Apr 22, 2026
6e4a8f0
Merge branch 'main' of https://github.com/rapidsai/cuvs into combine-…
tarang-jain Apr 22, 2026
4a8a85c
do not use batch scratch space; rm update_centroids
tarang-jain Apr 22, 2026
bbf2a9f
move the debug log
tarang-jain Apr 22, 2026
410092c
add new suffixed param struct
tarang-jain Apr 22, 2026
c515c1e
address pr reviews
tarang-jain Apr 22, 2026
e8e63ab
fix docstring
tarang-jain Apr 22, 2026
30c457c
fix wt_sum warning
tarang-jain Apr 22, 2026
ab96623
rm deprecationwarning and instead add FutureWarning:=
tarang-jain Apr 22, 2026
269f23c
unweighted to never materialize batch weights
tarang-jain Apr 22, 2026
80a22ca
add cpp tests
tarang-jain Apr 23, 2026
ac06b05
update cpp tests
tarang-jain Apr 23, 2026
0569340
revert batch norms cache
tarang-jain Apr 23, 2026
8cac63a
increase zero cost threshold
tarang-jain Apr 24, 2026
f6df4ae
apply cuda event plus re-add h_norm_cache
tarang-jain Apr 24, 2026
9fc74b1
rm cosine expanded stuff
tarang-jain Apr 24, 2026
dec3dc4
resolve merge conflicts
tarang-jain Apr 28, 2026
0d030a2
change suffix of the params struct
tarang-jain Apr 28, 2026
b1c034e
replace 06 by 08, add todo and note
tarang-jain Apr 28, 2026
a482495
update to v2
tarang-jain Apr 28, 2026
8ecfdc1
avoid stream sync inside weight sum
tarang-jain Apr 29, 2026
1e1525e
Merge branch 'combine-batch' of https://github.com/tarang-jain/cuvs i…
tarang-jain Apr 29, 2026
ec22e07
empty
tarang-jain Apr 29, 2026
d2e410d
empty
tarang-jain Apr 29, 2026
b791c38
Merge branch 'main' into combine-batch
tarang-jain Apr 29, 2026
a05a006
new signatures with new struct
tarang-jain Apr 29, 2026
73293cf
Merge branch 'combine-batch' of https://github.com/tarang-jain/cuvs i…
tarang-jain Apr 29, 2026
880c7b9
Merge branch 'main' of https://github.com/rapidsai/cuvs into combine-…
tarang-jain Apr 30, 2026
e2035ec
revert change to calls in py and rust; add c tests
tarang-jain Apr 30, 2026
e28c200
Merge branch 'main' into combine-batch
tarang-jain May 1, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
188 changes: 186 additions & 2 deletions c/include/cuvs/cluster/kmeans.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ typedef enum {

/**
* @brief Hyper-parameters for the kmeans algorithm
* NB: The inertia_check field is kept for ABI compatibility. Removed in cuvsKMeansParams_v2.
* TODO: CalVer for the replacement: 26.08
*/
struct cuvsKMeansParams {
cuvsDistanceType metric;
Expand Down Expand Up @@ -91,7 +93,7 @@ struct cuvsKMeansParams {
*/
int batch_centroids;

/** Check inertia during iterations for early convergence. */
/** Deprecated, ignored. Kept for ABI compatibility. */
Comment thread
tarang-jain marked this conversation as resolved.
bool inertia_check;

/**
Expand All @@ -108,14 +110,104 @@ struct cuvsKMeansParams {
* Number of samples to process per GPU batch for the batched (host-data) API.
* When set to 0, defaults to n_samples (process all at once).
*/
int64_t streaming_batch_size;
int64_t streaming_batch_size;

/**
* Number of samples to draw for KMeansPlusPlus initialization.
* When set to 0, uses heuristic min(3 * n_clusters, n_samples) for host data,
* or n_samples for device data.
*/
int64_t init_size;
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Question: adding init_size at the tail changes sizeof(cuvsKMeansParams) and therefore still breaks ABI for any caller that stack-allocates or sizeofs the struct, no?

In practice cuVS consumers go through cuvsKMeansParamsCreate, so this is a soft break and might not be much of an issue, but I wanted to raise it for discussion.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes I was just following the instructions in the doc that we have as a guide for ABI stability. And we also have a CI check for the same, which will fail if there is a breaking change. I also added streaming_batch_size as a new param in the last release, which would have also changed the sizeof(cuvsKMeansParams)

};

/**
 * @brief Hyper-parameters for the kmeans algorithm (v2 layout).
 *
 * Identical to cuvsKMeansParams except that the deprecated, ignored
 * inertia_check field is removed from the layout.
 * TODO: Remove the _v2 suffix once cuvsKMeansParams is replaced in ABI 2.0 (cuVS 26.08).
 */
struct cuvsKMeansParams_v2 {
  /** Distance metric used when assigning samples to centroids. */
  cuvsDistanceType metric;

  /**
   * The number of clusters to form as well as the number of centroids to generate (default:8).
   */
  int n_clusters;

  /**
   * Method for initialization, defaults to k-means++:
   * - cuvsKMeansInitMethod::KMeansPlusPlus (k-means++): Use scalable k-means++ algorithm
   * to select the initial cluster centers.
   * - cuvsKMeansInitMethod::Random (random): Choose 'n_clusters' observations (rows) at
   * random from the input data for the initial centroids.
   * - cuvsKMeansInitMethod::Array (ndarray): Use 'centroids' as initial cluster centers.
   */
  cuvsKMeansInitMethod init;

  /**
   * Maximum number of iterations of the k-means algorithm for a single run.
   */
  int max_iter;

  /**
   * Relative tolerance with regards to inertia to declare convergence.
   */
  double tol;

  /**
   * Number of instances the k-means algorithm will be run with different seeds.
   */
  int n_init;

  /**
   * Oversampling factor for use in the k-means|| algorithm.
   */
  double oversampling_factor;

  /**
   * batch_samples and batch_centroids are used to tile 1NN computation which is
   * useful to optimize/control the memory footprint
   * Default tile is [batch_samples x n_clusters] i.e. when batch_centroids is 0
   * then don't tile the centroids
   */
  int batch_samples;

  /**
   * if 0 then batch_centroids = n_clusters
   */
  int batch_centroids;

  /**
   * Whether to use hierarchical (balanced) kmeans or not
   */
  bool hierarchical;

  /**
   * For hierarchical k-means, defines the number of training iterations
   */
  int hierarchical_n_iters;

  /**
   * Number of samples to process per GPU batch for the batched (host-data) API.
   * When set to 0, defaults to n_samples (process all at once).
   */
  int64_t streaming_batch_size;

  /**
   * Number of samples to draw for KMeansPlusPlus initialization.
   * When set to 0, uses heuristic min(3 * n_clusters, n_samples) for host data,
   * or n_samples for device data.
   */
  int64_t init_size;
};
Comment thread
coderabbitai[bot] marked this conversation as resolved.

typedef struct cuvsKMeansParams* cuvsKMeansParams_t;
typedef struct cuvsKMeansParams_v2* cuvsKMeansParams_v2_t;

/**
* @brief Allocate KMeans params, and populate with default values
*
* @note In cuVS 26.08 (next ABI major version) this signature will be
* replaced by cuvsKMeansParamsCreate_v2.
*
* @param[in] params cuvsKMeansParams_t to allocate
* @return cuvsError_t
*/
Expand All @@ -124,11 +216,33 @@ cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params);
/**
 * @brief De-allocate KMeans params previously allocated with cuvsKMeansParamsCreate.
 *
 * @note In cuVS 26.08 (next ABI major version) this signature will be
 * replaced by cuvsKMeansParamsDestroy_v2.
 *
 * @param[in] params cuvsKMeansParams_t to de-allocate
 * @return cuvsError_t
 */
cuvsError_t cuvsKMeansParamsDestroy(cuvsKMeansParams_t params);

/**
 * @brief Allocate KMeans params, and populate with default values.
 *
 * Mirrors cuvsKMeansParamsCreate but operates on cuvsKMeansParams_v2.
 * Will become the unsuffixed cuvsKMeansParamsCreate in cuVS 26.08.
 *
 * @param[in] params cuvsKMeansParams_v2_t to allocate
 * @return cuvsError_t
 */
cuvsError_t cuvsKMeansParamsCreate_v2(cuvsKMeansParams_v2_t* params);

/**
 * @brief De-allocate KMeans params allocated by cuvsKMeansParamsCreate_v2.
 *
 * @param[in] params cuvsKMeansParams_v2_t to de-allocate
 * @return cuvsError_t
 */
cuvsError_t cuvsKMeansParamsDestroy_v2(cuvsKMeansParams_v2_t params);

/**
* @brief Type of k-means algorithm.
*/
Expand All @@ -154,6 +268,9 @@ typedef enum { CUVS_KMEANS_TYPE_KMEANS = 0, CUVS_KMEANS_TYPE_KMEANS_BALANCED = 1
* When X is on the host the data is streamed to the GPU in
* batches controlled by params->streaming_batch_size.
*
* @note In cuVS 26.08 (next ABI major version) this signature will be
* replaced by cuvsKMeansFit_v2.
*
* @param[in] res opaque C handle
* @param[in] params Parameters for KMeans model.
* @param[in] X Training instances to cluster. The data must
Expand Down Expand Up @@ -181,9 +298,45 @@ cuvsError_t cuvsKMeansFit(cuvsResources_t res,
double* inertia,
int* n_iter);

/**
 * @brief Find clusters with k-means algorithm (v2 params layout).
 *
 * Mirrors cuvsKMeansFit but takes cuvsKMeansParams_v2_t. Will become the
 * unsuffixed cuvsKMeansFit in cuVS 26.08.
 *
 * @param[in] res opaque C handle
 * @param[in] params Parameters for KMeans model (v2 layout).
 * @param[in] X Training instances to cluster. The data must
 *             be in row-major format. May be on host or
 *             device memory.
 *             [dim = n_samples x n_features]
 * @param[in] sample_weight Optional weights for each observation in X.
 *             Must be on the same memory space as X.
 *             [len = n_samples]
 * @param[inout] centroids [in] When init is InitMethod::Array, use
 *             centroids as the initial cluster centers.
 *             [out] The generated centroids from the
 *             kmeans algorithm are stored at the address
 *             pointed by 'centroids'. Must be on device.
 *             [dim = n_clusters x n_features]
 * @param[out] inertia Sum of squared distances of samples to their
 *             closest cluster center.
 * @param[out] n_iter Number of iterations run.
 * @return cuvsError_t
 */
cuvsError_t cuvsKMeansFit_v2(cuvsResources_t res,
                             cuvsKMeansParams_v2_t params,
                             DLManagedTensor* X,
                             DLManagedTensor* sample_weight,
                             DLManagedTensor* centroids,
                             double* inertia,
                             int* n_iter);

/**
* @brief Predict the closest cluster each sample in X belongs to.
*
* @note In cuVS 26.08 (next ABI major version) this signature will be
* replaced by cuvsKMeansPredict_v2.
*
* @param[in] res opaque C handle
* @param[in] params Parameters for KMeans model.
* @param[in] X New data to predict.
Expand All @@ -209,6 +362,37 @@ cuvsError_t cuvsKMeansPredict(cuvsResources_t res,
bool normalize_weight,
double* inertia);

/**
 * @brief Predict the closest cluster each sample in X belongs to (v2 params layout).
 *
 * Mirrors cuvsKMeansPredict but takes cuvsKMeansParams_v2_t. Will become the
 * unsuffixed cuvsKMeansPredict in cuVS 26.08.
 *
 * @param[in] res opaque C handle
 * @param[in] params Parameters for KMeans model (v2 layout).
 * @param[in] X New data to predict.
 *             [dim = n_samples x n_features]
 * @param[in] sample_weight Optional weights for each observation in X.
 *             [len = n_samples]
 * @param[in] centroids Cluster centroids. The data must be in
 *             row-major format.
 *             [dim = n_clusters x n_features]
 * @param[out] labels Index of the cluster each sample in X
 *             belongs to.
 *             [len = n_samples]
 * @param[in] normalize_weight True if the weights should be normalized
 * @param[out] inertia Sum of squared distances of samples to
 *             their closest cluster center.
 * @return cuvsError_t
 */
cuvsError_t cuvsKMeansPredict_v2(cuvsResources_t res,
                                 cuvsKMeansParams_v2_t params,
                                 DLManagedTensor* X,
                                 DLManagedTensor* sample_weight,
                                 DLManagedTensor* centroids,
                                 DLManagedTensor* labels,
                                 bool normalize_weight,
                                 double* inertia);

/**
* @brief Compute cluster cost
*
Expand Down
95 changes: 86 additions & 9 deletions c/src/cluster/kmeans.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@

namespace {

cuvs::cluster::kmeans::params convert_params(const cuvsKMeansParams& params)
// The conversions are templated on the C struct type and reused by both API surfaces.
template <typename ParamsT>
cuvs::cluster::kmeans::params convert_params(const ParamsT& params)
{
auto kmeans_params = cuvs::cluster::kmeans::params();
kmeans_params.metric = static_cast<cuvs::distance::DistanceType>(params.metric);
Expand All @@ -28,22 +30,23 @@ cuvs::cluster::kmeans::params convert_params(const cuvsKMeansParams& params)
kmeans_params.oversampling_factor = params.oversampling_factor;
kmeans_params.batch_samples = params.batch_samples;
kmeans_params.batch_centroids = params.batch_centroids;
kmeans_params.inertia_check = params.inertia_check;
kmeans_params.init_size = params.init_size;
kmeans_params.streaming_batch_size = params.streaming_batch_size;
return kmeans_params;
}

cuvs::cluster::kmeans::balanced_params convert_balanced_params(const cuvsKMeansParams& params)
// Build the C++ balanced-kmeans parameter struct from a C params struct.
// Only metric and the hierarchical iteration count are carried over; all
// other fields keep their C++ defaults.
template <typename ParamsT>
cuvs::cluster::kmeans::balanced_params convert_balanced_params(const ParamsT& params)
{
  cuvs::cluster::kmeans::balanced_params converted{};
  converted.metric  = static_cast<cuvs::distance::DistanceType>(params.metric);
  converted.n_iters = params.hierarchical_n_iters;
  return converted;
}

template <typename T, typename IdxT = int64_t>
template <typename T, typename ParamsT, typename IdxT = int64_t>
void _fit(cuvsResources_t res,
const cuvsKMeansParams& params,
const ParamsT& params,
DLManagedTensor* X_tensor,
DLManagedTensor* sample_weight_tensor,
DLManagedTensor* centroids_tensor,
Expand Down Expand Up @@ -140,9 +143,9 @@ void _fit(cuvsResources_t res,
}
}

template <typename T, typename IdxT = int32_t, typename LabelsT = int32_t>
template <typename T, typename ParamsT, typename IdxT = int32_t, typename LabelsT = int32_t>
void _predict(cuvsResources_t res,
const cuvsKMeansParams& params,
const ParamsT& params,
DLManagedTensor* X_tensor,
DLManagedTensor* sample_weight_tensor,
DLManagedTensor* centroids_tensor,
Expand Down Expand Up @@ -237,10 +240,11 @@ extern "C" cuvsError_t cuvsKMeansParamsCreate(cuvsKMeansParams_t* params)
.oversampling_factor = cpp_params.oversampling_factor,
.batch_samples = cpp_params.batch_samples,
.batch_centroids = cpp_params.batch_centroids,
.inertia_check = cpp_params.inertia_check,
.inertia_check = false,
.hierarchical = false,
.hierarchical_n_iters = static_cast<int>(cpp_balanced_params.n_iters),
.streaming_batch_size = cpp_params.streaming_batch_size};
.streaming_batch_size = cpp_params.streaming_batch_size,
.init_size = cpp_params.init_size};
});
}

Expand Down Expand Up @@ -294,6 +298,79 @@ extern "C" cuvsError_t cuvsKMeansPredict(cuvsResources_t res,
});
}

// Allocate a v2 C params struct populated with the C++ library defaults.
// Exceptions are translated into cuvsError_t by translate_exceptions.
extern "C" cuvsError_t cuvsKMeansParamsCreate_v2(cuvsKMeansParams_v2_t* params)
{
  return cuvs::core::translate_exceptions([=] {
    // Source of truth for defaults: the C++ parameter structs.
    cuvs::cluster::kmeans::params cpp_params;
    cuvs::cluster::kmeans::balanced_params cpp_balanced_params;
    auto* out                 = new cuvsKMeansParams_v2{};
    out->metric               = static_cast<cuvsDistanceType>(cpp_params.metric);
    out->n_clusters           = cpp_params.n_clusters;
    out->init                 = static_cast<cuvsKMeansInitMethod>(cpp_params.init);
    out->max_iter             = cpp_params.max_iter;
    out->tol                  = cpp_params.tol;
    out->n_init               = cpp_params.n_init;
    out->oversampling_factor  = cpp_params.oversampling_factor;
    out->batch_samples        = cpp_params.batch_samples;
    out->batch_centroids      = cpp_params.batch_centroids;
    // Hierarchical (balanced) kmeans is opt-in; default to the standard algorithm.
    out->hierarchical         = false;
    out->hierarchical_n_iters = static_cast<int>(cpp_balanced_params.n_iters);
    out->streaming_batch_size = cpp_params.streaming_batch_size;
    out->init_size            = cpp_params.init_size;
    *params                   = out;
  });
}

// Release a v2 params struct allocated by cuvsKMeansParamsCreate_v2.
// delete on a null pointer is a no-op, so passing NULL is harmless.
extern "C" cuvsError_t cuvsKMeansParamsDestroy_v2(cuvsKMeansParams_v2_t params)
{
  return cuvs::core::translate_exceptions([params] { delete params; });
}

// C entry point for kmeans fit with the v2 params layout.
// Dispatches on the dataset dtype (fp32/fp64) to the templated _fit helper.
extern "C" cuvsError_t cuvsKMeansFit_v2(cuvsResources_t res,
                                        cuvsKMeansParams_v2_t params,
                                        DLManagedTensor* X,
                                        DLManagedTensor* sample_weight,
                                        DLManagedTensor* centroids,
                                        double* inertia,
                                        int* n_iter)
{
  return cuvs::core::translate_exceptions([=] {
    const auto dtype    = X->dl_tensor.dtype;
    const bool is_float = dtype.code == kDLFloat;
    if (is_float && dtype.bits == 32) {
      _fit<float>(res, *params, X, sample_weight, centroids, inertia, n_iter);
    } else if (is_float && dtype.bits == 64) {
      _fit<double>(res, *params, X, sample_weight, centroids, inertia, n_iter);
    } else {
      // Only fp32/fp64 datasets are supported by the C API.
      RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", dtype.code, dtype.bits);
    }
  });
}

// C entry point for kmeans predict with the v2 params layout.
// Dispatches on the dataset dtype (fp32/fp64) to the templated _predict helper.
extern "C" cuvsError_t cuvsKMeansPredict_v2(cuvsResources_t res,
                                            cuvsKMeansParams_v2_t params,
                                            DLManagedTensor* X,
                                            DLManagedTensor* sample_weight,
                                            DLManagedTensor* centroids,
                                            DLManagedTensor* labels,
                                            bool normalize_weight,
                                            double* inertia)
{
  return cuvs::core::translate_exceptions([=] {
    const auto dtype    = X->dl_tensor.dtype;
    const bool is_float = dtype.code == kDLFloat;
    if (is_float && dtype.bits == 32) {
      _predict<float>(res, *params, X, sample_weight, centroids, labels, normalize_weight, inertia);
    } else if (is_float && dtype.bits == 64) {
      _predict<double>(
        res, *params, X, sample_weight, centroids, labels, normalize_weight, inertia);
    } else {
      // Only fp32/fp64 datasets are supported by the C API.
      RAFT_FAIL("Unsupported dataset DLtensor dtype: %d and bits: %d", dtype.code, dtype.bits);
    }
  });
}

extern "C" cuvsError_t cuvsKMeansClusterCost(cuvsResources_t res,
DLManagedTensor* X,
DLManagedTensor* centroids,
Expand Down
Loading
Loading