From 54351f8aa3c3431f97008603c383c60c3cb79a65 Mon Sep 17 00:00:00 2001 From: jamie8johnson Date: Tue, 9 Jun 2026 19:11:27 -0500 Subject: [PATCH 1/3] feat(rust): add scalar quantizer bindings Add idiomatic Rust bindings for the scalar quantizer preprocessing API (cuvsScalarQuantizer*). Introduces a new preprocessing module tree under rust/cuvs/src/ with only the scalar path wired up; binary and PQ quantizers are intentionally left for follow-up contributions. Wraps the full lifecycle with RAII handle types and balanced Drop: - ScalarQuantizerParams (Create/Destroy, set_quantile builder) - Quantizer (Create/Destroy) with train, transform, inverse_transform Adds an IntoDtype impl for i8 in dlpack.rs so int8 quantized tensors can be passed through ManagedTensor. The cuvsScalarQuantizer* FFI symbols are already present in the checked-in bindings (reachable via core/all.h), so bindings.rs is unchanged and no all.h edit was required. Tests (CUDA_VISIBLE_DEVICES=1, single-threaded): params setter, a train -> transform -> inverse_transform roundtrip asserting reconstruction within quantization tolerance (observed max abs error ~0.0196 on a data range of 10), and an unsupported-dtype error path. cargo fmt and clippy clean for new code. --- rust/cuvs/src/dlpack.rs | 6 + rust/cuvs/src/lib.rs | 1 + rust/cuvs/src/preprocessing/mod.rs | 11 + rust/cuvs/src/preprocessing/quantize/mod.rs | 54 ++++ .../cuvs/src/preprocessing/quantize/scalar.rs | 268 ++++++++++++++++++ 5 files changed, 340 insertions(+) create mode 100644 rust/cuvs/src/preprocessing/mod.rs create mode 100644 rust/cuvs/src/preprocessing/quantize/mod.rs create mode 100644 rust/cuvs/src/preprocessing/quantize/scalar.rs diff --git a/rust/cuvs/src/dlpack.rs b/rust/cuvs/src/dlpack.rs index 1687f88d17..df5de87c3e 100644 --- a/rust/cuvs/src/dlpack.rs +++ b/rust/cuvs/src/dlpack.rs @@ -133,6 +133,12 @@ impl IntoDtype for f64 { } } +impl IntoDtype for i8 { + fn ffi_dtype() -> ffi::DLDataType { + ffi::DLDataType { code: ffi::DLDataTypeCode::kDLInt as _, bits: 8, lanes: 1 } + } +} + impl IntoDtype for i32 { fn ffi_dtype() -> ffi::DLDataType { ffi::DLDataType { code: ffi::DLDataTypeCode::kDLInt as _, bits: 32, lanes: 1 } diff --git a/rust/cuvs/src/lib.rs b/rust/cuvs/src/lib.rs index 519519440b..3429715c5f 100644 --- a/rust/cuvs/src/lib.rs +++ b/rust/cuvs/src/lib.rs @@ -18,6 +18,7 @@ mod dlpack; mod error; pub mod ivf_flat; pub mod ivf_pq; +pub mod preprocessing; mod resources; pub mod vamana; diff --git a/rust/cuvs/src/preprocessing/mod.rs b/rust/cuvs/src/preprocessing/mod.rs new file mode 100644 index 0000000000..1fc3db74d1 --- /dev/null +++ b/rust/cuvs/src/preprocessing/mod.rs @@ -0,0 +1,11 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +//! Preprocessing utilities for cuVS datasets. +//! +//! Currently this exposes the [`quantize`] module, which provides quantizers +//! that compress floating-point datasets into more compact representations. + +pub mod quantize; diff --git a/rust/cuvs/src/preprocessing/quantize/mod.rs b/rust/cuvs/src/preprocessing/quantize/mod.rs new file mode 100644 index 0000000000..c3dd2d9d03 --- /dev/null +++ b/rust/cuvs/src/preprocessing/quantize/mod.rs @@ -0,0 +1,54 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +//! Dataset quantizers. +//! +//! Quantizers compress a floating-point dataset into a more compact +//! representation. The [`scalar`] quantizer maps an interval of the input +//! float range onto the full range of an 8-bit integer. +//! +//! The binary and product (PQ) quantizers exposed by the cuVS C API are not +//! yet wrapped in Rust; they are intended to be added in follow-up +//! contributions. +//! +//! Example: +//! ``` +//! use cuvs::preprocessing::quantize::scalar::{Quantizer, ScalarQuantizerParams}; +//! use cuvs::{ManagedTensor, Resources, Result}; +//! +//! use ndarray_rand::rand_distr::Uniform; +//! use ndarray_rand::RandomExt; +//! +//! fn scalar_quantize_example() -> Result<()> { +//! let res = Resources::new()?; +//! +//! // Create a new random dataset to quantize +//! let n_rows = 1024; +//! let n_cols = 16; +//! let dataset = +//! ndarray::Array::::random((n_rows, n_cols), Uniform::new(0., 1.0)); +//! let dataset_device = ManagedTensor::from(&dataset).to_device(&res)?; +//! +//! // Train a scalar quantizer on the dataset +//! let params = ScalarQuantizerParams::new()?; +//! let quantizer = Quantizer::train(&res, ¶ms, &dataset_device)?; +//! +//! // Quantize the dataset into int8 +//! let mut quantized_host = ndarray::Array::::zeros((n_rows, n_cols)); +//! let quantized = ManagedTensor::from(&quantized_host).to_device(&res)?; +//! quantizer.transform(&res, &dataset_device, &quantized)?; +//! quantized.to_host(&res, &mut quantized_host)?; +//! +//! // Reconstruct an approximation of the original f32 dataset +//! let mut reconstructed_host = ndarray::Array::::zeros((n_rows, n_cols)); +//! let reconstructed = ManagedTensor::from(&reconstructed_host).to_device(&res)?; +//! quantizer.inverse_transform(&res, &quantized, &reconstructed)?; +//! reconstructed.to_host(&res, &mut reconstructed_host)?; +//! +//! Ok(()) +//! } +//! ``` + +pub mod scalar; diff --git a/rust/cuvs/src/preprocessing/quantize/scalar.rs b/rust/cuvs/src/preprocessing/quantize/scalar.rs new file mode 100644 index 0000000000..249489244e --- /dev/null +++ b/rust/cuvs/src/preprocessing/quantize/scalar.rs @@ -0,0 +1,268 @@ +/* + * SPDX-FileCopyrightText: Copyright (c) 2026, NVIDIA CORPORATION. + * SPDX-License-Identifier: Apache-2.0 + */ + +//! Scalar quantizer. +//! +//! The scalar quantizer performs a linear mapping of an interval of the input +//! float range onto the full range of an 8-bit integer. The interval is +//! derived during [`Quantizer::train`] from the dataset, optionally clipping a +//! configurable fraction of outliers (see +//! [`ScalarQuantizerParams::set_quantile`]). + +use std::fmt; +use std::io::{Write, stderr}; + +use crate::dlpack::ManagedTensor; +use crate::error::{Result, check_cuvs}; +use crate::resources::Resources; + +/// Parameters controlling how a [`Quantizer`] is trained. +pub struct ScalarQuantizerParams(pub ffi::cuvsScalarQuantizerParams_t); + +impl ScalarQuantizerParams { + /// Returns a new `ScalarQuantizerParams` populated with default values. + pub fn new() -> Result { + unsafe { + let mut params = std::mem::MaybeUninit::::uninit(); + check_cuvs(ffi::cuvsScalarQuantizerParamsCreate(params.as_mut_ptr()))?; + Ok(ScalarQuantizerParams(params.assume_init())) + } + } + + /// Sets the fraction of the data that is kept once outliers at the top and + /// bottom of the distribution have been ignored. + /// + /// Must be within the range `(0, 1]`. The default is `0.99`. + pub fn set_quantile(self, quantile: f32) -> ScalarQuantizerParams { + unsafe { + (*self.0).quantile = quantile; + } + self + } +} + +impl fmt::Debug for ScalarQuantizerParams { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + // custom debug impl: the default would just print the raw pointer + write!(f, "ScalarQuantizerParams({:?})", unsafe { *self.0 }) + } +} + +impl Drop for ScalarQuantizerParams { + fn drop(&mut self) { + if let Err(e) = check_cuvs(unsafe { ffi::cuvsScalarQuantizerParamsDestroy(self.0) }) { + write!(stderr(), "failed to call cuvsScalarQuantizerParamsDestroy {:?}", e) + .expect("failed to write to stderr"); + } + } +} + +/// A trained scalar quantizer. +/// +/// Build one with [`Quantizer::train`], then use [`Quantizer::transform`] to +/// quantize a float dataset into int8 and [`Quantizer::inverse_transform`] to +/// reconstruct an approximation of the original float values. +#[derive(Debug)] +pub struct Quantizer(ffi::cuvsScalarQuantizer_t); + +impl Quantizer { + /// Creates a new, untrained quantizer. + fn new() -> Result { + unsafe { + let mut quantizer = std::mem::MaybeUninit::::uninit(); + check_cuvs(ffi::cuvsScalarQuantizerCreate(quantizer.as_mut_ptr()))?; + Ok(Quantizer(quantizer.assume_init())) + } + } + + /// Trains a scalar quantizer on `dataset` for later use in quantizing data. + /// + /// # Arguments + /// + /// * `res` - Resources to use + /// * `params` - Parameters controlling the quantization (e.g. quantile) + /// * `dataset` - A row-major `f32`, `f16`, or `f64` matrix on either the host or device + pub fn train( + res: &Resources, + params: &ScalarQuantizerParams, + dataset: &ManagedTensor, + ) -> Result { + let quantizer = Quantizer::new()?; + unsafe { + check_cuvs(ffi::cuvsScalarQuantizerTrain( + res.0, + params.0, + dataset.as_ptr(), + quantizer.0, + ))?; + } + Ok(quantizer) + } + + /// Quantizes `dataset` into `out`. + /// + /// # Arguments + /// + /// * `res` - Resources to use + /// * `dataset` - A row-major `f32`, `f16`, or `f64` matrix to quantize, shape `(m, n)` + /// * `out` - A row-major `i8` matrix that receives the quantized data, shape `(m, n)` + /// — the output dtype must be `i8`: the C API does not validate it and will + /// reinterpret the buffer otherwise (unlike `inverse_transform`, whose output + /// dtype is validated) + pub fn transform( + &self, + res: &Resources, + dataset: &ManagedTensor, + out: &ManagedTensor, + ) -> Result<()> { + unsafe { + check_cuvs(ffi::cuvsScalarQuantizerTransform( + res.0, + self.0, + dataset.as_ptr(), + out.as_ptr(), + )) + } + } + + /// Reconstructs an approximation of the original float dataset from + /// previously quantized data. + /// + /// Note that scalar quantization is lossy, so the reconstructed values only + /// approximate the originals. + /// + /// # Arguments + /// + /// * `res` - Resources to use + /// * `dataset` - A row-major `i8` matrix of quantized data, shape `(m, n)` + /// * `out` - A row-major `f32` matrix that receives the reconstructed data, shape `(m, n)` + pub fn inverse_transform( + &self, + res: &Resources, + dataset: &ManagedTensor, + out: &ManagedTensor, + ) -> Result<()> { + unsafe { + check_cuvs(ffi::cuvsScalarQuantizerInverseTransform( + res.0, + self.0, + dataset.as_ptr(), + out.as_ptr(), + )) + } + } +} + +impl Drop for Quantizer { + fn drop(&mut self) { + if let Err(e) = check_cuvs(unsafe { ffi::cuvsScalarQuantizerDestroy(self.0) }) { + write!(stderr(), "failed to call cuvsScalarQuantizerDestroy {:?}", e) + .expect("failed to write to stderr"); + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use ndarray_rand::RandomExt; + use ndarray_rand::rand_distr::Uniform; + + #[test] + fn test_scalar_quantizer_params() { + let params = ScalarQuantizerParams::new().unwrap().set_quantile(0.95); + + // make sure the setter actually updated the internal c-struct + unsafe { + assert_eq!((*params.0).quantile, 0.95); + } + } + + #[test] + fn test_scalar_quantizer_roundtrip() { + let res = Resources::new().unwrap(); + + // Create a random dataset to quantize. The data range is [0, 10), so + // the int8 quantization step is roughly 10 / 256 ~= 0.04. + let n_rows = 1024; + let n_cols = 16; + let data_lo = 0.0f32; + let data_hi = 10.0f32; + let dataset = + ndarray::Array::::random((n_rows, n_cols), Uniform::new(data_lo, data_hi)); + let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap(); + + // Train the quantizer (use the full range so we don't clip outliers). + let params = ScalarQuantizerParams::new().unwrap().set_quantile(1.0); + let quantizer = Quantizer::train(&res, ¶ms, &dataset_device).unwrap(); + + // Quantize the dataset into int8. + let mut quantized_host = ndarray::Array::::zeros((n_rows, n_cols)); + let quantized = ManagedTensor::from(&quantized_host).to_device(&res).unwrap(); + quantizer.transform(&res, &dataset_device, &quantized).unwrap(); + quantized.to_host(&res, &mut quantized_host).unwrap(); + + // The quantized values should span a good chunk of the int8 range, + // confirming the transform actually did something. + let q_min = *quantized_host.iter().min().unwrap(); + let q_max = *quantized_host.iter().max().unwrap(); + assert!( + q_max as i32 - q_min as i32 > 200, + "quantized values should span most of the int8 range, got [{q_min}, {q_max}]" + ); + + // Reconstruct an approximation of the original f32 values. + let mut reconstructed_host = ndarray::Array::::zeros((n_rows, n_cols)); + let reconstructed = ManagedTensor::from(&reconstructed_host).to_device(&res).unwrap(); + quantizer.inverse_transform(&res, &quantized, &reconstructed).unwrap(); + reconstructed.to_host(&res, &mut reconstructed_host).unwrap(); + + // Compute the max absolute reconstruction error. It should be bounded + // by a few quantization steps and far below the data range. + let mut max_abs_err = 0.0f32; + for (orig, recon) in dataset.iter().zip(reconstructed_host.iter()) { + let err = (orig - recon).abs(); + if err > max_abs_err { + max_abs_err = err; + } + } + + let data_range = data_hi - data_lo; + // A loose epsilon: a handful of quantization steps. One step is + // data_range / 256 ~= 0.04; allow up to ~5 steps of slack. + let epsilon = data_range / 50.0; + assert!( + max_abs_err < epsilon, + "max abs reconstruction error {max_abs_err} should be below {epsilon}" + ); + assert!( + max_abs_err < data_range * 0.05, + "max abs reconstruction error {max_abs_err} should be far below data range {data_range}" + ); + } + + #[test] + fn test_train_unsupported_dtype_errors() { + let res = Resources::new().unwrap(); + + // The C API only supports float (16/32/64-bit) training datasets, and + // surfaces an integer dataset as an error rather than silently + // succeeding. (Note: a freshly created, untrained quantizer has + // min_ == max_ == 0, which produces degenerate output but is *not* + // reported as an error by the C API, so we exercise the dtype guard + // instead to cover the error path.) + let n_rows = 8; + let n_cols = 4; + let dataset = ndarray::Array::::zeros((n_rows, n_cols)); + let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap(); + + let params = ScalarQuantizerParams::new().unwrap(); + let result = Quantizer::train(&res, ¶ms, &dataset_device); + assert!( + result.is_err(), + "training on an unsupported (integer) dtype should return an error" + ); + } +} From 32a0d3eaf4e66f06f4da6c11fb3529d8f6678ae4 Mon Sep 17 00:00:00 2001 From: jamie8johnson Date: Tue, 9 Jun 2026 19:28:13 -0500 Subject: [PATCH 2/3] Address review feedback: i8 dtype guards on transform/inverse_transform, non-panicking Drop logging The C API reinterprets i8 buffers without dtype validation; guard both the transform output and the inverse_transform input Rust-side so a wrong-dtype tensor surfaces as InvalidArgument instead of memory corruption. Drop logging switched from .expect to best-effort write to avoid a double-panic during unwinding (sibling modules share the old pattern; happy to sweep them in a follow-up). --- .../cuvs/src/preprocessing/quantize/scalar.rs | 47 +++++++++++++++++-- 1 file changed, 42 insertions(+), 5 deletions(-) diff --git a/rust/cuvs/src/preprocessing/quantize/scalar.rs b/rust/cuvs/src/preprocessing/quantize/scalar.rs index 249489244e..c82c45d5a1 100644 --- a/rust/cuvs/src/preprocessing/quantize/scalar.rs +++ b/rust/cuvs/src/preprocessing/quantize/scalar.rs @@ -15,9 +15,23 @@ use std::fmt; use std::io::{Write, stderr}; use crate::dlpack::ManagedTensor; -use crate::error::{Result, check_cuvs}; +use crate::error::{Error, Result, check_cuvs}; use crate::resources::Resources; +/// The C API reinterprets `i8` buffers without validating dtype; guard +/// Rust-side so a wrong-dtype tensor surfaces as `InvalidArgument` instead +/// of memory corruption. +fn expect_i8_tensor(tensor: &ManagedTensor, arg: &str) -> Result<()> { + let dtype = unsafe { (*tensor.as_ptr()).dl_tensor.dtype }; + if dtype.code != ffi::DLDataTypeCode::kDLInt as u8 || dtype.bits != 8 || dtype.lanes != 1 { + return Err(Error::InvalidArgument(format!( + "{arg} must be an i8 tensor (got code={}, bits={}, lanes={})", + dtype.code, dtype.bits, dtype.lanes + ))); + } + Ok(()) +} + /// Parameters controlling how a [`Quantizer`] is trained. pub struct ScalarQuantizerParams(pub ffi::cuvsScalarQuantizerParams_t); @@ -53,8 +67,7 @@ impl fmt::Debug for ScalarQuantizerParams { impl Drop for ScalarQuantizerParams { fn drop(&mut self) { if let Err(e) = check_cuvs(unsafe { ffi::cuvsScalarQuantizerParamsDestroy(self.0) }) { - write!(stderr(), "failed to call cuvsScalarQuantizerParamsDestroy {:?}", e) - .expect("failed to write to stderr"); + let _ = write!(stderr(), "failed to call cuvsScalarQuantizerParamsDestroy {:?}", e); } } } @@ -117,6 +130,7 @@ impl Quantizer { dataset: &ManagedTensor, out: &ManagedTensor, ) -> Result<()> { + expect_i8_tensor(out, "transform output")?; unsafe { check_cuvs(ffi::cuvsScalarQuantizerTransform( res.0, @@ -144,6 +158,7 @@ impl Quantizer { dataset: &ManagedTensor, out: &ManagedTensor, ) -> Result<()> { + expect_i8_tensor(dataset, "inverse_transform input")?; unsafe { check_cuvs(ffi::cuvsScalarQuantizerInverseTransform( res.0, @@ -158,8 +173,7 @@ impl Quantizer { impl Drop for Quantizer { fn drop(&mut self) { if let Err(e) = check_cuvs(unsafe { ffi::cuvsScalarQuantizerDestroy(self.0) }) { - write!(stderr(), "failed to call cuvsScalarQuantizerDestroy {:?}", e) - .expect("failed to write to stderr"); + let _ = write!(stderr(), "failed to call cuvsScalarQuantizerDestroy {:?}", e); } } } @@ -265,4 +279,27 @@ mod tests { "training on an unsupported (integer) dtype should return an error" ); } + + #[test] + fn test_transform_rejects_non_i8_output() { + let res = Resources::new().unwrap(); + let n_rows = 8; + let n_cols = 4; + + let dataset = ndarray::Array::::zeros((n_rows, n_cols)); + let dataset_device = ManagedTensor::from(&dataset).to_device(&res).unwrap(); + let params = ScalarQuantizerParams::new().unwrap(); + let quantizer = Quantizer::train(&res, ¶ms, &dataset_device).unwrap(); + + // The C API would silently reinterpret a non-i8 output buffer; + // the wrapper must reject it before any FFI happens. + let bad_out = ndarray::Array::::zeros((n_rows, n_cols)); + let bad_out_device = ManagedTensor::from(&bad_out).to_device(&res).unwrap(); + let result = quantizer.transform(&res, &dataset_device, &bad_out_device); + assert!(result.is_err(), "transform must reject a non-i8 output tensor"); + + // Same guard on the inverse path's input. + let result = quantizer.inverse_transform(&res, &bad_out_device, &dataset_device); + assert!(result.is_err(), "inverse_transform must reject a non-i8 input tensor"); + } } From 5ffe91746964dcdea3b23f2bdbd3aa2bf5fdc31e Mon Sep 17 00:00:00 2001 From: jamie8johnson Date: Tue, 9 Jun 2026 19:35:26 -0500 Subject: [PATCH 3/3] Address review feedback: assert the dtype guard's InvalidArgument specifically --- rust/cuvs/src/preprocessing/quantize/scalar.rs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/rust/cuvs/src/preprocessing/quantize/scalar.rs b/rust/cuvs/src/preprocessing/quantize/scalar.rs index c82c45d5a1..0037f94564 100644 --- a/rust/cuvs/src/preprocessing/quantize/scalar.rs +++ b/rust/cuvs/src/preprocessing/quantize/scalar.rs @@ -296,10 +296,24 @@ mod tests { let bad_out = ndarray::Array::::zeros((n_rows, n_cols)); let bad_out_device = ManagedTensor::from(&bad_out).to_device(&res).unwrap(); let result = quantizer.transform(&res, &dataset_device, &bad_out_device); - assert!(result.is_err(), "transform must reject a non-i8 output tensor"); + assert!( + matches!( + &result, + Err(Error::InvalidArgument(msg)) + if msg.contains("transform output") && msg.contains("i8 tensor") + ), + "transform must reject a non-i8 output tensor via the dtype guard, got {result:?}" + ); // Same guard on the inverse path's input. let result = quantizer.inverse_transform(&res, &bad_out_device, &dataset_device); - assert!(result.is_err(), "inverse_transform must reject a non-i8 input tensor"); + assert!( + matches!( + &result, + Err(Error::InvalidArgument(msg)) + if msg.contains("inverse_transform input") && msg.contains("i8 tensor") + ), + "inverse_transform must reject a non-i8 input tensor via the dtype guard, got {result:?}" + ); } }