From 60b19a0f62ba0bd716de349382828e43be6e2daa Mon Sep 17 00:00:00 2001 From: Jake Awe Date: Wed, 8 Apr 2026 12:23:37 -0700 Subject: [PATCH 1/2] REL v26.04.00 release --- .pre-commit-config.yaml | 362 +++++++++--------- .../all_cuda-129_arch-aarch64.yaml | 4 +- .../all_cuda-129_arch-x86_64.yaml | 4 +- .../all_cuda-131_arch-aarch64.yaml | 4 +- .../all_cuda-131_arch-x86_64.yaml | 4 +- .../bench_ann_cuda-129_arch-aarch64.yaml | 8 +- .../bench_ann_cuda-129_arch-x86_64.yaml | 8 +- .../bench_ann_cuda-131_arch-aarch64.yaml | 8 +- .../bench_ann_cuda-131_arch-x86_64.yaml | 8 +- .../go_cuda-129_arch-aarch64.yaml | 4 +- .../environments/go_cuda-129_arch-x86_64.yaml | 4 +- .../go_cuda-131_arch-aarch64.yaml | 4 +- .../environments/go_cuda-131_arch-x86_64.yaml | 4 +- .../rust_cuda-129_arch-aarch64.yaml | 4 +- .../rust_cuda-129_arch-x86_64.yaml | 4 +- .../rust_cuda-131_arch-aarch64.yaml | 4 +- .../rust_cuda-131_arch-x86_64.yaml | 4 +- dependencies.yaml | 30 +- python/cuvs/pyproject.toml | 10 +- python/cuvs_bench/pyproject.toml | 2 +- python/libcuvs/pyproject.toml | 8 +- 21 files changed, 246 insertions(+), 246 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 28546f8332..f8ce48e1c0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,185 +2,185 @@ # SPDX-License-Identifier: Apache-2.0 repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v5.0.0 - hooks: - - id: check-json - - id: trailing-whitespace - - id: end-of-file-fixer - - id: check-symlinks - - repo: https://github.com/PyCQA/isort - rev: 5.12.0 - hooks: - - id: isort - # Use the config file specific to each subproject so that each - # project can specify its own first/third-party packages. - args: ["--config-root=python/", "--resolve-all-configs"] - files: python/.* - types: [cython] - - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.14.3 - hooks: - - id: ruff-check - args: [--fix] - - id: ruff-format - - repo: https://github.com/pre-commit/mirrors-mypy - rev: 'v1.19.1' - hooks: - - id: mypy - additional_dependencies: [types-cachetools] - args: ["--config-file=pyproject.toml", - "python/cuvs/cuvs", - "cpp/cmake/modules", - "cpp/tests/python"] - pass_filenames: false - - repo: https://github.com/PyCQA/pydocstyle - rev: 6.1.1 - hooks: - - id: pydocstyle - # https://github.com/PyCQA/pydocstyle/issues/603 - additional_dependencies: [toml] - args: ["--config=pyproject.toml"] - - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v20.1.4 - hooks: - - id: clang-format - types_or: [c, c++, cuda] - args: ["-fallback-style=none", "-style=file", "-i"] - - repo: local - hooks: - - id: no-deprecationwarning - name: no-deprecationwarning - description: 'Enforce that DeprecationWarning is not introduced (use FutureWarning instead)' - entry: '(category=|\s)DeprecationWarning[,)]' - language: pygrep - types_or: [python, cython] - - id: cmake-format - name: cmake-format - entry: ./cpp/scripts/run-cmake-format.sh cmake-format - language: python - types: [cmake] - exclude: .*/thirdparty/.*|.*FindAVX.cmake.* - # Note that pre-commit autoupdate does not update the versions - # of dependencies, so we'll have to update this manually. - additional_dependencies: - - cmakelang==0.6.13 - verbose: true - require_serial: true - - id: cmake-lint - name: cmake-lint - entry: ./cpp/scripts/run-cmake-format.sh cmake-lint - language: python - types: [cmake] - # Note that pre-commit autoupdate does not update the versions - # of dependencies, so we'll have to update this manually. - additional_dependencies: - - cmakelang==0.6.13 - verbose: true - require_serial: true - exclude: .*/thirdparty/.* - - id: include-check - name: include-check - entry: python ./cpp/scripts/include_checker.py cpp/bench cpp/include cpp/tests - pass_filenames: false - language: python - additional_dependencies: [gitpython] - - id: cargo-fmt - name: cargo-fmt - entry: cargo fmt --manifest-path rust/Cargo.toml --all - pass_filenames: false - files: rust/.* - language: rust - - repo: https://github.com/codespell-project/codespell - rev: v2.2.2 - hooks: - - id: codespell - additional_dependencies: [tomli] - args: ["--toml", "pyproject.toml"] - exclude: | - (?x) - ^CHANGELOG[.]md$| - ^cpp/cmake/patches/cutlass/build-export[.]patch$ - - repo: https://github.com/rapidsai/pre-commit-hooks - rev: v1.3.3 - hooks: - - id: verify-copyright - name: verify-copyright-cuvs - args: [--fix, --spdx] - files: | - (?x) - [.](cmake|c|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx|rs|java)$| - CMakeLists[.]txt$| - CMakeLists_standalone[.]txt$| - meta[.]yaml$| - pyproject[.]toml$| - ^python/cuvs_bench/cuvs_bench/split_groundtruth/split_groundtruth[.]pl$| - Dockerfile$| - pom[.]xml$| - ^java/cuvs-java/src/assembly/native-with-deps[.]xml$| - ^java/docker-build/build-in-docker$| - ^java/docker-build/run-in-docker$| - ^[.]flake8$| - recipe[.]yaml$| - ^[.]pre-commit-config[.]yaml$ - exclude: | - (?x)^( - docs/source/sphinxext/github_link[.]py$| - cpp/cmake/modules/FindAVX[.]cmake$| - cpp/src/neighbors/detail/faiss_distance_utils[.]h$| - cpp/src/distance/detail/fused_distance_nn/gemm[.]h$| - cpp/src/distance/detail/fused_distance_nn/epilogue[.]cuh$| - cpp/src/distance/detail/fused_distance_nn/predicated_tile_iterator_normvec_smem[.]h$| - cpp/src/distance/detail/fused_distance_nn/persistent_gemm[.]h$| - cpp/src/distance/detail/fused_distance_nn/epilogue_elementwise[.]cuh$| - cpp/src/distance/detail/fused_distance_nn/predicated_tile_iterator_reduced_vec[.]h$| - cpp/src/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast[.]h$ - ) - - id: verify-copyright - name: verify-copyright-scikit-learn - args: [--fix, --spdx, "--spdx-license-identifier=Apache-2.0 AND BSD-3-Clause"] - files: | - (?x)^( - docs/source/sphinxext/github_link[.]py$ - ) - - id: verify-copyright - name: verify-copyright-cutlass - args: [--fix, --spdx, "--spdx-license-identifier=Apache-2.0 AND BSD-3-Clause"] - files: | - (?x)^( - cpp/src/distance/detail/fused_distance_nn/gemm[.]h$| - cpp/src/distance/detail/fused_distance_nn/epilogue[.]cuh$| - cpp/src/distance/detail/fused_distance_nn/predicated_tile_iterator_normvec_smem[.]h$| - cpp/src/distance/detail/fused_distance_nn/persistent_gemm[.]h$| - cpp/src/distance/detail/fused_distance_nn/epilogue_elementwise[.]cuh$| - cpp/src/distance/detail/fused_distance_nn/predicated_tile_iterator_reduced_vec[.]h$| - cpp/src/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast[.]h$ - ) - - id: verify-copyright - name: verify-copyright-faiss - args: [--fix, --spdx, "--spdx-license-identifier=Apache-2.0 AND MIT"] - files: | - (?x)^( - cpp/src/neighbors/detail/faiss_distance_utils[.]h$ - ) - - id: verify-alpha-spec - - id: verify-codeowners - args: [--fix, --project-prefix=cuvs] - - id: verify-pyproject-license - # ignore the top-level pyproject.toml, which doesn't - # have or need a [project] table - exclude: | - (?x) - ^pyproject[.]toml$ - - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.20.0 - hooks: - - id: rapids-dependency-file-generator - args: ["--clean", "--warn-all", "--strict"] - - repo: https://github.com/shellcheck-py/shellcheck-py - rev: v0.10.0.1 - hooks: - - id: shellcheck - + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-json + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-symlinks + - repo: https://github.com/PyCQA/isort + rev: 5.12.0 + hooks: + - id: isort + # Use the config file specific to each subproject so that each + # project can specify its own first/third-party packages. + args: ["--config-root=python/", "--resolve-all-configs"] + files: python/.* + types: [cython] + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.3 + hooks: + - id: ruff-check + args: [--fix] + - id: ruff-format + - repo: https://github.com/pre-commit/mirrors-mypy + rev: 'v1.19.1' + hooks: + - id: mypy + additional_dependencies: [types-cachetools] + args: ["--config-file=pyproject.toml", "python/cuvs/cuvs", "cpp/cmake/modules", "cpp/tests/python"] + pass_filenames: false + - repo: https://github.com/PyCQA/pydocstyle + rev: 6.1.1 + hooks: + - id: pydocstyle + # https://github.com/PyCQA/pydocstyle/issues/603 + additional_dependencies: [toml] + args: ["--config=pyproject.toml"] + - repo: https://github.com/pre-commit/mirrors-clang-format + rev: v20.1.4 + hooks: + - id: clang-format + types_or: [c, c++, cuda] + args: ["-fallback-style=none", "-style=file", "-i"] + - repo: local + hooks: + - id: no-deprecationwarning + name: no-deprecationwarning + description: 'Enforce that DeprecationWarning is not introduced (use FutureWarning instead)' + entry: '(category=|\s)DeprecationWarning[,)]' + language: pygrep + types_or: [python, cython] + - id: cmake-format + name: cmake-format + entry: ./cpp/scripts/run-cmake-format.sh cmake-format + language: python + types: [cmake] + exclude: .*/thirdparty/.*|.*FindAVX.cmake.* + # Note that pre-commit autoupdate does not update the versions + # of dependencies, so we'll have to update this manually. + additional_dependencies: + - cmakelang==0.6.13 + verbose: true + require_serial: true + - id: cmake-lint + name: cmake-lint + entry: ./cpp/scripts/run-cmake-format.sh cmake-lint + language: python + types: [cmake] + # Note that pre-commit autoupdate does not update the versions + # of dependencies, so we'll have to update this manually. + additional_dependencies: + - cmakelang==0.6.13 + verbose: true + require_serial: true + exclude: .*/thirdparty/.* + - id: include-check + name: include-check + entry: python ./cpp/scripts/include_checker.py cpp/bench cpp/include cpp/tests + pass_filenames: false + language: python + additional_dependencies: [gitpython] + - id: cargo-fmt + name: cargo-fmt + entry: cargo fmt --manifest-path rust/Cargo.toml --all + pass_filenames: false + files: rust/.* + language: rust + - repo: https://github.com/codespell-project/codespell + rev: v2.2.2 + hooks: + - id: codespell + additional_dependencies: [tomli] + args: ["--toml", "pyproject.toml"] + exclude: | + (?x) + ^CHANGELOG[.]md$| + ^cpp/cmake/patches/cutlass/build-export[.]patch$ + - repo: https://github.com/rapidsai/pre-commit-hooks + rev: v1.3.3 + hooks: + - id: verify-copyright + name: verify-copyright-cuvs + args: [--fix, --spdx] + files: | + (?x) + [.](cmake|c|cpp|cu|cuh|h|hpp|sh|pxd|py|pyx|rs|java)$| + CMakeLists[.]txt$| + CMakeLists_standalone[.]txt$| + meta[.]yaml$| + pyproject[.]toml$| + ^python/cuvs_bench/cuvs_bench/split_groundtruth/split_groundtruth[.]pl$| + Dockerfile$| + pom[.]xml$| + ^java/cuvs-java/src/assembly/native-with-deps[.]xml$| + ^java/docker-build/build-in-docker$| + ^java/docker-build/run-in-docker$| + ^[.]flake8$| + recipe[.]yaml$| + ^[.]pre-commit-config[.]yaml$ + exclude: | + (?x)^( + docs/source/sphinxext/github_link[.]py$| + cpp/cmake/modules/FindAVX[.]cmake$| + cpp/src/neighbors/detail/faiss_distance_utils[.]h$| + cpp/src/distance/detail/fused_distance_nn/gemm[.]h$| + cpp/src/distance/detail/fused_distance_nn/epilogue[.]cuh$| + cpp/src/distance/detail/fused_distance_nn/predicated_tile_iterator_normvec_smem[.]h$| + cpp/src/distance/detail/fused_distance_nn/persistent_gemm[.]h$| + cpp/src/distance/detail/fused_distance_nn/epilogue_elementwise[.]cuh$| + cpp/src/distance/detail/fused_distance_nn/predicated_tile_iterator_reduced_vec[.]h$| + cpp/src/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast[.]h$ + ) + - id: verify-copyright + name: verify-copyright-scikit-learn + args: [--fix, --spdx, "--spdx-license-identifier=Apache-2.0 AND BSD-3-Clause"] + files: | + (?x)^( + docs/source/sphinxext/github_link[.]py$ + ) + - id: verify-copyright + name: verify-copyright-cutlass + args: [--fix, --spdx, "--spdx-license-identifier=Apache-2.0 AND BSD-3-Clause"] + files: | + (?x)^( + cpp/src/distance/detail/fused_distance_nn/gemm[.]h$| + cpp/src/distance/detail/fused_distance_nn/epilogue[.]cuh$| + cpp/src/distance/detail/fused_distance_nn/predicated_tile_iterator_normvec_smem[.]h$| + cpp/src/distance/detail/fused_distance_nn/persistent_gemm[.]h$| + cpp/src/distance/detail/fused_distance_nn/epilogue_elementwise[.]cuh$| + cpp/src/distance/detail/fused_distance_nn/predicated_tile_iterator_reduced_vec[.]h$| + cpp/src/distance/detail/fused_distance_nn/custom_epilogue_with_broadcast[.]h$ + ) + - id: verify-copyright + name: verify-copyright-faiss + args: [--fix, --spdx, "--spdx-license-identifier=Apache-2.0 AND MIT"] + files: | + (?x)^( + cpp/src/neighbors/detail/faiss_distance_utils[.]h$ + ) + - id: verify-alpha-spec + args: + - --fix + - --mode + - release + - id: verify-codeowners + args: [--fix, --project-prefix=cuvs] + - id: verify-pyproject-license + # ignore the top-level pyproject.toml, which doesn't + # have or need a [project] table + exclude: | + (?x) + ^pyproject[.]toml$ + - repo: https://github.com/rapidsai/dependency-file-generator + rev: v1.20.0 + hooks: + - id: rapids-dependency-file-generator + args: ["--clean", "--warn-all", "--strict"] + - repo: https://github.com/shellcheck-py/shellcheck-py + rev: v0.10.0.1 + hooks: + - id: shellcheck default_language_version: - python: python3 + python: python3 diff --git a/conda/environments/all_cuda-129_arch-aarch64.yaml b/conda/environments/all_cuda-129_arch-aarch64.yaml index d9280859c8..6d63df8738 100644 --- a/conda/environments/all_cuda-129_arch-aarch64.yaml +++ b/conda/environments/all_cuda-129_arch-aarch64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libopenblas<=0.3.30 -- librmm==26.4.*,>=0.0.0a0 +- librmm==26.4.* - make - nccl>=2.19 - ninja @@ -40,7 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.4.* - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0 diff --git a/conda/environments/all_cuda-129_arch-x86_64.yaml b/conda/environments/all_cuda-129_arch-x86_64.yaml index 52bcdfa730..e8f23532df 100644 --- a/conda/environments/all_cuda-129_arch-x86_64.yaml +++ b/conda/environments/all_cuda-129_arch-x86_64.yaml @@ -31,7 +31,7 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- librmm==26.4.*,>=0.0.0a0 +- librmm==26.4.* - make - nccl>=2.19 - ninja @@ -39,7 +39,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.4.* - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0 diff --git a/conda/environments/all_cuda-131_arch-aarch64.yaml b/conda/environments/all_cuda-131_arch-aarch64.yaml index 7f00057821..51856ba867 100644 --- a/conda/environments/all_cuda-131_arch-aarch64.yaml +++ b/conda/environments/all_cuda-131_arch-aarch64.yaml @@ -33,7 +33,7 @@ dependencies: - libcusparse-dev - libnvjitlink-dev - libopenblas<=0.3.30 -- librmm==26.4.*,>=0.0.0a0 +- librmm==26.4.* - make - nccl>=2.19 - ninja @@ -41,7 +41,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.4.* - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0 diff --git a/conda/environments/all_cuda-131_arch-x86_64.yaml b/conda/environments/all_cuda-131_arch-x86_64.yaml index 5020088a7c..f0153c31e4 100644 --- a/conda/environments/all_cuda-131_arch-x86_64.yaml +++ b/conda/environments/all_cuda-131_arch-x86_64.yaml @@ -32,7 +32,7 @@ dependencies: - libcusolver-dev - libcusparse-dev - libnvjitlink-dev -- librmm==26.4.*,>=0.0.0a0 +- librmm==26.4.* - make - nccl>=2.19 - ninja @@ -40,7 +40,7 @@ dependencies: - numpydoc - openblas - pre-commit -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.4.* - pytest - pytest-cov - rapids-build-backend>=0.4.0,<0.5.0 diff --git a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml index f3cb4c38af..dc6145d682 100644 --- a/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-aarch64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-python>=12.9.2,<13.0 - cuda-version=12.9 - cupy>=13.6.0 -- cuvs==26.4.*,>=0.0.0a0 +- cuvs==26.4.* - cxx-compiler - cython>=3.2.2 - dlpack>=0.8,<1.0 @@ -29,15 +29,15 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 -- librmm==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* +- librmm==26.4.* - matplotlib-base>=3.9 - nccl>=2.19 - ninja - nlohmann_json>=3.12.0 - openblas - pandas -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.4.* - pyyaml - rapids-build-backend>=0.4.0,<0.5.0 - requests diff --git a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml index c44e2e58fb..967337b056 100644 --- a/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-129_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-python>=12.9.2,<13.0 - cuda-version=12.9 - cupy>=13.6.0 -- cuvs==26.4.*,>=0.0.0a0 +- cuvs==26.4.* - cxx-compiler - cython>=3.2.2 - dlpack>=0.8,<1.0 @@ -31,8 +31,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 -- librmm==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* +- librmm==26.4.* - matplotlib-base>=3.9 - mkl-devel=2023 - nccl>=2.19 @@ -40,7 +40,7 @@ dependencies: - nlohmann_json>=3.12.0 - openblas - pandas -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.4.* - pyyaml - rapids-build-backend>=0.4.0,<0.5.0 - requests diff --git a/conda/environments/bench_ann_cuda-131_arch-aarch64.yaml b/conda/environments/bench_ann_cuda-131_arch-aarch64.yaml index 2a7f1cd9ea..432ad91d86 100644 --- a/conda/environments/bench_ann_cuda-131_arch-aarch64.yaml +++ b/conda/environments/bench_ann_cuda-131_arch-aarch64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-python>=13.0.1,<14.0 - cuda-version=13.1 - cupy>=13.6.0 -- cuvs==26.4.*,>=0.0.0a0 +- cuvs==26.4.* - cxx-compiler - cython>=3.2.2 - dlpack>=0.8,<1.0 @@ -29,16 +29,16 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* - libnvjitlink-dev -- librmm==26.4.*,>=0.0.0a0 +- librmm==26.4.* - matplotlib-base>=3.9 - nccl>=2.19 - ninja - nlohmann_json>=3.12.0 - openblas - pandas -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.4.* - pyyaml - rapids-build-backend>=0.4.0,<0.5.0 - requests diff --git a/conda/environments/bench_ann_cuda-131_arch-x86_64.yaml b/conda/environments/bench_ann_cuda-131_arch-x86_64.yaml index 07fb692de1..9bfd0030b2 100644 --- a/conda/environments/bench_ann_cuda-131_arch-x86_64.yaml +++ b/conda/environments/bench_ann_cuda-131_arch-x86_64.yaml @@ -17,7 +17,7 @@ dependencies: - cuda-python>=13.0.1,<14.0 - cuda-version=13.1 - cupy>=13.6.0 -- cuvs==26.4.*,>=0.0.0a0 +- cuvs==26.4.* - cxx-compiler - cython>=3.2.2 - dlpack>=0.8,<1.0 @@ -31,9 +31,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* - libnvjitlink-dev -- librmm==26.4.*,>=0.0.0a0 +- librmm==26.4.* - matplotlib-base>=3.9 - mkl-devel=2023 - nccl>=2.19 @@ -41,7 +41,7 @@ dependencies: - nlohmann_json>=3.12.0 - openblas - pandas -- pylibraft==26.4.*,>=0.0.0a0 +- pylibraft==26.4.* - pyyaml - rapids-build-backend>=0.4.0,<0.5.0 - requests diff --git a/conda/environments/go_cuda-129_arch-aarch64.yaml b/conda/environments/go_cuda-129_arch-aarch64.yaml index 55842e86b8..60a2e4fac1 100644 --- a/conda/environments/go_cuda-129_arch-aarch64.yaml +++ b/conda/environments/go_cuda-129_arch-aarch64.yaml @@ -24,8 +24,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 -- libraft==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* +- libraft==26.4.* - nccl>=2.19 - ninja - sysroot_linux-aarch64==2.28 diff --git a/conda/environments/go_cuda-129_arch-x86_64.yaml b/conda/environments/go_cuda-129_arch-x86_64.yaml index 2854de33b7..949135c5ed 100644 --- a/conda/environments/go_cuda-129_arch-x86_64.yaml +++ b/conda/environments/go_cuda-129_arch-x86_64.yaml @@ -24,8 +24,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 -- libraft==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* +- libraft==26.4.* - nccl>=2.19 - ninja - sysroot_linux-64==2.28 diff --git a/conda/environments/go_cuda-131_arch-aarch64.yaml b/conda/environments/go_cuda-131_arch-aarch64.yaml index 135f6a88cc..08cbce208c 100644 --- a/conda/environments/go_cuda-131_arch-aarch64.yaml +++ b/conda/environments/go_cuda-131_arch-aarch64.yaml @@ -24,9 +24,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* - libnvjitlink-dev -- libraft==26.4.*,>=0.0.0a0 +- libraft==26.4.* - nccl>=2.19 - ninja - sysroot_linux-aarch64==2.28 diff --git a/conda/environments/go_cuda-131_arch-x86_64.yaml b/conda/environments/go_cuda-131_arch-x86_64.yaml index df6a779331..79fe707c21 100644 --- a/conda/environments/go_cuda-131_arch-x86_64.yaml +++ b/conda/environments/go_cuda-131_arch-x86_64.yaml @@ -24,9 +24,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* - libnvjitlink-dev -- libraft==26.4.*,>=0.0.0a0 +- libraft==26.4.* - nccl>=2.19 - ninja - sysroot_linux-64==2.28 diff --git a/conda/environments/rust_cuda-129_arch-aarch64.yaml b/conda/environments/rust_cuda-129_arch-aarch64.yaml index 0aa5a7ea6f..2340d75dcb 100644 --- a/conda/environments/rust_cuda-129_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-129_arch-aarch64.yaml @@ -21,8 +21,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 -- libraft==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* +- libraft==26.4.* - make - nccl>=2.19 - ninja diff --git a/conda/environments/rust_cuda-129_arch-x86_64.yaml b/conda/environments/rust_cuda-129_arch-x86_64.yaml index b9dabfafa7..724a137b25 100644 --- a/conda/environments/rust_cuda-129_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-129_arch-x86_64.yaml @@ -21,8 +21,8 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 -- libraft==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* +- libraft==26.4.* - make - nccl>=2.19 - ninja diff --git a/conda/environments/rust_cuda-131_arch-aarch64.yaml b/conda/environments/rust_cuda-131_arch-aarch64.yaml index 062cbc8ea0..e2c7bd1654 100644 --- a/conda/environments/rust_cuda-131_arch-aarch64.yaml +++ b/conda/environments/rust_cuda-131_arch-aarch64.yaml @@ -21,9 +21,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* - libnvjitlink-dev -- libraft==26.4.*,>=0.0.0a0 +- libraft==26.4.* - make - nccl>=2.19 - ninja diff --git a/conda/environments/rust_cuda-131_arch-x86_64.yaml b/conda/environments/rust_cuda-131_arch-x86_64.yaml index 2b96d4a64e..c7c410d890 100644 --- a/conda/environments/rust_cuda-131_arch-x86_64.yaml +++ b/conda/environments/rust_cuda-131_arch-x86_64.yaml @@ -21,9 +21,9 @@ dependencies: - libcurand-dev - libcusolver-dev - libcusparse-dev -- libcuvs==26.4.*,>=0.0.0a0 +- libcuvs==26.4.* - libnvjitlink-dev -- libraft==26.4.*,>=0.0.0a0 +- libraft==26.4.* - make - nccl>=2.19 - ninja diff --git a/dependencies.yaml b/dependencies.yaml index 0d3c5dde03..9a54bae94a 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -561,7 +561,7 @@ dependencies: - output_types: [conda, pyproject, requirements] packages: - click - - cuvs==26.4.*,>=0.0.0a0 + - cuvs==26.4.* - pandas - pyyaml - requests @@ -588,17 +588,17 @@ dependencies: common: - output_types: conda packages: - - cuvs==26.4.*,>=0.0.0a0 + - cuvs==26.4.* depends_on_cuvs_bench: common: - output_types: conda packages: - - cuvs-bench==26.4.*,>=0.0.0a0 + - cuvs-bench==26.4.* depends_on_libcuvs: common: - output_types: conda packages: - - &libcuvs_unsuffixed libcuvs==26.4.*,>=0.0.0a0 + - &libcuvs_unsuffixed libcuvs==26.4.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -610,12 +610,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libcuvs-cu12==26.4.*,>=0.0.0a0 + - libcuvs-cu12==26.4.* - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - libcuvs-cu13==26.4.*,>=0.0.0a0 + - libcuvs-cu13==26.4.* - {matrix: null, packages: [*libcuvs_unsuffixed]} depends_on_libcuvs_tests: common: @@ -626,7 +626,7 @@ dependencies: common: - output_types: conda packages: - - &libraft_unsuffixed libraft==26.4.*,>=0.0.0a0 + - &libraft_unsuffixed libraft==26.4.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -638,18 +638,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - libraft-cu12==26.4.*,>=0.0.0a0 + - libraft-cu12==26.4.* - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - libraft-cu13==26.4.*,>=0.0.0a0 + - libraft-cu13==26.4.* - {matrix: null, packages: [*libraft_unsuffixed]} depends_on_librmm: common: - output_types: conda packages: - - &librmm_unsuffixed librmm==26.4.*,>=0.0.0a0 + - &librmm_unsuffixed librmm==26.4.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -661,18 +661,18 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - librmm-cu12==26.4.*,>=0.0.0a0 + - librmm-cu12==26.4.* - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - librmm-cu13==26.4.*,>=0.0.0a0 + - librmm-cu13==26.4.* - {matrix: null, packages: [*librmm_unsuffixed]} depends_on_pylibraft: common: - output_types: conda packages: - - &pylibraft_unsuffixed pylibraft==26.4.*,>=0.0.0a0 + - &pylibraft_unsuffixed pylibraft==26.4.* - output_types: requirements packages: # pip recognizes the index as a global option for the requirements.txt file @@ -684,12 +684,12 @@ dependencies: cuda: "12.*" cuda_suffixed: "true" packages: - - pylibraft-cu12==26.4.*,>=0.0.0a0 + - pylibraft-cu12==26.4.* - matrix: cuda: "13.*" cuda_suffixed: "true" packages: - - pylibraft-cu13==26.4.*,>=0.0.0a0 + - pylibraft-cu13==26.4.* - {matrix: null, packages: [*pylibraft_unsuffixed]} depends_on_nccl: common: diff --git a/python/cuvs/pyproject.toml b/python/cuvs/pyproject.toml index 267130acb0..69d1f73c96 100644 --- a/python/cuvs/pyproject.toml +++ b/python/cuvs/pyproject.toml @@ -21,9 +21,9 @@ license = "Apache-2.0" requires-python = ">=3.11" dependencies = [ "cuda-python>=13.0.1,<14.0", - "libcuvs==26.4.*,>=0.0.0a0", + "libcuvs==26.4.*", "numpy>=1.23,<3.0", - "pylibraft==26.4.*,>=0.0.0a0", + "pylibraft==26.4.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ "Intended Audience :: Developers", @@ -109,9 +109,9 @@ requires = [ "cmake>=3.30.4", "cuda-python>=13.0.1,<14.0", "cython>=3.2.2", - "libcuvs==26.4.*,>=0.0.0a0", - "libraft==26.4.*,>=0.0.0a0", - "librmm==26.4.*,>=0.0.0a0", + "libcuvs==26.4.*", + "libraft==26.4.*", + "librmm==26.4.*", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. build-backend = "scikit_build_core.build" diff --git a/python/cuvs_bench/pyproject.toml b/python/cuvs_bench/pyproject.toml index 446894fb6e..c96e463d47 100644 --- a/python/cuvs_bench/pyproject.toml +++ b/python/cuvs_bench/pyproject.toml @@ -20,7 +20,7 @@ license = "Apache-2.0" requires-python = ">=3.11" dependencies = [ "click", - "cuvs==26.4.*,>=0.0.0a0", + "cuvs==26.4.*", "matplotlib>=3.9", "pandas", "pyyaml", diff --git a/python/libcuvs/pyproject.toml b/python/libcuvs/pyproject.toml index 052834cab0..456ae51135 100644 --- a/python/libcuvs/pyproject.toml +++ b/python/libcuvs/pyproject.toml @@ -20,8 +20,8 @@ license = "Apache-2.0" requires-python = ">=3.11" dependencies = [ "cuda-toolkit[cublas,curand,cusolver,cusparse]==13.*", - "libraft==26.4.*,>=0.0.0a0", - "librmm==26.4.*,>=0.0.0a0", + "libraft==26.4.*", + "librmm==26.4.*", "nvidia-nvjitlink>=13.0,<14", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ @@ -81,8 +81,8 @@ regex = "(?P.*)" build-backend = "scikit_build_core.build" requires = [ "cmake>=3.30.4", - "libraft==26.4.*,>=0.0.0a0", - "librmm==26.4.*,>=0.0.0a0", + "libraft==26.4.*", + "librmm==26.4.*", "ninja", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. dependencies-file = "../../dependencies.yaml" From 9442b57d25672e65a2fc6e4b6c377493d77c4890 Mon Sep 17 00:00:00 2001 From: kanta1211 Date: Thu, 25 Jun 2026 18:08:07 +0900 Subject: [PATCH 2/2] add script .py --- .gitignore | 4 + cagra_test/.cagra_sift1m.py.swn | Bin 0 -> 24576 bytes cagra_test/.cagra_sift1m.py.swo | Bin 0 -> 16384 bytes cagra_test/cagra_gist1m.py | 279 +++++++++++++++++++++ cagra_test/cagra_sift1m.py | 417 ++++++++++++++++++++++++++++++++ cagra_test/test_cagra_random.py | 37 +++ 6 files changed, 737 insertions(+) create mode 100644 cagra_test/.cagra_sift1m.py.swn create mode 100644 cagra_test/.cagra_sift1m.py.swo create mode 100644 cagra_test/cagra_gist1m.py create mode 100644 cagra_test/cagra_sift1m.py create mode 100644 cagra_test/test_cagra_random.py diff --git a/.gitignore b/.gitignore index 3627558ff5..73dd8aa7f7 100644 --- a/.gitignore +++ b/.gitignore @@ -88,3 +88,7 @@ ivf_pq_index # java .classpath + +*.csv +*.txt + diff --git a/cagra_test/.cagra_sift1m.py.swn b/cagra_test/.cagra_sift1m.py.swn new file mode 100644 index 0000000000000000000000000000000000000000..e32e2473c5682448110d626825cf3bc6c045192e GIT binary patch literal 24576 zcmeI33y>VedB+D^!o~rEEh?3m;0&u++LQP89s(>~ytavrF*c@TOTyUaV{5cKdpEj$ zX=e6LCt0A~MK4PrnMBspyiDnV&`Aj*Q9wY&jv=WsaXB{eBNd!@w5Ox;z_`c*J1OV? z^<#QwclJ)GNF}M-sruc{^nBg@_1E3g-Cy^u>b>pePwPvP{Ti+-H0{ny=B>VB{%cph z<%Ns1(b6ho!f-|`dN;#mPpmVGGrX6t?(`Gv886wRJsD%zHqx%?xZ>8aa&BKCS)PzF zI=>wYbS&_uSfJwOx|dwGa&doOFQqp>c8xyo-7DXeq&tapEYPt)#{wM-bS%)ZK*s_d z3v?{dvB3Wi3%KPu+FelftwP;R;`fK*_agE87V*1A{BCJ_KSw;<;`cwA-p>`!OU3VM zk+4i(1b2Qq7U)=@V}XtZIu__ypkslK1v(b!SfFEpjs-dv=vd%?zyd~A)82!0e?bdz z?ElO4{kJdIv~PgNz^$MQ%mpvLQ`5GCG`IsK!Ot(#v{%93f**q)ffL{`aKPKZuP)WJ zpMe*_<6tYOfhxEI^nqTm5d8AjG_3(11Din(d>C8~-U*(2hoY4 z0H}jCU=X|qTnT;&Cx-{V3(kN%$N>}F0QcH^jQMpzNB8X-{w|3f6VVvao3JRfb~*rAeH^l(a^P zPwDiNESq*Noq<}eX~(9?Syde(4}07`-j_GLmSn{K(K z@+i`EsE~uKL_El`)|o?0%*;FHKy-w%VM8mG2DgoR<`%JX+*B%Dw;a7#a`jtFMI_DA zrmUd?OVLj9gefd8loczAWSvCznE*hqad!5LY$A_Kt5i?&-#in*_X-w|-s(ZZZqh9sy#)GFCyS94O z6MCj{+j8BjPI~n%UVXP$JL=VsdG)P5(_0>GY~MY7t8I@Z ze4_?cR-vp&q?Rv-0=uSB=%&gM(WC6bE!wPmcQ>nmZdTdQrA=9m1$7kdBak22LaN)> zzeMR4sV&bj5Is^|O0!wpv(IcF(EECO6McQ4A1nsfg6qKb-~(U@SlXND@9jN5Yf=Nz zC_B1fO=Xc;%^9URQgBSe&Wv=ATUmERHp5U;>fZT24jv@9L}5RjB6lf ztbj&qv4& z#mZ-e7D_|E(Zq_yw8%q#=%c$QtZ3Q=Giw>H*^PsHt0r z)kh4+a9umj(?mD-MPsB6Wk;9Dl7CIo>}w)(4TfnaaJvya&_W)c_<`LJs~8{XLHw#%!Z@oJ~N`eCpBc;indF^)`ctWRy4WMkq3XP^Dr z%ln?xD3-Z4W0qZg1;*tYZM$TvDTJ_mM3##djgHfx&$D*ziGYRpo56tnhGda^ld_Cw ztP_#o1|~^r#)9UJ4e*&tFid@mW6Hn~R1X0O_U#pk&^>>}Rfo3gS+=oQjQ#p*(_p;! z-gA#U$&KjGD#Ww_vyF|_Hly_kns}gQLi3F3v9nJ-UL8}EhgfYsi~P?%eQ;{)E%aP0Ui80m<#5Bw}L775ncihfo0$m;KSf*@Ly;X z-v!$NwU1Tc0xoKf@>1QMH%RrYzS<<)wr}%jZC{5|33hTGF&Gk-<=vc1@q6_?G(rY@ zyhggRaglE1KqAHfFWlW*M(0BqM!f1XUUmP=8}~O(pQhn}uV*ocA3a1L|EAHxtDR)y z#O|rX+s~cYb^*hNMW3s5VAO~jDf%=jP1w&MOi(b^#zUhqW^(RGGGki#xEg`=9v$;{ z{z0PQpG|jC!6g-+eMp1xg;)1SS71myjOBZuS3BX=j(Bw}>T6#00k8JuKApYAS{-Zs zL(>l(VZ$@6+sQ~wVXrFR9@%rYQ@M5Eb3O{LlRq=PKGpN?$6ycaYVGgGo{GvlHOyCv7it0jSa@&1&a zH;YKbjoYRJCqcSsTEio&O13j-4Rz@YU{N<7@TgKK?+dw-ty}oE>9$cEHd~@|m*`;c z5b4CMBXmjWz3iE3gsDe!zJ5e~kv__!2J%OQz|-JBLLV6AuEgeaB9ab@%Nh-&Mp}{w zn0bm3JzlI7Sog9_C!v2RHOdR#oKd?75Z5R{)PRxCe`HjjrA>3WwBe>lTdbJ~mO7!2 zHdpi@29fyMBo@I|Io{Q!Q{}9s%5lxLaDA8a7Cf4XSQkxpeKBq~X^2?BAr@oqMryU2 zF?*Z(>XydwJ&mnF}HlLGW1Pk!@2u52#NR6H?k;uyAVOvK^ccF_!l5h+a(f|?O`g9FTO!OMY?v8M?CuZl>nYH9_214F!W zC;$o4PZAYN+XbxJMU;fdiY?W7^;4q=Czxbw+!oxDWSBLO}1_v?Jt_Kps;PdElulnrtGjK?3lo4c`4(!(`U0K`X`vNd2 zE1MW={#ROea?CQv<78HBMls_B&CMFb|Lx|^cDGs@Q>VGl)ZArVw`uUqq!--a`F3O5 z22b4=GoUf7%jFD0w+iKw?Q-XWCLb#03Egn?Oj(BDgUW-7Wh^6k6xubF_a#T((ezy2 zXPhy4E4{O9{f}j%_OjUjm*@9s&Hq<`*8IN@#y|zE1dBmGxD@;ZYx$SJQE&tt1P=Hh zxCs0?*7EDXQm_PE1}+6tShxQr_zSQX>;Y@QP2l6;UEm5Z2fT>2`$@1B+zu`RKg2qH z5*!By!2vJ zT^&^mT`3TwrGb3UmI<1CC4bqVjF~Q^V#{y-_=>(y2e$l3h&xy5j?ey_q){%L#cW&+ zi1PSBXjwurS}u&=At8AvfdpkL{sg}zwhTZjzED&OV%oW$`N~E&$Y`Noqye#k>P*GNs|_U)i7%b&e;90UOH2* zV7e~PaEL_%MGT1`dWNRG?CTfzUv;Sl1{YfK^FMgrcNW`pDBcBqlG^Q>KzNlFMDp z{0hlpC7(|^6UEGkT`FQ8FHez3@$iJ7LU|W#{W7r*ksd~S(PU)#%Y0j{!C`+zC{`Bd zY>Xl{{}ZaAp@iJC&*KHYR+P_-r+lO$vlm`UQWt_tKsp3iq4_Zw=>i~0zlxySU|>CQ z{`rLM8)e%zc8#!U8+)FYEoHgqV_|=Yoys}m)gSa~*a541^*yRFhZWAnLLYyap=mVG zyK$*i9c7bKJCh`@zWc%XFc(xlFOpPGp_L*RrF?`x6Q4TTQlZf?B*^=qM)Gv=$3Sli z7ft1sQn%qoEDX*zp}BuL7!sBxZMaN$)NZz4t20nz+TxFIDHwyq{O2aWnX0T%i$JV` zG5ZE3%Z~ZO*oimh4nRNPHlQO>Vv#N84oFqDeE%EAj*D@vY22a8;C)99%u-S%%7$O{ z*evK56{*T3RT!4zER#TpIyT4L<@!SMe=clJHdtLU**O=^=1ppv{%}dnljfw{0n38< z2VOSKumFXgPh;gRi`!>~EG@a9=+LlR(r;R}a@O8UN2+}biK_6AS<9YF`~SCN-FiFL zxwQW;|39Dh{SSf9gGJyKtmV&u`@m{20Oo;dtmXd{+ye$d0=$Mb`x)>c7zY+e13Ksb zA&>+YgSp^)Sj(RQ+W@Wa2f-J>?|><+;lBkQ0QZA&Fa}lwI^+LI&=2N=UtOYU-v;-C z5wHmS6Uy`)*aq$bcY+Oo_U2i6FJLpf6QyH;js-dv=vbg*fsO^*T7b<1*&%NY%cgXi zHeSwJAwG@KSa#x1E(RwokFXH_ zK$3y4wRP1`D-iH=avi>(tC=si`_Cb*o~JTln|L6813o|J89dQMu#zuZW^Kp0|o- zDux3i*s9A3dr*cdDP&5n4XHQ|2P8+$3H(o8Q%Orsl6)vx%XZ>Cmo)AZIKz+F>C$N6 z|2!xI^e`o*&h@J>Vp6uM%8odrmY7~} zA_oPjTKcKGt6sgXe(!aCyqTkOx9&ecchJB*UvY|CFvd@p4@m6tr|r1OJXysTQdv$KG+zuz*)dqz*)dqz*)dqz*)dq;OAk1G~Ph&MpsXt zDUM8bd%`+jAn&h}=OwHDLV16qJU?dD|6A%;dcLyiznAr?EVuQ`Vs~;Da29YDa29YD za29YDa29YDa29YDa29YDa2EJ4Ea0~ZdH-TUUf+zy`2An)??1YXkQc$D;1O^P#J~f0 zfLp*M`2JEt-U5FIE8sY|9qa_3Zz1Fz@HTh}oCJRc9@q^o2b;h~@Xap?`71aMmVpQM zf=N&Tr!RpX@Hn^?Yz3RZM)27$2zeen0Adh<-C!4}gKzO};{9F)PXG?KgLyCk-VpB( zPJ$+=f+=t*`1m41PJusx5ZnO%wTX~7!F}LPunYVee6kUGz~8__;6ZQ_bionO099}W z*Z{uVK*($0DR4hn0WEL<)WC1Sh2S)jE8l@Pz`cNjL*O9T24(;S6W|LZSUv!cf)M-x zTmmKl0cTWVN5u-2$23t_;@qhutmXIndsh5pk=J^wjg=%CHak;R@zg@Sc712HVl_T= z@Nm&!>R8N$%#P0L47H`wV8Eo9zqIIa<|nPLCrWYMlBy{mZy2Oe-1B&FTs7APXmxG7 z^sG1u!nB%quoEQ|uR(*5CX8qOl-Kj}LZs5bK@VqxfuAf>aZ@RbQdCdF$T*6dPKEnR ztX-#teWEOlvvd{mxr~q-CPB)QT5*boyegll?QBYPncrqf)vlxs+Tu&KeL*X|E!S7= z%+OB2`t8skFy7FYMayB3c4-u|u$sHA=oaiZtjy4m4f{dJ8kNd4^*Qa-i@q+5dOYUy z%FL*)0zaC!Yxv8o*8VU{rGtk-JMEg~N3)>c9{Js@2}7^Vc#DOp7YcKlbww+`zZjXN z@HI;YtR46%^YHhgxC;%731g`YMYSH;IQ8P?!X;ZNrt7?9Xv7}vH(ZiY= z{;YXGL>;9V4pCmr2s&(46HE%UDZi*$4Dbti0|aS4J;DkcTmqmj(0yN1qtb8~Zol7hjHP++%(3Dpx|#Ny?t_~_9S zY35PP&{_K{C#r#EPw^VrplZA&pGNCG=9E$y)(=X9I4hqeS%@H@Tj~5Ne8u{H%rQ-h zH3H@4^eEbtO^XMQ%uGbJI&!ggz*9eLG4XPvYobKN+3cMPT={(MWg#dFp(vfF+h~ga z!--;bpb#~Pw;EjA@mf(PrW9jp7J{v~*2>y`Z68|-TC6%<In?xbjB$nu?w_*yG zfruyMe}0uiq_S-6C;kBIogi$pyJnOV$75CWAe}pZ-OSfT`Krhd7g@1(NLbALbXu+w z#1qHdb#oLlB5K4j=A}}t=^XUCxu9+S15NoqFyka0`pF=RbzjPXB0HV0R~qM-(e%XF z(3g6omZ}N|kGy|4U-2Hyy+lyFR{U z8CR++MYM^|<g?4nF8szs@i)GBgGeQOJv-c##zv&@$7iKoaz$@j`aa)&TvESAF! z%{|I6Kww~6XU8BXQzPuvyX1?5V(DYfaC$9Nn%s0TRH>m$boi_z);T^evH!mtd)jBP z*A@H!8|A+64EFZVf)s28*MM)ar+*o|1TsLuo7lUHJ^g;L7wiEyfZu}#5c~XHpboAD zJHU1@4=w`lVQ>F5I006{?ce~o3G4w^gU#S;?DIbauYhO4U%(1rU_Y1zzX22AQ|$TQ z0sjO~fhWNUun$aujbH=#WHa{n;9YPEJPhsuO;7>f!6q+&Q{W#Uw~^X6yR)-^vw*XJ zvw*XJv%pVl0kupkFW1LXV=iYsU2H1Udeh~s-^FEPhTj6ToXJ aGR<@@MIsTrmX7ih8}_GzEUujvCgeX5e&i$o literal 0 HcmV?d00001 diff --git a/cagra_test/cagra_gist1m.py b/cagra_test/cagra_gist1m.py new file mode 100644 index 0000000000..bdcd99161d --- /dev/null +++ b/cagra_test/cagra_gist1m.py @@ -0,0 +1,279 @@ +import argparse +import csv +import time +import numpy as np +import cupy as cp +from cuvs.neighbors import cagra + + +def read_fvecs(path: str) -> np.ndarray: + data = np.fromfile(path, dtype=np.int32) + dim = int(data[0]) + data = data.reshape(-1, dim + 1) + return data[:, 1:].view(np.float32) + + +def read_ivecs(path: str) -> np.ndarray: + data = np.fromfile(path, dtype=np.int32) + dim = int(data[0]) + data = data.reshape(-1, dim + 1) + return data[:, 1:].copy() + + +def to_numpy(x): + if hasattr(x, "copy_to_host"): + return np.asarray(x.copy_to_host()) + return cp.asnumpy(x) + + +def recall_at_k(found: np.ndarray, gt: np.ndarray, k: int) -> float: + found = found[:, :k] + gt = gt[:, :k] + + hit = 0 + for i in range(found.shape[0]): + hit += len(set(found[i]) & set(gt[i])) + + return hit / (found.shape[0] * k) + + +def parse_int_list(s: str): + return [int(x) for x in s.split(",") if x.strip()] + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument("--base", required=True) + parser.add_argument("--query", required=True) + parser.add_argument("--gt", required=True) + + parser.add_argument("--k", type=int, default=100) + parser.add_argument("--base-limit", type=int, default=0) + parser.add_argument("--query-limit", type=int, default=0) + + # GIST1M paper setting: d = 48 + # Initial/intermediate graph degree: 3d = 144 + parser.add_argument("--graph-degree", type=int, default=48) + parser.add_argument("--intermediate-graph-degree", type=int, default=144) + parser.add_argument("--build-algo", default="nn_descent") + parser.add_argument("--nn-descent-niter", type=int, default=20) + + parser.add_argument("--itopk-list", default="100,128,160,192,256,320,384,448,512") + parser.add_argument("--search-width", type=int, default=1) + + parser.add_argument( + "--algo", + default="single_cta", + choices=["auto", "single_cta", "multi_cta"], + help="CAGRA search algorithm: auto, single_cta, or multi_cta", + ) + + parser.add_argument("--repeat", type=int, default=5) + parser.add_argument("--output", default="cagra_gist1m_sweep.csv") + + args = parser.parse_args() + itopk_list = parse_int_list(args.itopk_list) + + print("==== Load dataset ====") + t0 = time.perf_counter() + base_np = read_fvecs(args.base).astype(np.float32) + query_np = read_fvecs(args.query).astype(np.float32) + gt_np = read_ivecs(args.gt) + t1 = time.perf_counter() + + if args.base_limit > 0: + base_np = base_np[:args.base_limit] + + if args.query_limit > 0: + query_np = query_np[:args.query_limit] + gt_np = gt_np[:args.query_limit] + + print(f"base shape : {base_np.shape}") + print(f"query shape : {query_np.shape}") + print(f"groundtruth shape: {gt_np.shape}") + print(f"load time [s] : {t1 - t0:.6f}") + + print() + print("==== Transfer dataset to GPU ====") + cp.cuda.Device().synchronize() + t0 = time.perf_counter() + dataset = cp.asarray(base_np) + queries = cp.asarray(query_np) + cp.cuda.Device().synchronize() + t1 = time.perf_counter() + transfer_time = t1 - t0 + print(f"transfer time [s]: {transfer_time:.6f}") + + print() + print("==== Build CAGRA index ====") + print(f"metric : sqeuclidean") + print(f"build_algo : {args.build_algo}") + print(f"graph_degree : {args.graph_degree}") + print(f"intermediate_graph_degree: {args.intermediate_graph_degree}") + print(f"nn_descent_niter : {args.nn_descent_niter}") + + index_params = cagra.IndexParams( + metric="sqeuclidean", + intermediate_graph_degree=args.intermediate_graph_degree, + graph_degree=args.graph_degree, + build_algo=args.build_algo, + nn_descent_niter=args.nn_descent_niter, + ) + + cp.cuda.Device().synchronize() + t0 = time.perf_counter() + index = cagra.build(index_params, dataset) + cp.cuda.Device().synchronize() + t1 = time.perf_counter() + build_time = t1 - t0 + + print(f"graph build time [s]: {build_time:.6f}") + + results = [] + + print() + print("==== Sweep search parameters ====") + print(f"k : {args.k}") + print(f"search_algo : {args.algo}") + print(f"search_width: {args.search_width}") + print(f"itopk_list : {itopk_list}") + print(f"repeat : {args.repeat}") + + for itopk_size in itopk_list: + if itopk_size < args.k: + print(f"[SKIP] itopk_size={itopk_size} is smaller than k={args.k}") + continue + + if args.algo == "single_cta" and itopk_size > 512: + print(f"[SKIP] single_cta does not allow itopk_size > 512: itopk_size={itopk_size}") + continue + + print() + print(f"---- algo = {args.algo}, itopk_size = {itopk_size} ----") + + search_params = cagra.SearchParams( + algo=args.algo, + itopk_size=itopk_size, + search_width=args.search_width, + ) + + # warmup + distances, neighbors = cagra.search(search_params, index, queries, args.k) + cp.cuda.Device().synchronize() + + search_times = [] + last_distances = None + last_neighbors = None + + for r in range(args.repeat): + cp.cuda.Device().synchronize() + t0 = time.perf_counter() + distances, neighbors = cagra.search(search_params, index, queries, args.k) + cp.cuda.Device().synchronize() + t1 = time.perf_counter() + + elapsed = t1 - t0 + search_times.append(elapsed) + last_distances = distances + last_neighbors = neighbors + + print(f"run {r + 1}: {elapsed:.6f} s") + + avg_search_time = sum(search_times) / len(search_times) + qps = queries.shape[0] / avg_search_time + + search_time_sec_per_query = avg_search_time / queries.shape[0] + search_time_ms_per_query = search_time_sec_per_query * 1000 + search_time_us_per_query = search_time_sec_per_query * 1_000_000 + + neighbors_np = to_numpy(last_neighbors) + distances_np = to_numpy(last_distances) + + rec = recall_at_k(neighbors_np, gt_np, args.k) + + print(f"avg search time for all queries [s]: {avg_search_time:.6f}") + print(f"search time [ms/query] : {search_time_ms_per_query:.6f}") + print(f"search time [us/query] : {search_time_us_per_query:.6f}") + print(f"QPS [queries/sec] : {qps:.3f}") + print(f"recall@{args.k} : {rec:.6f}") + + results.append({ + "dataset": "GIST1M", + "dataset_size": base_np.shape[0], + "dimension": base_np.shape[1], + "num_queries": query_np.shape[0], + "k": args.k, + "graph_degree": args.graph_degree, + "intermediate_graph_degree": args.intermediate_graph_degree, + "build_algo": args.build_algo, + "nn_descent_niter": args.nn_descent_niter, + "build_time_sec": build_time, + "algo": args.algo, + "search_width": args.search_width, + "itopk_size": itopk_size, + "avg_search_time_sec_all_queries": avg_search_time, + "search_time_sec_per_query": search_time_sec_per_query, + "search_time_ms_per_query": search_time_ms_per_query, + "search_time_us_per_query": search_time_us_per_query, + "qps": qps, + f"recall@{args.k}": rec, + }) + + print() + print("==== Save results ====") + + fieldnames = [ + "dataset", + "dataset_size", + "dimension", + "num_queries", + "k", + "graph_degree", + "intermediate_graph_degree", + "build_algo", + "nn_descent_niter", + "build_time_sec", + "algo", + "search_width", + "itopk_size", + "avg_search_time_sec_all_queries", + "search_time_sec_per_query", + "search_time_ms_per_query", + "search_time_us_per_query", + "qps", + f"recall@{args.k}", + ] + + with open(args.output, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(results) + + print(f"saved: {args.output}") + + print() + print("==== Summary ====") + for row in results: + print( + f"dataset={row['dataset']}, " + f"algo={row['algo']}, " + f"itopk_size={row['itopk_size']:4d}, " + f"time={row['search_time_us_per_query']:.3f} us/query, " + f"time={row['search_time_ms_per_query']:.6f} ms/query, " + f"QPS={row['qps']:.3f}, " + f"recall@{args.k}={row[f'recall@{args.k}']:.6f}" + ) + + print() + print("==== Example results from last parameter ====") + if results: + print("neighbors[:5]:") + print(neighbors_np[:5]) + print() + print("distances[:5]:") + print(distances_np[:5]) + + +if __name__ == "__main__": + main() diff --git a/cagra_test/cagra_sift1m.py b/cagra_test/cagra_sift1m.py new file mode 100644 index 0000000000..9a72b00eb9 --- /dev/null +++ b/cagra_test/cagra_sift1m.py @@ -0,0 +1,417 @@ +import argparse +import csv +import math +import os +import time + +import numpy as np +import cupy as cp +from cuvs.neighbors import cagra + + +def read_fvecs(filename, limit=None): + """ + fvecs形式のベクトルを読み込む。 + 各ベクトルは [dim(int32), data(float32)*dim] の形式。 + """ + data = np.fromfile(filename, dtype=np.int32) + if data.size == 0: + raise ValueError(f"Empty file: {filename}") + + dim = data[0] + vector_size = dim + 1 + + if data.size % vector_size != 0: + raise ValueError( + f"Invalid fvecs file: {filename}, " + f"data.size={data.size}, dim={dim}" + ) + + num_vectors = data.size // vector_size + data = data.reshape(num_vectors, vector_size) + + vectors = data[:, 1:].view(np.float32) + + if limit is not None: + vectors = vectors[:limit] + + return np.ascontiguousarray(vectors) + + +def read_ivecs(filename, limit=None): + """ + ivecs形式のベクトルを読み込む。 + 各ベクトルは [dim(int32), data(int32)*dim] の形式。 + """ + data = np.fromfile(filename, dtype=np.int32) + if data.size == 0: + raise ValueError(f"Empty file: {filename}") + + dim = data[0] + vector_size = dim + 1 + + if data.size % vector_size != 0: + raise ValueError( + f"Invalid ivecs file: {filename}, " + f"data.size={data.size}, dim={dim}" + ) + + num_vectors = data.size // vector_size + data = data.reshape(num_vectors, vector_size) + + vectors = data[:, 1:] + + if limit is not None: + vectors = vectors[:limit] + + return np.ascontiguousarray(vectors) + + +def parse_itopk_list(text): + """ + "100,110,120" のような文字列を [100, 110, 120] に変換する。 + """ + return [int(x.strip()) for x in text.split(",") if x.strip()] + + +def recall_at_k(result_neighbors, groundtruth, k): + """ + recall@k を計算する。 + + result_neighbors: shape = (num_queries, k) + groundtruth : shape = (num_queries, >=k) + """ + result_neighbors = result_neighbors[:, :k] + groundtruth = groundtruth[:, :k] + + total = result_neighbors.shape[0] * k + correct = 0 + + for i in range(result_neighbors.shape[0]): + correct += len(set(result_neighbors[i]) & set(groundtruth[i])) + + return correct / total + + +def get_team_size(search_params): + """ + cuVSのバージョンによって team_size が存在しない可能性があるため、 + 安全に取得する。 + """ + return getattr(search_params, "team_size", "auto") + + +def get_cta_per_query(algo, search_width, itopk_size): + """ + multi-CTA時の1クエリあたりCTA数を計算する。 + + cuVS CAGRAのmulti-CTAでは、内部的に1 CTAあたりの + itopkサイズが32として扱われるため、 + + cta_per_query = max(search_width, ceil(itopk_size / 32)) + + として計算する。 + + single-CTAでは1クエリあたり1 CTAとして表示する。 + """ + if algo == "multi_cta": + return max(search_width, math.ceil(itopk_size / 32)) + else: + return 1 + + +def make_index_params(args): + """ + CAGRAのグラフ構築パラメータを作成する。 + cuVSのバージョン差を考慮して、まず通常の指定を試す。 + """ + try: + return cagra.IndexParams( + metric=args.metric, + graph_degree=args.graph_degree, + intermediate_graph_degree=args.intermediate_graph_degree, + build_algo=args.build_algo, + nn_descent_niter=args.nn_descent_niter, + ) + except TypeError: + # 古い/異なるcuVSバージョンで一部引数名が合わない場合の保険 + params = cagra.IndexParams() + params.metric = args.metric + params.graph_degree = args.graph_degree + params.intermediate_graph_degree = args.intermediate_graph_degree + params.build_algo = args.build_algo + + if hasattr(params, "nn_descent_niter"): + params.nn_descent_niter = args.nn_descent_niter + + return params + + +def make_search_params(args, itopk_size): + """ + CAGRAの探索パラメータを作成する。 + team_sizeを明示した場合は設定し、指定しない場合はcuVSの自動設定に任せる。 + """ + kwargs = { + "algo": args.algo, + "itopk_size": itopk_size, + "search_width": args.search_width, + } + + if args.team_size is not None: + kwargs["team_size"] = args.team_size + + try: + return cagra.SearchParams(**kwargs) + except TypeError: + # cuVSのバージョン差を考慮した保険 + params = cagra.SearchParams() + params.algo = args.algo + params.itopk_size = itopk_size + params.search_width = args.search_width + + if args.team_size is not None and hasattr(params, "team_size"): + params.team_size = args.team_size + + return params + + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument("--base", type=str, required=True) + parser.add_argument("--query", type=str, required=True) + parser.add_argument("--gt", type=str, required=True) + + parser.add_argument("--base-limit", type=int, default=None) + parser.add_argument("--query-limit", type=int, default=None) + + parser.add_argument("--k", type=int, default=100) + + parser.add_argument("--metric", type=str, default="sqeuclidean") + parser.add_argument("--graph-degree", type=int, default=32) + parser.add_argument("--intermediate-graph-degree", type=int, default=96) + parser.add_argument("--build-algo", type=str, default="nn_descent") + parser.add_argument("--nn-descent-niter", type=int, default=20) + + parser.add_argument( + "--algo", + type=str, + default="single_cta", + choices=["single_cta", "multi_cta"], + ) + parser.add_argument("--search-width", type=int, default=1) + + parser.add_argument( + "--itopk-list", + type=str, + default=None, + help="例: 100,110,120,130,140,150,160,170,180,190,200", + ) + parser.add_argument( + "--itopk-size", + type=int, + default=128, + help="--itopk-list を指定しない場合に使用する itopk_size", + ) + + parser.add_argument( + "--team-size", + type=int, + default=None, + help="指定しない場合は cuVS のデフォルト/自動設定を使用する", + ) + + parser.add_argument("--repeat", type=int, default=5) + parser.add_argument("--output", type=str, default=None) + + args = parser.parse_args() + + if args.itopk_list is not None: + itopk_list = parse_itopk_list(args.itopk_list) + else: + itopk_list = [args.itopk_size] + + print("==== Load dataset ====") + t0 = time.perf_counter() + + base = read_fvecs(args.base, args.base_limit) + query = read_fvecs(args.query, args.query_limit) + gt = read_ivecs(args.gt, args.query_limit) + + load_time = time.perf_counter() - t0 + + print(f"base shape : {base.shape}") + print(f"query shape : {query.shape}") + print(f"groundtruth shape: {gt.shape}") + print(f"load time [s] : {load_time:.6f}") + + print() + print("==== Transfer dataset to GPU ====") + t0 = time.perf_counter() + + base_gpu = cp.asarray(base) + query_gpu = cp.asarray(query) + + cp.cuda.Stream.null.synchronize() + transfer_time = time.perf_counter() - t0 + + print(f"transfer time [s]: {transfer_time:.6f}") + + print() + print("==== Build CAGRA index ====") + print(f"metric : {args.metric}") + print(f"build_algo : {args.build_algo}") + print(f"graph_degree : {args.graph_degree}") + print(f"intermediate_graph_degree: {args.intermediate_graph_degree}") + print(f"nn_descent_niter : {args.nn_descent_niter}") + + index_params = make_index_params(args) + + cp.cuda.Stream.null.synchronize() + t0 = time.perf_counter() + + index = cagra.build(index_params, base_gpu) + + cp.cuda.Stream.null.synchronize() + build_time = time.perf_counter() - t0 + + print(f"graph build time [s]: {build_time:.6f}") + + print() + print("==== Sweep search parameters ====") + print(f"k : {args.k}") + print(f"search_algo : {args.algo}") + print(f"search_width: {args.search_width}") + print(f"team_size : {args.team_size if args.team_size is not None else 'auto'}") + print(f"itopk_list : {itopk_list}") + print(f"repeat : {args.repeat}") + + results = [] + + for itopk_size in itopk_list: + search_params = make_search_params(args, itopk_size) + + team_size = get_team_size(search_params) + cta_per_query = get_cta_per_query( + algo=args.algo, + search_width=args.search_width, + itopk_size=itopk_size, + ) + + print() + print( + f"---- algo = {args.algo}, " + f"itopk_size = {itopk_size}, " + f"team_size = {team_size}, " + f"cta_per_query = {cta_per_query} ----" + ) + + # ウォームアップ + distances, neighbors = cagra.search( + search_params, + index, + query_gpu, + args.k, + ) + cp.cuda.Stream.null.synchronize() + + run_times = [] + + for r in range(args.repeat): + cp.cuda.Stream.null.synchronize() + t0 = time.perf_counter() + + distances, neighbors = cagra.search( + search_params, + index, + query_gpu, + args.k, + ) + + cp.cuda.Stream.null.synchronize() + elapsed = time.perf_counter() - t0 + + run_times.append(elapsed) + print(f"run {r + 1}: {elapsed:.6f} s") + + avg_time = sum(run_times) / len(run_times) + + num_queries = query.shape[0] + time_ms_per_query = avg_time / num_queries * 1000.0 + time_us_per_query = avg_time / num_queries * 1_000_000.0 + qps = num_queries / avg_time + + neighbors_cpu = cp.asnumpy(neighbors) + rec = recall_at_k(neighbors_cpu, gt, args.k) + + print(f"avg search time for all queries [s]: {avg_time:.6f}") + print(f"search time [ms/query] : {time_ms_per_query:.6f}") + print(f"search time [us/query] : {time_us_per_query:.6f}") + print(f"QPS [queries/sec] : {qps:.3f}") + print(f"recall@{args.k} : {rec:.6f}") + print(f"team_size : {team_size}") + print(f"cta_per_query : {cta_per_query}") + + results.append( + { + "dataset": "SIFT1M", + "algo": args.algo, + "k": args.k, + "metric": args.metric, + "graph_degree": args.graph_degree, + "intermediate_graph_degree": args.intermediate_graph_degree, + "build_algo": args.build_algo, + "nn_descent_niter": args.nn_descent_niter, + "search_width": args.search_width, + "itopk_size": itopk_size, + "team_size": team_size, + "cta_per_query": cta_per_query, + "repeat": args.repeat, + "num_base": base.shape[0], + "num_queries": query.shape[0], + "dimension": base.shape[1], + "graph_build_time_s": build_time, + "avg_search_time_s": avg_time, + "time_ms_per_query": time_ms_per_query, + "time_us_per_query": time_us_per_query, + "qps": qps, + f"recall@{args.k}": rec, + } + ) + + if args.output is not None: + print() + print("==== Save results ====") + + output_dir = os.path.dirname(args.output) + if output_dir: + os.makedirs(output_dir, exist_ok=True) + + fieldnames = list(results[0].keys()) + + with open(args.output, "w", newline="") as f: + writer = csv.DictWriter(f, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(results) + + print(f"saved: {args.output}") + + print() + print("==== Summary ====") + + for row in results: + print( + f"algo={row['algo']}, " + f"itopk_size={row['itopk_size']:4d}, " + f"team_size={row['team_size']}, " + f"cta_per_query={row['cta_per_query']}, " + f"time={row['time_us_per_query']:.3f} us/query, " + f"time={row['time_ms_per_query']:.6f} ms/query, " + f"QPS={row['qps']:.3f}, " + f"recall@{args.k}={row[f'recall@{args.k}']:.6f}" + ) + + +if __name__ == "__main__": + main() diff --git a/cagra_test/test_cagra_random.py b/cagra_test/test_cagra_random.py new file mode 100644 index 0000000000..88594a9387 --- /dev/null +++ b/cagra_test/test_cagra_random.py @@ -0,0 +1,37 @@ +import cupy as cp +import numpy as np +from cuvs.neighbors import cagra + +# データセット +n_samples = 10000 +dim = 128 + +# クエリ数 +n_queries = 100 +k = 10 + +dataset = cp.random.random((n_samples, dim), dtype=cp.float32) +queries = cp.random.random((n_queries, dim), dtype=cp.float32) + +# CAGRA インデックス構築 +index_params = cagra.IndexParams(metric="sqeuclidean") +index = cagra.build(index_params, dataset) + +# 探索 +search_params = cagra.SearchParams() +distances, neighbors = cagra.search(search_params, index, queries, k) + +# device_ndarray → NumPy に変換 +neighbors_np = np.asarray(neighbors.copy_to_host()) +distances_np = np.asarray(distances.copy_to_host()) + +print("dataset shape:", dataset.shape) +print("queries shape:", queries.shape) +print("neighbors shape:", neighbors_np.shape) +print("distances shape:", distances_np.shape) + +print("neighbors[:5]:") +print(neighbors_np[:5]) + +print("distances[:5]:") +print(distances_np[:5])