From 8365717a0dbc741b6603a67efd8c19a84cd5ed89 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Mon, 22 Jun 2026 13:50:24 +0100 Subject: [PATCH 1/7] ci(api): Add ClickHouse to the local and CI test stack Stand up a real ClickHouse so segment-membership code can be tested against actual ClickHouse execution instead of a mocked cursor. - Add a clickhouse service (clickhouse-server:25.12, matching ClickHouse Cloud) to docker-compose.local.yml, so it comes up for `make test` both locally and in CI. - Wire CLICKHOUSE_* into .env-ci/.env-local and wait for it in `make wait-for-db`. - Add a `clickhouse_db` fixture and a `clickhouse` marker. ClickHouse has no transactional rollback, so the fixture truncates IDENTITIES on teardown for per-test isolation. - Move the segment-membership tests that stubbed out ClickHouse -- compute, refresh, backfill, and log_comment attribution -- to live-ClickHouse integration tests; leave skip/defensive paths mocked. --- api/.env-ci | 5 + api/.env-local | 5 + api/Makefile | 1 + api/pyproject.toml | 3 + api/segment_membership/services.py | 1 - api/tests/conftest.py | 23 +++ .../segment_membership/__init__.py | 0 .../test_segment_membership_clickhouse.py | 168 ++++++++++++++++++ .../test_unit_segment_membership_services.py | 58 +----- .../test_unit_segment_membership_tasks.py | 113 +----------- docker/api/docker-compose.local.yml | 28 +++ .../observability/_events-catalogue.md | 2 +- 12 files changed, 247 insertions(+), 160 deletions(-) create mode 100644 api/tests/integration/segment_membership/__init__.py create mode 100644 api/tests/integration/segment_membership/test_segment_membership_clickhouse.py diff --git a/api/.env-ci b/api/.env-ci index cbc64251524f..efb12e4b6f8d 100644 --- a/api/.env-ci +++ b/api/.env-ci @@ -1,4 +1,9 @@ DATABASE_URL=postgresql://postgres:password@localhost:5432/flagsmith ANALYTICS_DATABASE_URL=postgresql://postgres:password@localhost:5433/analytics +CLICKHOUSE_HOST=localhost +CLICKHOUSE_PORT=9000 +CLICKHOUSE_USER=flagsmith +CLICKHOUSE_PASSWORD=password +CLICKHOUSE_DATABASE=default PYTEST_ADDOPTS=--cov . --cov-report xml -n auto --ci COVERAGE_CORE=sysmon diff --git a/api/.env-local b/api/.env-local index a8b1b1aba326..28383d1233d2 100644 --- a/api/.env-local +++ b/api/.env-local @@ -1,4 +1,9 @@ DATABASE_URL=postgresql://postgres:password@localhost:5432/flagsmith ANALYTICS_DATABASE_URL=postgresql://postgres:password@localhost:5433/analytics +CLICKHOUSE_HOST=localhost +CLICKHOUSE_PORT=9000 +CLICKHOUSE_USER=flagsmith +CLICKHOUSE_PASSWORD=password +CLICKHOUSE_DATABASE=default DJANGO_SETTINGS_MODULE=app.settings.local PYTEST_ADDOPTS=--cov . --cov-report html -n auto diff --git a/api/Makefile b/api/Makefile index 47a7f6283406..3eabf612a0cf 100644 --- a/api/Makefile +++ b/api/Makefile @@ -63,6 +63,7 @@ docker-build: wait-for-db: uv run python manage.py waitfordb uv run python manage.py waitfordb --database analytics + uv run python manage.py waitfordb --database clickhouse .PHONY: test test: docker-up wait-for-db diff --git a/api/pyproject.toml b/api/pyproject.toml index 508ad17c55db..73de0c8f0e5b 100644 --- a/api/pyproject.toml +++ b/api/pyproject.toml @@ -484,6 +484,9 @@ addopts = [ ] console_output_style = 'count' log_level = 'INFO' +markers = [ + "clickhouse: test requires a live ClickHouse database (see the clickhouse_db fixture)", +] [tool.mypy] plugins = ["mypy_django_plugin.main"] diff --git a/api/segment_membership/services.py b/api/segment_membership/services.py index 57f45c471116..9883120e88ad 100644 --- a/api/segment_membership/services.py +++ b/api/segment_membership/services.py @@ -63,7 +63,6 @@ def open_clickhouse_cursor( """ with connections["clickhouse"].cursor() as cursor: if log_comment: - # Underlying clickhouse-driver cursor exposes set_settings(...). cursor.cursor.set_settings({"log_comment": log_comment}) yield cursor diff --git a/api/tests/conftest.py b/api/tests/conftest.py index 0ef751a26568..2bac903e4e8a 100644 --- a/api/tests/conftest.py +++ b/api/tests/conftest.py @@ -43,6 +43,7 @@ from django.conf import settings from django.contrib.contenttypes.models import ContentType from django.core.cache import caches +from django.db import connections from django.db.backends.base.creation import TEST_DATABASE_PREFIX from django.test.utils import setup_databases from django_test_migrations.migrator import Migrator @@ -1344,6 +1345,28 @@ def clear_content_type_cache() -> typing.Generator[None, None, None]: ContentType.objects.clear_cache() +@pytest.fixture +def clickhouse_db( + request: pytest.FixtureRequest, settings: SettingsWrapper +) -> typing.Generator[None, None, None]: + """ + Opt a test into a live ClickHouse database. + + Skips when no `clickhouse` alias is configured (i.e. ClickHouse isn't + running). ClickHouse has no transactional rollback, so -- unlike the + Postgres-backed `db` fixture -- we can't rely on Django wrapping the test + in a transaction. We truncate IDENTITIES on teardown instead to isolate + tests from one another. + """ + if "clickhouse" not in settings.DATABASES: # pragma: no cover + pytest.skip("No ClickHouse database configured, skipping") + request.applymarker(pytest.mark.django_db(databases=["default", "clickhouse"])) + request.getfixturevalue("db") + yield + with connections["clickhouse"].cursor() as cursor: + cursor.execute("TRUNCATE TABLE IDENTITIES") + + @pytest.fixture def use_analytics_db(request: pytest.FixtureRequest, settings: SettingsWrapper) -> None: """ diff --git a/api/tests/integration/segment_membership/__init__.py b/api/tests/integration/segment_membership/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/api/tests/integration/segment_membership/test_segment_membership_clickhouse.py b/api/tests/integration/segment_membership/test_segment_membership_clickhouse.py new file mode 100644 index 000000000000..4ae3df4f08f6 --- /dev/null +++ b/api/tests/integration/segment_membership/test_segment_membership_clickhouse.py @@ -0,0 +1,168 @@ +import uuid + +import pytest +from django.db import connections +from pytest_django.fixtures import SettingsWrapper +from pytest_mock import MockerFixture +from pytest_structlog import StructuredLogCapture + +from projects.models import Project +from segment_membership.models import SegmentMembershipCount +from segment_membership.services import ( + compute_segment_counts_for_project, + open_clickhouse_cursor, +) +from segment_membership.tasks import ( + backfill_identities_to_clickhouse, + refresh_project_segment_counts, +) +from tests.types import EnableFeaturesFixture + + +@pytest.fixture +def seeded_identities(clickhouse_db: None, environment_api_key: str) -> None: + """Seed three IDENTITIES rows for the environment: two match the `segment` + fixture's `foo EQUAL bar` condition, one does not.""" + rows = [ + (environment_api_key, "alice", "alice_key", {"foo": "bar"}), + (environment_api_key, "bob", "bob_key", {"foo": "bar"}), + (environment_api_key, "carol", "carol_key", {"foo": "baz"}), + ] + with connections["clickhouse"].cursor() as cursor: + # Django's CursorWrapper stub forbids dicts in the params sequence; + # clickhouse-driver accepts them as JSON-column payloads. + cursor.executemany( + "INSERT INTO IDENTITIES (environment_id, identifier, identity_key, traits) VALUES", + rows, # type: ignore[arg-type] + ) + + +@pytest.mark.clickhouse +def test_compute_segment_counts_for_project__matching_identities__counts_real_rows( + seeded_identities: None, + project: int, + environment: int, + segment: int, +) -> None: + # Given the `segment` fixture (matches `foo=bar`) and the seeded identities + + # When + with open_clickhouse_cursor() as cursor: + result = compute_segment_counts_for_project( + Project.objects.get(pk=project), cursor + ) + + # Then only the two matching identities are counted, for the right env + [membership] = result + assert membership.segment_id == segment + assert membership.environment_id == environment + assert membership.count == 2 + + +@pytest.mark.clickhouse +def test_refresh_project_segment_counts__matching_identities__upserts_real_counts( + seeded_identities: None, + settings: SettingsWrapper, + project: int, + environment: int, + segment: int, + enable_features: EnableFeaturesFixture, +) -> None: + # Given the org has segment-membership inspection on and ClickHouse enabled + enable_features("segment_membership_inspection") + settings.CLICKHOUSE_ENABLED = True + + # When the refresh task runs end-to-end against real ClickHouse + refresh_project_segment_counts(project) + + # Then the (segment, environment) count row reflects the two matches + membership = SegmentMembershipCount.objects.get( + segment_id=segment, environment_id=environment + ) + assert membership.count == 2 + assert membership.last_synced_at is not None + + +@pytest.mark.clickhouse +def test_backfill_identities_to_clickhouse__happy_path__rows_land_in_clickhouse( + clickhouse_db: None, + settings: SettingsWrapper, + mocker: MockerFixture, + project: int, + environment: int, + environment_api_key: str, + segment: int, + enable_features: EnableFeaturesFixture, + log: StructuredLogCapture, +) -> None: + # Given segment-membership inspection is on, ClickHouse is enabled, and + # Dynamo yields two identities for the environment + enable_features("segment_membership_inspection") + settings.CLICKHOUSE_ENABLED = True + refresh_dispatch = mocker.patch( + "segment_membership.tasks.refresh_project_segment_counts" + ) + wrapper = mocker.MagicMock(is_enabled=True) + wrapper.iter_all_items_paginated.return_value = iter( + [ + { + "identity_uuid": "f47ac10b-58cc-4372-a567-0e02b2c3d479", + "identifier": "a", + "composite_key": "k1", + "environment_api_key": environment_api_key, + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [{"trait_key": "foo", "trait_value": "bar"}], + }, + { + "identity_uuid": "550e8400-e29b-41d4-a716-446655440000", + "identifier": "b", + "composite_key": "k2", + "environment_api_key": environment_api_key, + "created_date": "2026-05-08T00:00:00Z", + "identity_traits": [], + }, + ] + ) + mocker.patch("segment_membership.tasks.DynamoIdentityWrapper", return_value=wrapper) + + # When the backfill task runs end-to-end against real ClickHouse + backfill_identities_to_clickhouse() + + # Then both identities actually land in IDENTITIES, keyed by env api key + with open_clickhouse_cursor() as cursor: + cursor.execute( + "SELECT identifier, identity_key FROM IDENTITIES FINAL " + "WHERE environment_id = %(env)s ORDER BY identifier", + {"env": environment_api_key}, + ) + rows = cursor.fetchall() + assert [(row[0], row[1]) for row in rows] == [("a", "k1"), ("b", "k2")] + # and the project's count refresh is dispatched + refresh_dispatch.delay.assert_called_once_with(args=(project,)) + assert any( + e["event"] == "backfill.environment.completed" and e["rows__count"] == 2 + for e in log.events + ) + + +@pytest.mark.clickhouse +def test_open_clickhouse_cursor__with_log_comment__lands_in_query_log( + clickhouse_db: None, +) -> None: + # Given a unique log_comment + log_comment = f"flagsmith:test:{uuid.uuid4()}" + + # When a query runs on a cursor opened with that log_comment + with open_clickhouse_cursor(log_comment=log_comment) as cursor: + cursor.execute("SELECT 1") + + # Then the query is attributable in CH's query_log by that comment. The + # query_log flushes asynchronously, so flush before reading. + with open_clickhouse_cursor() as cursor: + cursor.execute("SYSTEM FLUSH LOGS") + cursor.execute( + "SELECT count() FROM system.query_log WHERE log_comment = %(lc)s", + {"lc": log_comment}, + ) + [(count,)] = cursor.fetchall() + assert count >= 1 diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_services.py b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py index 9138b8f18914..72bc45050fcc 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_services.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py @@ -38,6 +38,10 @@ def test_is_membership_enabled__flag_on__returns_true( assert is_membership_enabled(organisation) is True +# NOTE: that a `log_comment` actually lands on the session and is attributable +# in `system.query_log` is verified end-to-end against a live ClickHouse in +# tests/integration/segment_membership/test_segment_membership_clickhouse.py. +# This case covers the no-comment branch, where no session setting is applied. def test_open_clickhouse_cursor__no_log_comment__yields_cursor( mocker: MockerFixture, ) -> None: @@ -56,29 +60,6 @@ def test_open_clickhouse_cursor__no_log_comment__yields_cursor( cursor.cursor.set_settings.assert_not_called() -def test_open_clickhouse_cursor__with_log_comment__sets_session_attribution( - mocker: MockerFixture, -) -> None: - # Given - cursor = MagicMock() - connections = mocker.patch("segment_membership.services.connections") - connections.__getitem__.return_value.cursor.return_value.__enter__.return_value = ( - cursor - ) - - # When - with open_clickhouse_cursor( - log_comment="flagsmith:segment_membership:refresh:org_1:project_2" - ): - pass - - # Then the comment lands as a clickhouse-driver session setting so every - # query the cursor issues is attributable in CH's query_log. - cursor.cursor.set_settings.assert_called_once_with( - {"log_comment": "flagsmith:segment_membership:refresh:org_1:project_2"} - ) - - def test_get_projects_to_process__no_canonical_segments__yields_nothing( project: Project, ) -> None: @@ -151,37 +132,6 @@ def test_compute_segment_counts_for_project__no_environments__returns_empty( cursor.execute.assert_not_called() -def test_compute_segment_counts_for_project__one_segment__returns_membership_instances( - project: Project, - environment: Environment, - segment: Segment, - segment_rule: SegmentRule, - mocker: MockerFixture, -) -> None: - # Given - mocker.patch( - "segment_membership.services.translate_segment", - return_value="TRUE", - ) - cursor = MagicMock() - cursor.fetchall.return_value = [(segment.id, environment.api_key, 7)] - - # When - result = compute_segment_counts_for_project(project, cursor) - - # Then - [membership] = result - assert membership.segment_id == segment.id - assert membership.environment_id == environment.id - assert membership.count == 7 - assert membership.last_synced_at is None - sql = cursor.execute.call_args.args[0] - assert f"SELECT {segment.id} AS segment_id" in sql - # FINAL forces ReplacingMergeTree dedup at read time. - assert "FROM IDENTITIES AS i FINAL" in sql - assert "GROUP BY i.environment_id" in sql - - def test_compute_segment_counts_for_project__unknown_env_key_in_row__skips( project: Project, environment: Environment, diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py index 3eff7ba86c38..d352babb6479 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py @@ -54,68 +54,10 @@ def test_backfill_identities_to_clickhouse__dynamo_disabled__skips( spy.assert_not_called() -def test_backfill_identities_to_clickhouse__happy_path__bulk_inserts( - mocker: MockerFixture, - settings: SettingsWrapper, - project: Project, - environment: Environment, - segment: Segment, - enable_features: EnableFeaturesFixture, - log: StructuredLogCapture, -) -> None: - # Given - enable_features("segment_membership_inspection") - settings.CLICKHOUSE_ENABLED = True - cursor = MagicMock() - open_cursor = mocker.patch.object(tasks, "open_clickhouse_cursor") - open_cursor.return_value.__enter__.return_value = cursor - refresh_dispatch = mocker.patch.object(tasks, "refresh_project_segment_counts") - wrapper = MagicMock(is_enabled=True) - wrapper.iter_all_items_paginated.return_value = iter( - [ - { - "identity_uuid": "f47ac10b-58cc-4372-a567-0e02b2c3d479", - "identifier": "a", - "composite_key": "k1", - "environment_api_key": environment.api_key, - "created_date": "2026-05-08T00:00:00Z", - "identity_traits": [], - }, - { - "identity_uuid": "550e8400-e29b-41d4-a716-446655440000", - "identifier": "b", - "composite_key": "k2", - "environment_api_key": environment.api_key, - "created_date": "2026-05-08T00:00:00Z", - "identity_traits": [], - }, - ] - ) - mocker.patch.object(tasks, "DynamoIdentityWrapper", return_value=wrapper) - - # When - backfill_identities_to_clickhouse() - - # Then - open_cursor.assert_called_with( - log_comment=( - f"flagsmith:segment_membership:backfill" - f":org_{project.organisation_id}" - f":project_{project.id}" - ) - ) - sql, rows_arg = cursor.executemany.call_args.args - assert sql == ( - "INSERT INTO IDENTITIES " - "(environment_id, identifier, identity_key, traits) VALUES" - ) - assert {row[0] for row in rows_arg} == {environment.api_key} - assert {row[1] for row in rows_arg} == {"a", "b"} - assert any( - e["event"] == "backfill.environment.completed" and e["rows__count"] == 2 - for e in log.events - ) - refresh_dispatch.delay.assert_called_once_with(args=(project.id,)) +# NOTE: the happy path (rows actually landing in IDENTITIES, refresh dispatched) +# is covered end-to-end against a live ClickHouse in +# tests/integration/segment_membership/test_segment_membership_clickhouse.py. +# The cases below cover the skip/error branches with the cursor mocked. def test_backfill_identities_to_clickhouse__insert_fails__logs_and_continues( @@ -254,48 +196,11 @@ def test_refresh_project_segment_counts__compute_fails__logs( assert any(e["event"] == "refresh.project.failed" for e in log.events) -def test_refresh_project_segment_counts__counts_returned__upserts_per_env_rows( - mocker: MockerFixture, - settings: SettingsWrapper, - project: Project, - environment: Environment, - segment: Segment, - enable_features: EnableFeaturesFixture, -) -> None: - # Given - enable_features("segment_membership_inspection") - settings.CLICKHOUSE_ENABLED = True - cursor = MagicMock() - open_cursor = mocker.patch.object(tasks, "open_clickhouse_cursor") - open_cursor.return_value.__enter__.return_value = cursor - mocker.patch.object( - tasks, - "compute_segment_counts_for_project", - return_value=[ - SegmentMembershipCount( - segment_id=segment.id, - environment_id=environment.id, - count=42, - ), - ], - ) - - # When - refresh_project_segment_counts(project.id) - - # Then - membership = SegmentMembershipCount.objects.get( - segment=segment, environment=environment - ) - assert membership.count == 42 - assert membership.last_synced_at is not None - open_cursor.assert_called_once_with( - log_comment=( - f"flagsmith:segment_membership:refresh" - f":org_{project.organisation_id}" - f":project_{project.id}" - ) - ) +# NOTE: the happy path (real predicate over real IDENTITIES rows upserting a +# real count) is covered end-to-end against a live ClickHouse in +# tests/integration/segment_membership/test_segment_membership_clickhouse.py. +# The cases below mock `compute_segment_counts_for_project` to cover the +# upsert/delete bookkeeping in isolation. def test_refresh_project_segment_counts__previously_matching_pair_drops_to_zero__row_deleted( diff --git a/docker/api/docker-compose.local.yml b/docker/api/docker-compose.local.yml index 050716c1a3b4..a57477708e18 100644 --- a/docker/api/docker-compose.local.yml +++ b/docker/api/docker-compose.local.yml @@ -5,6 +5,7 @@ name: flagsmith volumes: pg_11_data: pg_11_data_analytics: + clickhouse_data: services: db: @@ -32,3 +33,30 @@ services: environment: POSTGRES_DB: analytics POSTGRES_PASSWORD: password + + clickhouse: + # Match the version Flagsmith runs on ClickHouse Cloud. + image: clickhouse/clickhouse-server:25.12 + pull_policy: always + restart: unless-stopped + # ClickHouse opens a file descriptor per column/part, so it needs a far + # higher open-files limit than the OS default (~1024); 262144 is the value + # ClickHouse's docs and official image recommend. + ulimits: + nofile: + soft: 262144 + hard: 262144 + volumes: + - clickhouse_data:/var/lib/clickhouse + ports: + - 8123:8123 + - 9000:9000 + environment: + CLICKHOUSE_USER: flagsmith + CLICKHOUSE_PASSWORD: password + CLICKHOUSE_DEFAULT_ACCESS_MANAGEMENT: 1 + healthcheck: + test: ["CMD", "wget", "--spider", "-q", "http://localhost:8123/ping"] + interval: 5s + timeout: 3s + retries: 10 diff --git a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md index 645eb0decb76..a0cab3529233 100644 --- a/docs/docs/deployment-self-hosting/observability/_events-catalogue.md +++ b/docs/docs/deployment-self-hosting/observability/_events-catalogue.md @@ -387,7 +387,7 @@ Attributes: ### `segment_membership.compute.segment.skipped` Logged at `error` from: - - `api/segment_membership/services.py:120` + - `api/segment_membership/services.py:119` Attributes: - `project.id` From 7f1342e051a9d1517506a79daef3e4d593822fec Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Mon, 22 Jun 2026 15:08:08 +0100 Subject: [PATCH 2/7] fix(api): Parallel test runs crash when ClickHouse is configured The upstream clickhouse_backend leaves _clone_test_db unimplemented, so setup_databases(parallel=...) raises NotImplementedError and aborts the whole session under `pytest -n auto`. Add a thin backend subclass that clones the primary test database per xdist worker, and point the engine at it. beep boop --- api/app/settings/common.py | 2 +- api/core/db_backends/__init__.py | 0 api/core/db_backends/clickhouse/__init__.py | 0 api/core/db_backends/clickhouse/base.py | 9 +++++ api/core/db_backends/clickhouse/creation.py | 43 +++++++++++++++++++++ 5 files changed, 53 insertions(+), 1 deletion(-) create mode 100644 api/core/db_backends/__init__.py create mode 100644 api/core/db_backends/clickhouse/__init__.py create mode 100644 api/core/db_backends/clickhouse/base.py create mode 100644 api/core/db_backends/clickhouse/creation.py diff --git a/api/app/settings/common.py b/api/app/settings/common.py index 55c65102bd75..ec998177e4ff 100644 --- a/api/app/settings/common.py +++ b/api/app/settings/common.py @@ -1483,7 +1483,7 @@ if CLICKHOUSE_ENABLED: _clickhouse_db: dict[str, Any] = { - "ENGINE": "clickhouse_backend.backend", + "ENGINE": "core.db_backends.clickhouse", "HOST": CLICKHOUSE_HOST, "PORT": CLICKHOUSE_PORT, "USER": CLICKHOUSE_USER, diff --git a/api/core/db_backends/__init__.py b/api/core/db_backends/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/api/core/db_backends/clickhouse/__init__.py b/api/core/db_backends/clickhouse/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/api/core/db_backends/clickhouse/base.py b/api/core/db_backends/clickhouse/base.py new file mode 100644 index 000000000000..8da618d1c268 --- /dev/null +++ b/api/core/db_backends/clickhouse/base.py @@ -0,0 +1,9 @@ +from clickhouse_backend.backend.base import ( + DatabaseWrapper as ClickHouseDatabaseWrapper, +) + +from core.db_backends.clickhouse.creation import DatabaseCreation + + +class DatabaseWrapper(ClickHouseDatabaseWrapper): # type: ignore[misc] + creation_class = DatabaseCreation diff --git a/api/core/db_backends/clickhouse/creation.py b/api/core/db_backends/clickhouse/creation.py new file mode 100644 index 000000000000..a1ffefc2aaa8 --- /dev/null +++ b/api/core/db_backends/clickhouse/creation.py @@ -0,0 +1,43 @@ +from clickhouse_backend.backend.creation import ( + DatabaseCreation as ClickHouseDatabaseCreation, +) + + +class DatabaseCreation(ClickHouseDatabaseCreation): # type: ignore[misc] + """ClickHouse test-database creation with parallel-clone support. + + ClickHouse has no transactional rollback, so each xdist worker needs its own physical + database for isolation, mirroring how the Postgres backend clones the + primary test database per worker. + + TODO Remove this subclass once https://github.com/jayvynl/django-clickhouse-backend/issues/167 + ships. + """ + + def _clone_test_db( + self, + suffix: str, + verbosity: int, + keepdb: bool = False, + ) -> None: + source_database_name: str = self.connection.settings_dict["NAME"] + target_database_name: str = self.get_test_db_clone_settings(suffix)["NAME"] + quote_name = self.connection.ops.quote_name + + with self._nodb_cursor() as cursor: + cursor.execute( + f"DROP DATABASE IF EXISTS {quote_name(target_database_name)} SYNC" + ) + cursor.execute(f"CREATE DATABASE {quote_name(target_database_name)}") + # Recreate every source table as an empty copy; `CREATE TABLE ... AS` + # copies the engine and schema without any rows. + cursor.execute( + "SELECT name FROM system.tables WHERE database = %s", + [source_database_name], + ) + for (table_name,) in cursor.fetchall(): + cursor.execute( + f"CREATE TABLE " + f"{quote_name(target_database_name)}.{quote_name(table_name)} " + f"AS {quote_name(source_database_name)}.{quote_name(table_name)}" + ) From 2de66db8214f6bb1d69f8cd6f1c32696754044b0 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Mon, 22 Jun 2026 16:42:52 +0100 Subject: [PATCH 3/7] slop cleanup --- .../test_unit_segment_membership_services.py | 46 +++++++++---------- .../test_unit_segment_membership_tasks.py | 13 ------ 2 files changed, 23 insertions(+), 36 deletions(-) diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_services.py b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py index 72bc45050fcc..340518efd644 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_services.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py @@ -11,8 +11,8 @@ compute_segment_counts_for_project, enqueue_membership_refresh, get_projects_to_process, + get_segment_members, is_membership_enabled, - open_clickhouse_cursor, ) from segment_membership.tasks import refresh_project_segment_counts from segments.models import Segment, SegmentRule @@ -38,28 +38,6 @@ def test_is_membership_enabled__flag_on__returns_true( assert is_membership_enabled(organisation) is True -# NOTE: that a `log_comment` actually lands on the session and is attributable -# in `system.query_log` is verified end-to-end against a live ClickHouse in -# tests/integration/segment_membership/test_segment_membership_clickhouse.py. -# This case covers the no-comment branch, where no session setting is applied. -def test_open_clickhouse_cursor__no_log_comment__yields_cursor( - mocker: MockerFixture, -) -> None: - # Given - cursor = MagicMock() - connections = mocker.patch("segment_membership.services.connections") - connections.__getitem__.return_value.cursor.return_value.__enter__.return_value = ( - cursor - ) - - # When - with open_clickhouse_cursor() as opened: - assert opened is cursor - - # Then - cursor.cursor.set_settings.assert_not_called() - - def test_get_projects_to_process__no_canonical_segments__yields_nothing( project: Project, ) -> None: @@ -176,6 +154,28 @@ def test_compute_segment_counts_for_project__untranslatable_segment__skips( cursor.execute.assert_not_called() +def test_get_segment_members__untranslatable_segment__returns_empty_without_querying( + project: Project, + environment: Environment, + segment: Segment, + segment_rule: SegmentRule, + mocker: MockerFixture, +) -> None: + # Given a segment whose rules can't be translated to a SQL predicate + mocker.patch( + "segment_membership.services.translate_segment", + return_value=None, + ) + open_cursor = mocker.patch("segment_membership.services.open_clickhouse_cursor") + + # When + result = get_segment_members(segment, environment, cursor=None, limit=100) + + # Then no ClickHouse query is issued and no members are returned + assert result == [] + open_cursor.assert_not_called() + + def test_enqueue_membership_refresh__flag_on__enqueues_refresh( run_tasks: RunTasksFixture, mocker: MockerFixture, diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py index d352babb6479..a4cb467ce4cf 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_tasks.py @@ -54,12 +54,6 @@ def test_backfill_identities_to_clickhouse__dynamo_disabled__skips( spy.assert_not_called() -# NOTE: the happy path (rows actually landing in IDENTITIES, refresh dispatched) -# is covered end-to-end against a live ClickHouse in -# tests/integration/segment_membership/test_segment_membership_clickhouse.py. -# The cases below cover the skip/error branches with the cursor mocked. - - def test_backfill_identities_to_clickhouse__insert_fails__logs_and_continues( mocker: MockerFixture, settings: SettingsWrapper, @@ -196,13 +190,6 @@ def test_refresh_project_segment_counts__compute_fails__logs( assert any(e["event"] == "refresh.project.failed" for e in log.events) -# NOTE: the happy path (real predicate over real IDENTITIES rows upserting a -# real count) is covered end-to-end against a live ClickHouse in -# tests/integration/segment_membership/test_segment_membership_clickhouse.py. -# The cases below mock `compute_segment_counts_for_project` to cover the -# upsert/delete bookkeeping in isolation. - - def test_refresh_project_segment_counts__previously_matching_pair_drops_to_zero__row_deleted( mocker: MockerFixture, settings: SettingsWrapper, From a1a7863748e8cc67da95dec674e39c24df6330c0 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Mon, 22 Jun 2026 18:35:49 +0100 Subject: [PATCH 4/7] exclude permissions_permissionmodel from sql_flush --- api/app/settings/test.py | 11 ++++++- api/core/db_backends/postgresql/__init__.py | 0 api/core/db_backends/postgresql/base.py | 9 ++++++ api/core/db_backends/postgresql/operations.py | 29 +++++++++++++++++++ 4 files changed, 48 insertions(+), 1 deletion(-) create mode 100644 api/core/db_backends/postgresql/__init__.py create mode 100644 api/core/db_backends/postgresql/base.py create mode 100644 api/core/db_backends/postgresql/operations.py diff --git a/api/app/settings/test.py b/api/app/settings/test.py index 55df384a9889..c659649528a0 100644 --- a/api/app/settings/test.py +++ b/api/app/settings/test.py @@ -1,5 +1,14 @@ from app.settings.common import * # noqa -from app.settings.common import INSTALLED_APPS, LDAP_INSTALLED, REST_FRAMEWORK +from app.settings.common import ( + DATABASES, + INSTALLED_APPS, + LDAP_INSTALLED, + REST_FRAMEWORK, +) + +# TODO: remove once permissions are an enum -- +# https://github.com/Flagsmith/flagsmith/issues/7850 +DATABASES["default"]["ENGINE"] = "core.db_backends.postgresql" if LDAP_INSTALLED: INSTALLED_APPS = INSTALLED_APPS + ["flagsmith_ldap"] diff --git a/api/core/db_backends/postgresql/__init__.py b/api/core/db_backends/postgresql/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/api/core/db_backends/postgresql/base.py b/api/core/db_backends/postgresql/base.py new file mode 100644 index 000000000000..70534c2de5bb --- /dev/null +++ b/api/core/db_backends/postgresql/base.py @@ -0,0 +1,9 @@ +from django.db.backends.postgresql.base import ( + DatabaseWrapper as PostgresDatabaseWrapper, +) + +from core.db_backends.postgresql.operations import DatabaseOperations + + +class DatabaseWrapper(PostgresDatabaseWrapper): + ops_class = DatabaseOperations diff --git a/api/core/db_backends/postgresql/operations.py b/api/core/db_backends/postgresql/operations.py new file mode 100644 index 000000000000..7a4efdbd6dad --- /dev/null +++ b/api/core/db_backends/postgresql/operations.py @@ -0,0 +1,29 @@ +from collections.abc import Sequence +from typing import Any + +from django.db.backends.postgresql.operations import ( + DatabaseOperations as PostgresDatabaseOperations, +) + +# Tables holding migration-seeded reference data that must survive `flush`. +# +# TODO: remove this backend once https://github.com/Flagsmith/flagsmith/issues/7850 is closed +PRESERVED_TABLES = frozenset({"permissions_permissionmodel"}) + + +class DatabaseOperations(PostgresDatabaseOperations): + def sql_flush( + self, + style: Any, + tables: Sequence[str], + *, + reset_sequences: bool = False, + allow_cascade: bool = False, + ) -> list[str]: + retained = [table for table in tables if table not in PRESERVED_TABLES] + return super().sql_flush( + style, + retained, + reset_sequences=reset_sequences, + allow_cascade=allow_cascade, + ) From 40cb5a209c0fe8f4b0e6228dfaeac4907f46a074 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Mon, 22 Jun 2026 18:41:19 +0100 Subject: [PATCH 5/7] truncate all tables --- api/tests/conftest.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/api/tests/conftest.py b/api/tests/conftest.py index 2bac903e4e8a..d35862a32d29 100644 --- a/api/tests/conftest.py +++ b/api/tests/conftest.py @@ -1355,7 +1355,7 @@ def clickhouse_db( Skips when no `clickhouse` alias is configured (i.e. ClickHouse isn't running). ClickHouse has no transactional rollback, so -- unlike the Postgres-backed `db` fixture -- we can't rely on Django wrapping the test - in a transaction. We truncate IDENTITIES on teardown instead to isolate + in a transaction. We truncate every table on teardown instead to isolate tests from one another. """ if "clickhouse" not in settings.DATABASES: # pragma: no cover @@ -1363,8 +1363,10 @@ def clickhouse_db( request.applymarker(pytest.mark.django_db(databases=["default", "clickhouse"])) request.getfixturevalue("db") yield - with connections["clickhouse"].cursor() as cursor: - cursor.execute("TRUNCATE TABLE IDENTITIES") + connection = connections["clickhouse"] + with connection.cursor() as cursor: + for table_name in connection.introspection.table_names(cursor): + cursor.execute(f"TRUNCATE TABLE {connection.ops.quote_name(table_name)}") @pytest.fixture From 6dcf528939cb4fa389f16d9de5a6f41b7cc68eb0 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Mon, 22 Jun 2026 19:10:41 +0100 Subject: [PATCH 6/7] improve docs Co-authored-by: Evandro Myller <22429+emyller@users.noreply.github.com> --- api/tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/api/tests/conftest.py b/api/tests/conftest.py index d35862a32d29..561e01ceb9a1 100644 --- a/api/tests/conftest.py +++ b/api/tests/conftest.py @@ -1361,7 +1361,7 @@ def clickhouse_db( if "clickhouse" not in settings.DATABASES: # pragma: no cover pytest.skip("No ClickHouse database configured, skipping") request.applymarker(pytest.mark.django_db(databases=["default", "clickhouse"])) - request.getfixturevalue("db") + request.getfixturevalue("db") # Resolve `db` only after injecting the clickhouse db yield connection = connections["clickhouse"] with connection.cursor() as cursor: From 6eecd36a4dff76a708886378c68656f8f8de3e70 Mon Sep 17 00:00:00 2001 From: Kim Gustyr Date: Mon, 22 Jun 2026 19:17:41 +0100 Subject: [PATCH 7/7] test(api): Drop unrelated get_segment_members unit test This read-endpoint test leaked into the ClickHouse test-stack branch and references segment_membership.services.get_segment_members, which doesn't exist here. Remove it so the PR only contains ClickHouse changes. beep boop --- .../test_unit_segment_membership_services.py | 23 ------------------- 1 file changed, 23 deletions(-) diff --git a/api/tests/unit/segment_membership/test_unit_segment_membership_services.py b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py index 340518efd644..2bd9158a04f9 100644 --- a/api/tests/unit/segment_membership/test_unit_segment_membership_services.py +++ b/api/tests/unit/segment_membership/test_unit_segment_membership_services.py @@ -11,7 +11,6 @@ compute_segment_counts_for_project, enqueue_membership_refresh, get_projects_to_process, - get_segment_members, is_membership_enabled, ) from segment_membership.tasks import refresh_project_segment_counts @@ -154,28 +153,6 @@ def test_compute_segment_counts_for_project__untranslatable_segment__skips( cursor.execute.assert_not_called() -def test_get_segment_members__untranslatable_segment__returns_empty_without_querying( - project: Project, - environment: Environment, - segment: Segment, - segment_rule: SegmentRule, - mocker: MockerFixture, -) -> None: - # Given a segment whose rules can't be translated to a SQL predicate - mocker.patch( - "segment_membership.services.translate_segment", - return_value=None, - ) - open_cursor = mocker.patch("segment_membership.services.open_clickhouse_cursor") - - # When - result = get_segment_members(segment, environment, cursor=None, limit=100) - - # Then no ClickHouse query is issued and no members are returned - assert result == [] - open_cursor.assert_not_called() - - def test_enqueue_membership_refresh__flag_on__enqueues_refresh( run_tasks: RunTasksFixture, mocker: MockerFixture,