diff --git a/.devcontainer/setup_env.sh b/.devcontainer/setup_env.sh index 1f7d7a2da..20fe798f5 100644 --- a/.devcontainer/setup_env.sh +++ b/.devcontainer/setup_env.sh @@ -4,6 +4,10 @@ set -euo pipefail if [ ! -f test.env ]; then cp test.env.sample test.env fi + +sudo apt-get update +sudo apt-get install -y libltdl7 libkrb5-3 libgssapi-krb5-2 + export UV_VENV_CLEAR=1 uv venv .venv uv pip install --python .venv/bin/python -r dev_requirements.txt diff --git a/.github/workflows/patched-integration-tests-sqlserver.yml b/.github/workflows/patched-integration-tests-sqlserver.yml index e08f7a195..5f802f93d 100644 --- a/.github/workflows/patched-integration-tests-sqlserver.yml +++ b/.github/workflows/patched-integration-tests-sqlserver.yml @@ -27,7 +27,7 @@ jobs: sqlserver: image: ghcr.io/${{ github.repository }}:server-${{ matrix.sqlserver_version }} env: - ACCEPT_EULA: 'Y' + ACCEPT_EULA: "Y" SA_PASSWORD: 5atyaNadella DBT_TEST_USER_1: DBT_TEST_USER_1 DBT_TEST_USER_2: DBT_TEST_USER_2 @@ -45,4 +45,40 @@ jobs: DBT_TEST_USER_1: DBT_TEST_USER_1 DBT_TEST_USER_2: DBT_TEST_USER_2 DBT_TEST_USER_3: DBT_TEST_USER_3 - SQLSERVER_TEST_DRIVER: 'ODBC Driver ${{ matrix.msodbc_version }} for SQL Server' + SQLSERVER_TEST_DRIVER: "ODBC Driver ${{ matrix.msodbc_version }} for SQL Server" + + integration-tests-sql-server-mssql-python: + name: mssql-python + runs-on: ubuntu-latest + permissions: + contents: read + packages: read + container: + image: ghcr.io/${{ github.repository }}:CI-3.13-msodbc18 + credentials: + username: ${{ github.actor }} + password: ${{ secrets.GITHUB_TOKEN }} + services: + sqlserver: + image: ghcr.io/${{ github.repository }}:server-2022 + env: + ACCEPT_EULA: "Y" + SA_PASSWORD: 5atyaNadella + DBT_TEST_USER_1: DBT_TEST_USER_1 + DBT_TEST_USER_2: DBT_TEST_USER_2 + DBT_TEST_USER_3: DBT_TEST_USER_3 + COLLATION: SQL_Latin1_General_CP1_CS_AS + steps: + - uses: actions/checkout@v4 + + - name: Install dependencies + run: pip install -r dev_requirements.txt + + - name: Run functional tests with mssql-python + run: pytest -ra -v tests/functional --profile "ci_sql_server" + env: + DBT_TEST_USER_1: DBT_TEST_USER_1 + DBT_TEST_USER_2: DBT_TEST_USER_2 + DBT_TEST_USER_3: DBT_TEST_USER_3 + SQLSERVER_TEST_DRIVER: "ODBC Driver 18 for SQL Server" + SQLSERVER_TEST_USE_MSSQL_PYTHON: "True" diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 006092c7d..0cf24bccc 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,12 +1,12 @@ default_language_version: python: python3 repos: - - repo: 'https://github.com/pre-commit/pre-commit-hooks' + - repo: "https://github.com/pre-commit/pre-commit-hooks" rev: v4.6.0 hooks: - id: check-yaml args: - - '--unsafe' + - "--unsafe" - id: check-json - id: end-of-file-fixer - id: trailing-whitespace @@ -20,79 +20,79 @@ repos: - id: fix-byte-order-marker - id: mixed-line-ending - id: check-docstring-first - - repo: 'https://github.com/adrienverge/yamllint' + - repo: "https://github.com/adrienverge/yamllint" rev: v1.35.1 hooks: - id: yamllint args: - - '-d {extends: default, rules: {line-length: disable, document-start: disable}}' - - '-s' - - repo: 'https://github.com/MarcoGorelli/absolufy-imports' + - "-d {extends: default, rules: {line-length: disable, document-start: disable}}" + - "-s" + - repo: "https://github.com/MarcoGorelli/absolufy-imports" rev: v0.3.1 hooks: - id: absolufy-imports - - repo: 'https://github.com/hadialqattan/pycln' + - repo: "https://github.com/hadialqattan/pycln" rev: v2.5.0 hooks: - id: pycln args: - - '--all' - - repo: 'https://github.com/pycqa/isort' + - "--all" + - repo: "https://github.com/pycqa/isort" rev: 5.13.2 hooks: - id: isort args: - - '--profile' + - "--profile" - black - - '--atomic' - - '--line-length' - - '99' - - '--python-version' - - '39' - - repo: 'https://github.com/psf/black' + - "--atomic" + - "--line-length" + - "99" + - "--python-version" + - "39" + - repo: "https://github.com/psf/black" rev: 24.8.0 hooks: - id: black args: - - '--line-length=99' - - '--target-version=py39' + - "--line-length=99" + - "--target-version=py39" - id: black alias: black-check stages: - manual args: - - '--line-length=99' - - '--target-version=py310' - - '--check' - - '--diff' - - repo: 'https://github.com/pycqa/flake8' + - "--line-length=99" + - "--target-version=py310" + - "--check" + - "--diff" + - repo: "https://github.com/pycqa/flake8" rev: 7.1.1 hooks: - id: flake8 args: - - '--max-line-length=99' + - "--max-line-length=99" - id: flake8 args: - - '--max-line-length=99' + - "--max-line-length=99" alias: flake8-check stages: - manual - - repo: 'https://github.com/pre-commit/mirrors-mypy' + - repo: "https://github.com/pre-commit/mirrors-mypy" rev: v1.11.1 hooks: - id: mypy args: - - '--show-error-codes' - - '--ignore-missing-imports' - - '--explicit-package-bases' - files: '^dbt/adapters' + - "--show-error-codes" + - "--ignore-missing-imports" + - "--explicit-package-bases" + files: "^dbt/adapters" - id: mypy alias: mypy-check stages: - manual args: - - '--show-error-codes' - - '--pretty' - - '--ignore-missing-imports' - - '--explicit-package-bases' - files: '^dbt/adapters' + - "--show-error-codes" + - "--pretty" + - "--ignore-missing-imports" + - "--explicit-package-bases" + files: "^dbt/adapters" diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 50b958977..751b1d130 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -32,15 +32,35 @@ The functional tests require a running SQL Server instance. You can easily spin make server ``` +The default development flow uses the existing ODBC-based path. If you want to develop or test the optional `mssql-python` backend instead, make sure the package is installed in your environment before running tests. + +```shell +pip install mssql-python +``` + +On Debian/Ubuntu-based environments, `mssql-python` may also require these system libraries: + +```shell +sudo apt-get install -y libltdl7 libkrb5-3 libgssapi-krb5-2 +``` + This will use Docker Compose to spin up a local instance of SQL Server. Docker Compose is now bundled with Docker, so make sure to [install the latest version of Docker](https://docs.docker.com/get-docker/). Next, tell our tests how they should connect to the local instance by creating a file called `test.env` in the root of the project. You can use the provided `test.env.sample` as a base and if you started the server with `make server`, then this matches the instance running on your local machine. +If you are testing the optional `mssql-python` backend, also enable its profile flag in `test.env` so the adapter selects that implementation instead of the legacy driver-based one. + ```shell cp test.env.sample test.env ``` +When using the optional `mssql-python` backend, update `test.env` with: + +```shell +SQLSERVER_TEST_USE_MSSQL_PYTHON=True +``` + You can tweak the contents of this file to test against a different database. Note that we need 3 users to be able to run tests related to the grants. @@ -57,6 +77,8 @@ make unit make functional ``` +This remains the documented test procedure for both connection backends. When the `mssql-python` flag is enabled, run the same commands after installing `mssql-python` and setting `SQLSERVER_TEST_USE_MSSQL_PYTHON=True` in `test.env`. + ## CI/CD We use Docker images that have all the things we need to test the adapter in the CI/CD workflows. diff --git a/README.md b/README.md index 80641670e..be88afdcd 100644 --- a/README.md +++ b/README.md @@ -16,10 +16,12 @@ Join us on the [dbt Slack](https://getdbt.slack.com/archives/CMRMDDQ9W) to ask q ## Installation +By default this adapter uses the Microsoft ODBC driver. + This adapter requires the Microsoft ODBC driver to be installed: [Windows](https://docs.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver16#download-for-windows) | [macOS](https://docs.microsoft.com/en-us/sql/connect/odbc/linux-mac/install-microsoft-odbc-driver-sql-server-macos?view=sql-server-ver16) | -[Linux](https://docs.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-for-sql-server?view=sql-server-ver16) +[Linux](https://docs.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-sql-server?view=sql-server-ver16)
Debian/Ubuntu

@@ -45,6 +47,18 @@ Latest pre-release: ![GitHub tag (latest SemVer pre-release)](https://img.shield pip install -U --pre dbt-sqlserver ``` +### Optional: `mssql-python` backend + +This adapter can also use the `mssql-python` driver behind a feature flag. + +Install it explicitly when you want to use that backend: + +```shell +pip install -U mssql-python +``` + +When this backend is enabled, the adapter does not require the ODBC driver-based connection path for that profile. + ## Changelog See [the changelog](CHANGELOG.md) @@ -55,6 +69,8 @@ See [the changelog](CHANGELOG.md) - `dbt_sqlserver_use_default_schema_concat`: *(default: `false`)* Controls schema name generation when a [custom schema](https://docs.getdbt.com/docs/build/custom-schemas) is set on a model. +- `use_mssql_python`: *(default: `false` in the profile)* Switches the connection backend from the legacy ODBC / `pyodbc` path to the `mssql-python` driver for that target profile. + | Flag value | `custom_schema_name` | Result | |---|---|---| | `false` (default, legacy) | *(none)* | `target.schema` | @@ -74,6 +90,55 @@ See [the changelog](CHANGELOG.md) > **Note:** If you want to permanently customise schema generation and avoid any future deprecation of this flag, override the `sqlserver__generate_schema_name` macro directly in your project. +### `mssql-python` feature flag usage + +Enable the backend per target in your `profiles.yml`: + +```yaml +your_profile: + target: dev + outputs: + dev: + type: sqlserver + host: your-server + port: 1433 + database: your-database + schema: dbo + user: your-user + password: your-password + encrypt: true + trust_cert: false + use_mssql_python: true +``` + +#### Notes + +- `use_mssql_python: true` is a profile-level feature flag. +- When enabled, the adapter uses `mssql-python` instead of the legacy `pyodbc` connection path. +- The legacy ODBC driver setting is only needed for profiles that continue to use the ODBC backend. +- If you enable `use_mssql_python`, make sure the `mssql-python` package is installed in the environment running dbt. +- This path is intended to fail fast when required dependencies or unsupported settings are missing. + +#### Testing + +For local development and validation, use the documented adapter workflow from `CONTRIBUTING.md`: + +```shell +make dev +make server +cp test.env.sample test.env +make unit +make functional +``` + +To exercise the `mssql-python` backend in tests, configure the profile or environment so that the target under test sets: + +```yaml +use_mssql_python: true +``` + +If you are testing in the devcontainer, ensure the `mssql-python` package is installed in that environment before running the unit or functional suite. + ## Contributing diff --git a/dbt/adapters/sqlserver/sqlserver_connections.py b/dbt/adapters/sqlserver/sqlserver_connections.py index ffaf9f826..1c7eb88e0 100644 --- a/dbt/adapters/sqlserver/sqlserver_connections.py +++ b/dbt/adapters/sqlserver/sqlserver_connections.py @@ -10,11 +10,20 @@ import dbt_common.exceptions import pyodbc +try: + import mssql_python as MSSQL_PYTHON +except ModuleNotFoundError as exc: + MSSQL_PYTHON = None + _MSSQL_PYTHON_IMPORT_ERROR: Optional[ModuleNotFoundError] = exc +else: + _MSSQL_PYTHON_IMPORT_ERROR = None + try: from azure.core.credentials import AccessToken except ModuleNotFoundError: + @dataclass - class AccessToken: + class AccessToken: # type: ignore[no-redef] token: str expires_on: int @@ -71,16 +80,67 @@ class AccessToken: "decimal.Decimal": "decimal", } +MSSQL_PYTHON_UNSUPPORTED_AUTHENTICATIONS = { + "cli", + "auto", + "environment", + "serviceprincipal", + "activedirectoryaccesstoken", +} + def _require_azure_identity(authentication: str) -> None: if _AZURE_IDENTITY_IMPORT_ERROR is not None: raise dbt_common.exceptions.DbtRuntimeError( "Azure authentication '{}' requires the optional dependency 'azure-identity'. " - "Install it with `pip install azure-identity` or use a non-Azure authentication mode." - .format(authentication) + "Install it with `pip install azure-identity` " + "or use a non-Azure authentication mode.".format(authentication) ) from _AZURE_IDENTITY_IMPORT_ERROR +def _require_mssql_python() -> None: + if _MSSQL_PYTHON_IMPORT_ERROR is not None: + raise dbt_common.exceptions.DbtRuntimeError( + "The `mssql-python` backend was requested, but the optional dependency " + "`mssql-python` is not installed. Install it with `pip install mssql-python` " + "or disable `use_mssql_python` in the profile." + ) from _MSSQL_PYTHON_IMPORT_ERROR + + +def _requires_pyodbc_backend(credentials: SQLServerCredentials) -> bool: + authentication = str(credentials.authentication or "sql").lower().strip() + return authentication in AZURE_AUTH_FUNCTIONS or authentication == "activedirectoryaccesstoken" + + +def _use_mssql_python_backend(credentials: SQLServerCredentials) -> bool: + return bool(getattr(credentials, "use_mssql_python", False)) + + +def _validate_pyodbc_requirements(credentials: SQLServerCredentials) -> None: + if not credentials.driver: + raise dbt_common.exceptions.DbtRuntimeError( + "The pyodbc backend requires a SQL Server ODBC driver name " + "in the `driver` profile field." + ) + + +def _validate_mssql_python_requirements(credentials: SQLServerCredentials) -> None: + authentication = str(credentials.authentication or "sql").strip() + authentication_lower = authentication.lower() + + if authentication_lower in MSSQL_PYTHON_UNSUPPORTED_AUTHENTICATIONS: + raise dbt_common.exceptions.DbtRuntimeError( + "Authentication '{}' is currently only supported by the pyodbc backend " + "in this adapter. " + "Disable `use_mssql_python` or use a connection-string-supported " + "authentication mode such as " + "`sql`, `ActiveDirectoryPassword`, `ActiveDirectoryInteractive`, " + "`ActiveDirectoryIntegrated`, " + "`ActiveDirectoryMSI`, `ActiveDirectoryDeviceCode`, " + "or `ActiveDirectoryDefault`.".format(authentication) + ) + + def convert_bytes_to_mswindows_byte_string(value: bytes) -> bytes: """ Convert bytes to a Microsoft windows byte string. @@ -313,7 +373,7 @@ def bool_to_connection_string_arg(key: str, value: bool) -> str: out : str The connection string argument. """ - return f'{key}={"Yes" if value else "No"}' + return f"{key}={'Yes' if value else 'No'}" def byte_array_to_datetime(value: bytes) -> dt.datetime: @@ -350,6 +410,128 @@ def byte_array_to_datetime(value: bytes) -> dt.datetime: ) +def _build_server_arg(credentials: SQLServerCredentials) -> str: + if "\\" in credentials.host: + # If there is a backslash \ in the host name, the host is a + # SQL Server named instance. In this case then port number has to be omitted. + return credentials.host + return f"{credentials.host},{credentials.port}" + + +def _build_common_connection_string_parts( + credentials: SQLServerCredentials, +) -> list[str]: + con_str = [f"SERVER={_build_server_arg(credentials)}"] + con_str.append(f"Database={credentials.database}") + + assert credentials.authentication is not None + + if ( + "ActiveDirectory" in credentials.authentication + and credentials.authentication != "ActiveDirectoryAccessToken" + ): + con_str.append(f"Authentication={credentials.authentication}") + + if credentials.authentication == "ActiveDirectoryPassword": + con_str.append(f"UID={{{credentials.UID}}}") + con_str.append(f"PWD={{{credentials.PWD}}}") + if credentials.authentication == "ActiveDirectoryServicePrincipal": + con_str.append(f"UID={{{credentials.client_id}}}") + con_str.append(f"PWD={{{credentials.client_secret}}}") + elif credentials.authentication == "ActiveDirectoryInteractive": + con_str.append(f"UID={{{credentials.UID}}}") + + elif credentials.windows_login: + con_str.append("trusted_connection=Yes") + elif credentials.authentication == "sql": + con_str.append(f"UID={{{credentials.UID}}}") + con_str.append(f"PWD={{{credentials.PWD}}}") + + assert credentials.encrypt is not None + assert credentials.trust_cert is not None + + con_str.append(bool_to_connection_string_arg("encrypt", credentials.encrypt)) + con_str.append(bool_to_connection_string_arg("TrustServerCertificate", credentials.trust_cert)) + + return con_str + + +def _build_pyodbc_connection_string(credentials: SQLServerCredentials) -> str: + con_str = [f"DRIVER={{{credentials.driver}}}"] + con_str.extend(_build_common_connection_string_parts(credentials)) + con_str.append("Pooling=true") + + if credentials.trace_flag: + con_str.append("SQL_ATTR_TRACE=SQL_OPT_TRACE_ON") + else: + con_str.append("SQL_ATTR_TRACE=SQL_OPT_TRACE_OFF") + + plugin_version = __version__.version + application_name = f"dbt-{credentials.type}/{plugin_version}" + con_str.append(f"APP={application_name}") + + try: + con_str.append("ConnectRetryCount=3") + con_str.append("ConnectRetryInterval=10") + except Exception as e: + logger.debug( + ( + "Retry count should be a integer value. " + "Skipping retries in the connection string." + ), + str(e), + ) + + return ";".join(con_str) + + +def _build_mssql_python_connection_string(credentials: SQLServerCredentials) -> str: + con_str = _build_common_connection_string_parts(credentials) + con_str.append("ConnectRetryCount=3") + con_str.append("ConnectRetryInterval=10") + return ";".join(con_str) + + +def _sanitize_connection_string_for_logging(connection_string: str) -> str: + parts = connection_string.split(";") + sanitized = [] + for part in parts: + if part.lower().startswith("pwd="): + sanitized.append("PWD=***") + else: + sanitized.append(part) + return ";".join(sanitized) + + +def _get_backend_exceptions( + credentials: SQLServerCredentials, +) -> Tuple[Type[Exception], ...]: + if _use_mssql_python_backend(credentials): + _require_mssql_python() + retryable_exceptions = [] + retryable_exceptions.append(getattr(MSSQL_PYTHON, "InternalError", Exception)) + retryable_exceptions.append(getattr(MSSQL_PYTHON, "OperationalError", Exception)) + + if _requires_pyodbc_backend(credentials): + retryable_exceptions.append(getattr(MSSQL_PYTHON, "InterfaceError", Exception)) + + return tuple(retryable_exceptions) + + retryable_exceptions = [ # https://github.com/mkleehammer/pyodbc/wiki/Exceptions + pyodbc.InternalError, # not used according to docs, but defined in PEP-249 + pyodbc.OperationalError, + ] + + if credentials.authentication.lower() in AZURE_AUTH_FUNCTIONS: + retryable_exceptions.append(pyodbc.InterfaceError) + + return tuple(retryable_exceptions) + + +def _is_pyodbc_handle(handle: Any) -> bool: + return hasattr(handle, "add_output_converter") + + class SQLServerConnectionManager(SQLConnectionManager): TYPE = "sqlserver" @@ -362,7 +544,6 @@ def exception_handler(self, sql): logger.debug("Database error: {}".format(str(e))) try: - # attempt to release the connection self.release() except pyodbc.Error: logger.debug("Failed to release connection!") @@ -370,13 +551,23 @@ def exception_handler(self, sql): raise dbt_common.exceptions.DbtDatabaseError(str(e).strip()) from e except Exception as e: + if _use_mssql_python_backend(self.get_thread_connection().credentials): + if MSSQL_PYTHON is not None and isinstance( + e, getattr(MSSQL_PYTHON, "DatabaseError", tuple()) + ): + logger.debug("Database error: {}".format(str(e))) + + try: + self.release() + except Exception: + logger.debug("Failed to release connection!") + + raise dbt_common.exceptions.DbtDatabaseError(str(e).strip()) from e + logger.debug(f"Error running SQL: {sql}") logger.debug("Rolling back transaction.") self.release() if isinstance(e, dbt_common.exceptions.DbtRuntimeError): - # during a sql query, an internal to dbt exception was raised. - # this sounds a lot like a signal handler and probably has - # useful information, so raise it without modification. raise raise dbt_common.exceptions.DbtRuntimeError(e) @@ -389,101 +580,38 @@ def open(cls, connection: Connection) -> Connection: credentials = cls.get_credentials(connection.credentials) - con_str = [f"DRIVER={{{credentials.driver}}}"] - - if "\\" in credentials.host: - # If there is a backslash \ in the host name, the host is a - # SQL Server named instance. In this case then port number has to be omitted. - con_str.append(f"SERVER={credentials.host}") + if _use_mssql_python_backend(credentials): + _require_mssql_python() + _validate_mssql_python_requirements(credentials) + con_str_concat = _build_mssql_python_connection_string(credentials) else: - con_str.append(f"SERVER={credentials.host},{credentials.port}") + _validate_pyodbc_requirements(credentials) + con_str_concat = _build_pyodbc_connection_string(credentials) - con_str.append(f"Database={credentials.database}") - con_str.append("Pooling=true") - - # Enabling trace flag - if credentials.trace_flag: - con_str.append("SQL_ATTR_TRACE=SQL_OPT_TRACE_ON") - else: - con_str.append("SQL_ATTR_TRACE=SQL_OPT_TRACE_OFF") - - assert credentials.authentication is not None - - # Access token authentication does not additional connection string parameters. - # The access token is passed in the pyodbc attributes. - if ( - "ActiveDirectory" in credentials.authentication - and credentials.authentication != "ActiveDirectoryAccessToken" - ): - con_str.append(f"Authentication={credentials.authentication}") - - if credentials.authentication == "ActiveDirectoryPassword": - con_str.append(f"UID={{{credentials.UID}}}") - con_str.append(f"PWD={{{credentials.PWD}}}") - if credentials.authentication == "ActiveDirectoryServicePrincipal": - con_str.append(f"UID={{{credentials.client_id}}}") - con_str.append(f"PWD={{{credentials.client_secret}}}") - elif credentials.authentication == "ActiveDirectoryInteractive": - con_str.append(f"UID={{{credentials.UID}}}") - - elif credentials.windows_login: - con_str.append("trusted_connection=Yes") - elif credentials.authentication == "sql": - con_str.append(f"UID={{{credentials.UID}}}") - con_str.append(f"PWD={{{credentials.PWD}}}") - - # https://docs.microsoft.com/en-us/sql/relational-databases/native-client/features/using-encryption-without-validation?view=sql-server-ver15 - assert credentials.encrypt is not None - assert credentials.trust_cert is not None - - con_str.append(bool_to_connection_string_arg("encrypt", credentials.encrypt)) - con_str.append( - bool_to_connection_string_arg("TrustServerCertificate", credentials.trust_cert) - ) - - plugin_version = __version__.version - application_name = f"dbt-{credentials.type}/{plugin_version}" - con_str.append(f"APP={application_name}") - - try: - con_str.append("ConnectRetryCount=3") - con_str.append("ConnectRetryInterval=10") - - except Exception as e: - logger.debug( - ( - "Retry count should be a integer value. " - "Skipping retries in the connection string." - ), - str(e), - ) - - con_str_concat = ";".join(con_str) - - index = [] - for i, elem in enumerate(con_str): - if "pwd=" in elem.lower(): - index.append(i) - - if len(index) != 0: - con_str[index[0]] = "PWD=***" - - con_str_display = ";".join(con_str) - - retryable_exceptions = [ # https://github.com/mkleehammer/pyodbc/wiki/Exceptions - pyodbc.InternalError, # not used according to docs, but defined in PEP-249 - pyodbc.OperationalError, - ] - - if credentials.authentication.lower() in AZURE_AUTH_FUNCTIONS: - # Temporary login/token errors fall into this category when using AAD - retryable_exceptions.append(pyodbc.InterfaceError) + con_str_display = _sanitize_connection_string_for_logging(con_str_concat) + retryable_exceptions = _get_backend_exceptions(credentials) def connect(): logger.debug(f"Using connection string: {con_str_display}") - pyodbc.pooling = True - # pyodbc attributes includes the access token provided by the user if required. + if _use_mssql_python_backend(credentials): + MSSQL_PYTHON.pooling(enabled=False) + handle = MSSQL_PYTHON.connect( + con_str_concat, + autocommit=True, + timeout=credentials.login_timeout, + ) + try: + handle.timeout = credentials.query_timeout + except Exception: + logger.debug( + "The mssql-python connection object does not expose a mutable `timeout` " + "attribute; continuing without setting query timeout on the handle." + ) + logger.debug(f"Connected to db: {credentials.database}") + return handle + + pyodbc.pooling = True attrs_before = get_pyodbc_attrs_before_credentials(credentials) handle = pyodbc.connect( @@ -510,11 +638,9 @@ def cancel(self, connection: Connection): logger.debug("Cancel query") def add_begin_query(self): - # return self.add_query('BEGIN TRANSACTION', auto_begin=False) pass def add_commit_query(self): - # return self.add_query('COMMIT TRANSACTION', auto_begin=False) pass def add_query( @@ -545,7 +671,6 @@ def _execute_query_with_retry( retries. Failure begins a sleep and retry routine. """ try: - # pyodbc does not handle a None type binding! if bindings is None: cursor.execute(sql) else: @@ -555,14 +680,13 @@ def _execute_query_with_retry( ] cursor.execute(sql, bindings) except retryable_exceptions as e: - # Cease retries and fail when limit is hit. if attempt >= retry_limit: raise e fire_event( AdapterEventDebug( message=( - f"Got a retryable error {type(e)}. {retry_limit-attempt} " + f"Got a retryable error {type(e)}. {retry_limit - attempt} " "retries left. Retrying in 1 second.\n" f"Error:\n{e}" ) @@ -600,7 +724,9 @@ def _execute_query_with_retry( fire_event( SQLQuery( - conn_name=cast_to_str(connection.name), sql=log_sql, node_info=get_node_info() + conn_name=cast_to_str(connection.name), + sql=log_sql, + node_info=get_node_info(), ) ) @@ -618,9 +744,8 @@ def _execute_query_with_retry( attempt=1, ) - # convert DATETIMEOFFSET binary structures to datetime ojbects - # https://github.com/mkleehammer/pyodbc/issues/134#issuecomment-281739794 - connection.handle.add_output_converter(-155, byte_array_to_datetime) + if _is_pyodbc_handle(connection.handle): + connection.handle.add_output_converter(-155, byte_array_to_datetime) fire_event( SQLQueryStatus( @@ -653,18 +778,25 @@ def data_type_code_to_name(cls, type_code: Union[str, str]) -> str: return datatypes[data_type] def execute( - self, sql: str, auto_begin: bool = True, fetch: bool = False, limit: Optional[int] = None + self, + sql: str, + auto_begin: bool = True, + fetch: bool = False, + limit: Optional[int] = None, ) -> Tuple[AdapterResponse, agate.Table]: sql = self._add_query_comment(sql) _, cursor = self.add_query(sql, auto_begin) - response = self.get_response(cursor) - if fetch: - while cursor.description is None: - if not cursor.nextset(): - break - table = self.get_result_from_cursor(cursor, limit) - else: - table = empty_table() - while cursor.nextset(): - pass - return response, table + try: + response = self.get_response(cursor) + if fetch: + while cursor.description is None: + if not cursor.nextset(): + break + table = self.get_result_from_cursor(cursor, limit) + else: + table = empty_table() + while cursor.nextset(): + pass + return response, table + finally: + cursor.close() diff --git a/dbt/adapters/sqlserver/sqlserver_credentials.py b/dbt/adapters/sqlserver/sqlserver_credentials.py index 37bba77ea..4c0811d3d 100644 --- a/dbt/adapters/sqlserver/sqlserver_credentials.py +++ b/dbt/adapters/sqlserver/sqlserver_credentials.py @@ -6,10 +6,10 @@ @dataclass class SQLServerCredentials(Credentials): - driver: str - host: str - database: str - schema: str + driver: Optional[str] = None + host: str = "" + database: str = "" + schema: str = "" UID: Optional[str] = None PWD: Optional[str] = None port: Optional[int] = 1433 @@ -27,6 +27,7 @@ class SQLServerCredentials(Credentials): schema_authorization: Optional[str] = None login_timeout: Optional[int] = 0 query_timeout: Optional[int] = 0 + use_mssql_python: bool = False _ALIASES = { "user": "UID", @@ -41,6 +42,8 @@ class SQLServerCredentials(Credentials): "TrustServerCertificate": "trust_cert", "schema_auth": "schema_authorization", "SQL_ATTR_TRACE": "trace_flag", + "mssql_python": "use_mssql_python", + "use_mssql_python_backend": "use_mssql_python", } @property @@ -54,7 +57,7 @@ def _connection_keys(self): if self.authentication.lower().strip() == "serviceprincipal": self.authentication = "ActiveDirectoryServicePrincipal" - return ( + keys = ( "server", "port", "database", @@ -67,8 +70,14 @@ def _connection_keys(self): "trace_flag", "encrypt", "trust_cert", + "use_mssql_python", ) + if not self.use_mssql_python: + keys = ("driver",) + keys + + return keys + @property def unique_field(self): return self.host diff --git a/dev_requirements.txt b/dev_requirements.txt index 239825e67..7b5301d8b 100644 --- a/dev_requirements.txt +++ b/dev_requirements.txt @@ -20,5 +20,6 @@ tox>=3.13 twine wheel pyodbc +mssql-python azure-identity -e . diff --git a/setup.py b/setup.py index e15e6ccfa..fc1306419 100644 --- a/setup.py +++ b/setup.py @@ -7,7 +7,13 @@ from setuptools.command.install import install package_name = "dbt-sqlserver" -authors_list = ["Mikael Ene", "Anders Swanson", "Sam Debruyn", "Cor Zuurmond", "Cody Scott"] +authors_list = [ + "Mikael Ene", + "Anders Swanson", + "Sam Debruyn", + "Cor Zuurmond", + "Cody Scott", +] dbt_version = "1.9" description = """A Microsoft SQL Server adapter plugin for dbt""" @@ -70,6 +76,11 @@ def run(self): "dbt-common>=1.0,<2.0", "dbt-adapters>=1.11.0,<2.0", ], + extras_require={ + "mssql-python": [ + "mssql-python>=1.0.0", + ], + }, cmdclass={ "verify": VerifyVersionCommand, }, diff --git a/test.env.sample b/test.env.sample index 09982ccc9..b66c97497 100644 --- a/test.env.sample +++ b/test.env.sample @@ -6,6 +6,7 @@ SQLSERVER_TEST_PORT=1433 SQLSERVER_TEST_DBNAME=TestDB SQLSERVER_TEST_ENCRYPT=True SQLSERVER_TEST_TRUST_CERT=True +SQLSERVER_TEST_USE_MSSQL_PYTHON=False DBT_TEST_USER_1=DBT_TEST_USER_1 DBT_TEST_USER_2=DBT_TEST_USER_2 DBT_TEST_USER_3=DBT_TEST_USER_3 diff --git a/tests/conftest.py b/tests/conftest.py index 540ee3025..376bdd93c 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -54,6 +54,8 @@ def _all_profiles_base(): "driver": os.getenv("SQLSERVER_TEST_DRIVER", "ODBC Driver 18 for SQL Server"), "port": int(os.getenv("SQLSERVER_TEST_PORT", "1433")), "retries": 2, + "use_mssql_python": os.getenv("SQLSERVER_TEST_USE_MSSQL_PYTHON", "False").lower() + == "true", } diff --git a/tests/unit/adapters/mssql/test_sqlserver_connection_manager.py b/tests/unit/adapters/mssql/test_sqlserver_connection_manager.py index be909afb4..5619d6795 100644 --- a/tests/unit/adapters/mssql/test_sqlserver_connection_manager.py +++ b/tests/unit/adapters/mssql/test_sqlserver_connection_manager.py @@ -1,9 +1,14 @@ +from types import SimpleNamespace +from typing import Any, Dict, List + import pytest from azure.identity import AzureCliCredential +from dbt.adapters.contracts.connection import Connection, ConnectionState from dbt_common.exceptions import DbtRuntimeError from dbt.adapters.sqlserver import sqlserver_connections -from dbt.adapters.sqlserver.sqlserver_connections import ( # byte_array_to_datetime, +from dbt.adapters.sqlserver.sqlserver_connections import ( + SQLServerConnectionManager, bool_to_connection_string_arg, get_pyodbc_attrs_before_credentials, ) @@ -16,13 +21,12 @@ @pytest.fixture def credentials() -> SQLServerCredentials: - credentials = SQLServerCredentials( + return SQLServerCredentials( driver="ODBC Driver 17 for SQL Server", host="fake.sql.sqlserver.net", database="dbt", schema="sqlserver", ) - return credentials def test_get_pyodbc_attrs_before_empty_dict_when_service_principal( @@ -38,7 +42,9 @@ def test_get_pyodbc_attrs_before_empty_dict_when_service_principal( def test_get_pyodbc_attrs_before_sql_auth_without_azure_identity( credentials: SQLServerCredentials, monkeypatch: pytest.MonkeyPatch ) -> None: - monkeypatch.setattr(sqlserver_connections, "_AZURE_IDENTITY_IMPORT_ERROR", ModuleNotFoundError()) + monkeypatch.setattr( + sqlserver_connections, "_AZURE_IDENTITY_IMPORT_ERROR", ModuleNotFoundError() + ) attrs_before = get_pyodbc_attrs_before_credentials(credentials) @@ -49,7 +55,9 @@ def test_get_pyodbc_attrs_before_cli_auth_requires_azure_identity( credentials: SQLServerCredentials, monkeypatch: pytest.MonkeyPatch ) -> None: credentials.authentication = "cli" - monkeypatch.setattr(sqlserver_connections, "_AZURE_IDENTITY_IMPORT_ERROR", ModuleNotFoundError()) + monkeypatch.setattr( + sqlserver_connections, "_AZURE_IDENTITY_IMPORT_ERROR", ModuleNotFoundError() + ) with pytest.raises(DbtRuntimeError, match="requires the optional dependency 'azure-identity'"): get_pyodbc_attrs_before_credentials(credentials) @@ -61,3 +69,146 @@ def test_get_pyodbc_attrs_before_cli_auth_requires_azure_identity( ) def test_bool_to_connection_string_arg(key: str, value: bool, expected: str) -> None: assert bool_to_connection_string_arg(key, value) == expected + + +def test_open_with_mssql_python_feature_flag_requires_optional_dependency( + credentials: SQLServerCredentials, monkeypatch: pytest.MonkeyPatch +) -> None: + credentials.driver = None + credentials.use_mssql_python = True + + connection = Connection(type="sqlserver", name="feature-flag-test", credentials=credentials) + + monkeypatch.setattr(sqlserver_connections, "MSSQL_PYTHON", None, raising=False) + monkeypatch.setattr( + sqlserver_connections, + "_MSSQL_PYTHON_IMPORT_ERROR", + ModuleNotFoundError("No module named 'mssql_python'"), + raising=False, + ) + + with pytest.raises(DbtRuntimeError, match="mssql-python"): + SQLServerConnectionManager.open(connection) + + +def test_open_with_mssql_python_feature_flag_builds_connection_without_odbc_driver( + credentials: SQLServerCredentials, monkeypatch: pytest.MonkeyPatch +) -> None: + credentials.driver = None + credentials.UID = "dbt_user" + credentials.PWD = "super-secret" + credentials.encrypt = True + credentials.trust_cert = True + credentials.login_timeout = 17 + credentials.query_timeout = 23 + credentials.retries = 5 + credentials.use_mssql_python = True + + captured: Dict[str, Any] = {} + pooling_calls: List[Dict[str, Any]] = [] + + class FakeHandle: + def __init__(self): + self.timeout = None + + fake_handle = FakeHandle() + + def fake_connect(connection_string, autocommit, timeout): + captured["connection_string"] = connection_string + captured["autocommit"] = autocommit + captured["timeout"] = timeout + return fake_handle + + def fake_pooling(*, enabled): + pooling_calls.append({"enabled": enabled}) + + fake_module = SimpleNamespace( + connect=fake_connect, + pooling=fake_pooling, + OperationalError=type("OperationalError", (Exception,), {}), + InterfaceError=type("InterfaceError", (Exception,), {}), + InternalError=type("InternalError", (Exception,), {}), + ) + + def fake_retry_connection( + cls, + connection, + connect, + logger, + retry_limit, + retryable_exceptions, + ): + captured["retry_limit"] = retry_limit + captured["retryable_exceptions"] = retryable_exceptions + handle = connect() + connection.handle = handle + connection.state = ConnectionState.OPEN + return connection + + monkeypatch.setattr(sqlserver_connections, "MSSQL_PYTHON", fake_module, raising=False) + monkeypatch.setattr(sqlserver_connections, "_MSSQL_PYTHON_IMPORT_ERROR", None, raising=False) + monkeypatch.setattr( + SQLServerConnectionManager, + "retry_connection", + classmethod(fake_retry_connection), + ) + + connection = Connection(type="sqlserver", name="feature-flag-test", credentials=credentials) + opened = SQLServerConnectionManager.open(connection) + + assert opened is connection + assert opened.handle is fake_handle + assert opened.state == ConnectionState.OPEN + + assert captured["autocommit"] is True + assert captured["timeout"] == 17 + assert captured["retry_limit"] == 5 + assert pooling_calls == [{"enabled": False}] + + con_str = captured["connection_string"] + assert "DRIVER=" not in con_str + assert "SERVER=fake.sql.sqlserver.net,1433" in con_str + assert "Database=dbt" in con_str + assert "UID={dbt_user}" in con_str + assert "PWD={super-secret}" in con_str + assert "encrypt=Yes" in con_str + assert "TrustServerCertificate=Yes" in con_str + assert "APP=dbt-sqlserver/" not in con_str + + assert fake_module.OperationalError in captured["retryable_exceptions"] + assert fake_module.InternalError in captured["retryable_exceptions"] + + +def test_open_with_mssql_python_feature_flag_fails_fast_for_pyodbc_token_auth_aliases( + credentials: SQLServerCredentials, monkeypatch: pytest.MonkeyPatch +) -> None: + credentials.driver = None + credentials.use_mssql_python = True + credentials.authentication = "cli" + + fake_module = SimpleNamespace( + connect=lambda *args, **kwargs: None, + OperationalError=type("OperationalError", (Exception,), {}), + InterfaceError=type("InterfaceError", (Exception,), {}), + InternalError=type("InternalError", (Exception,), {}), + ) + + monkeypatch.setattr(sqlserver_connections, "MSSQL_PYTHON", fake_module, raising=False) + monkeypatch.setattr(sqlserver_connections, "_MSSQL_PYTHON_IMPORT_ERROR", None, raising=False) + + connection = Connection(type="sqlserver", name="feature-flag-test", credentials=credentials) + + with pytest.raises(DbtRuntimeError, match="authentication"): + SQLServerConnectionManager.open(connection) + + +def test_open_with_pyodbc_path_still_requires_driver_when_feature_flag_disabled( + credentials: SQLServerCredentials, +) -> None: + credentials.driver = None + credentials.use_mssql_python = False + + connection = Connection(type="sqlserver", name="pyodbc-test", credentials=credentials) + + with pytest.raises(DbtRuntimeError, match="driver"): + SQLServerConnectionManager.open(connection)