34 changes: 34 additions & 0 deletions conftest.py
@@ -1,7 +1,41 @@
import importlib.util
import os
import pytest


def _kernel_wheel_available() -> bool:
    """The ``use_sea=True`` code path now routes through the Rust
    kernel via PyO3. The ``databricks_sql_kernel`` wheel is not
    yet on PyPI (built from a separate repo); CI environments
    without it should skip ``use_sea=True`` parametrized cases
    rather than fail with a hard ImportError."""
    return importlib.util.find_spec("databricks_sql_kernel") is not None


def pytest_collection_modifyitems(config, items):
    """Skip parametrized test cases that pass ``use_sea=True`` when
    the kernel wheel isn't installed.

    The existing e2e suite uses ``@pytest.mark.parametrize(
    "extra_params", [{}, {"use_sea": True}])`` to exercise both
    backends. When the kernel wheel is missing those cases die at
    ``connect()`` time with our pointed ImportError; mark them
    skipped at collection time so the CI signal stays accurate.
    """
    if _kernel_wheel_available():
        return
    skip_marker = pytest.mark.skip(
        reason="use_sea=True requires databricks-sql-kernel (not installed)"
    )
    for item in items:
        params = getattr(item, "callspec", None)
        if params is None:
            continue
        extra_params = params.params.get("extra_params")
        if isinstance(extra_params, dict) and extra_params.get("use_sea") is True:
            item.add_marker(skip_marker)


@pytest.fixture(scope="session")
def host():
    return os.getenv("DATABRICKS_SERVER_HOSTNAME")
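For context, a minimal sketch of the parametrized test shape this hook targets. The ``host`` fixture is the one defined above; the ``http_path`` and ``access_token`` fixtures and the test body are illustrative assumptions, not part of this diff:

import pytest
import databricks.sql


@pytest.mark.parametrize("extra_params", [{}, {"use_sea": True}])
def test_simple_query(host, http_path, access_token, extra_params):
    # Without the kernel wheel, the {"use_sea": True} case is marked
    # skipped by pytest_collection_modifyitems at collection time
    # instead of raising ImportError inside connect().
    with databricks.sql.connect(
        server_hostname=host,
        http_path=http_path,
        access_token=access_token,
        **extra_params,
    ) as connection:
        with connection.cursor() as cursor:
            cursor.execute("SELECT 1")
            assert cursor.fetchone()[0] == 1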
14 changes: 14 additions & 0 deletions pyproject.toml
@@ -36,6 +36,20 @@ requests-kerberos = {version = "^0.15.0", optional = true}

[tool.poetry.extras]
pyarrow = ["pyarrow"]
# `[kernel]` extra is intentionally not declared here yet.
# `databricks-sql-kernel` is built from the databricks-sql-kernel
# repo and not yet published to PyPI; declaring it as a poetry dep
# breaks `poetry lock` for every CI job. Once the wheel is on PyPI
# the extra will be added back here:
#
# databricks-sql-kernel = {version = "^0.1.0", optional = true}
# [tool.poetry.extras]
# kernel = ["databricks-sql-kernel"]
#
# Until then, install the kernel separately:
# pip install databricks-sql-kernel
# or (local dev):
# cd databricks-sql-kernel/pyo3 && maturin develop --release

[tool.poetry.group.dev.dependencies]
pytest = "^7.1.2"
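The "pointed ImportError" that conftest.py refers to would come from a guarded import wherever the PyO3 extension first loads. A hypothetical sketch of that pattern; the actual guard and message live in the kernel client module, not in this file:

try:
    import databricks_sql_kernel
except ImportError as exc:
    raise ImportError(
        "use_sea=True requires the databricks-sql-kernel wheel. "
        "Install it with `pip install databricks-sql-kernel` or build it "
        "locally with `maturin develop --release` (see pyproject.toml)."
    ) from exc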
25 changes: 25 additions & 0 deletions src/databricks/sql/backend/kernel/__init__.py
@@ -0,0 +1,25 @@
"""Backend that delegates to the Databricks SQL Kernel (Rust) via PyO3.

Routed when ``use_sea=True`` is passed to ``databricks.sql.connect``.
The module's identity is "delegates to the kernel" — not the wire
protocol the kernel happens to use today (SEA REST). The kernel may
switch its default transport (SEA REST → SEA gRPC → …) without
renaming this module.

This ``__init__`` deliberately does **not** re-export
``KernelDatabricksClient`` from ``.client``. Importing ``.client``
loads the ``databricks_sql_kernel`` PyO3 extension at module-import
time; doing that eagerly here would make ``import
databricks.sql.backend.kernel.type_mapping`` (used by tests / by
``KernelResultSet`` consumers) require the kernel wheel even when
the caller never plans to open a kernel-backed session. Callers
that need the client import it directly:

    from databricks.sql.backend.kernel.client import KernelDatabricksClient

``session.py::_create_backend`` already does this lazy import under
the ``use_sea=True`` branch.

See ``docs/designs/pysql-kernel-integration.md`` in
``databricks-sql-kernel`` for the full integration design.
"""
97 changes: 97 additions & 0 deletions src/databricks/sql/backend/kernel/auth_bridge.py
@@ -0,0 +1,97 @@
"""Translate the connector's ``AuthProvider`` into ``databricks_sql_kernel``
``Session`` auth kwargs.

This phase ships PAT only. The kernel-side PyO3 binding accepts
``auth_type='pat'``; OAuth / federation / custom credentials
providers are reserved but not yet wired in either layer. Non-PAT
auth raises ``NotSupportedError`` from this bridge so the failure
surfaces at session-open time with a clear message rather than
deep inside the kernel.

Token extraction goes through ``AuthProvider.add_headers({})``
rather than touching auth-provider-specific attributes, so the
bridge works uniformly for every PAT shape — including
``AccessTokenAuthProvider`` wrapped in ``TokenFederationProvider``
(which ``get_python_sql_connector_auth_provider`` does for every
provider it builds).
"""

from __future__ import annotations

import logging
from typing import Any, Dict, Optional

from databricks.sql.auth.authenticators import AccessTokenAuthProvider, AuthProvider
from databricks.sql.auth.token_federation import TokenFederationProvider
from databricks.sql.exc import NotSupportedError

logger = logging.getLogger(__name__)


_BEARER_PREFIX = "Bearer "


def _is_pat(auth_provider: AuthProvider) -> bool:
    """Return True iff this provider ultimately wraps an
    ``AccessTokenAuthProvider``.

    ``get_python_sql_connector_auth_provider`` always wraps the
    base provider in a ``TokenFederationProvider``, so an
    ``isinstance`` check against ``AccessTokenAuthProvider`` alone
    never matches in practice. We peek through the federation
    wrapper to find the real type.
    """
    if isinstance(auth_provider, AccessTokenAuthProvider):
        return True
    if isinstance(auth_provider, TokenFederationProvider) and isinstance(
        auth_provider.external_provider, AccessTokenAuthProvider
    ):
        return True
    return False


def _extract_bearer_token(auth_provider: AuthProvider) -> Optional[str]:
    """Pull the current bearer token out of an ``AuthProvider``.

    The connector's ``AuthProvider.add_headers`` mutates a header
    dict and writes the ``Authorization: Bearer <token>`` value.
    Going through that public surface keeps us insulated from
    provider-specific internals.

    Returns ``None`` if the provider did not write an Authorization
    header or wrote a non-Bearer scheme — neither is representable
    in the kernel's PAT auth surface.
    """
    headers: Dict[str, str] = {}
    auth_provider.add_headers(headers)
    auth = headers.get("Authorization")
    if not auth:
        return None
    if not auth.startswith(_BEARER_PREFIX):
        return None
    return auth[len(_BEARER_PREFIX) :]


def kernel_auth_kwargs(auth_provider: AuthProvider) -> Dict[str, Any]:
    """Build the kwargs passed to ``databricks_sql_kernel.Session(...)``.

    PAT (including ``TokenFederationProvider``-wrapped PAT) routes
    through the kernel's PAT path. Anything else raises
    ``NotSupportedError`` — the kernel binding doesn't accept OAuth
    today, and routing OAuth through PAT would silently break
    token refresh during long-running sessions.
    """
    if _is_pat(auth_provider):
        token = _extract_bearer_token(auth_provider)
        if not token:
            raise ValueError(
                "PAT auth provider did not produce a Bearer Authorization "
                "header; cannot route through the kernel's PAT path"
            )
        return {"auth_type": "pat", "access_token": token}

    raise NotSupportedError(
        "The kernel backend (use_sea=True) currently only supports PAT auth, "
        f"but got {type(auth_provider).__name__}. Use use_sea=False (Thrift) "
        "for OAuth / federation / custom credential providers."
    )