34 changes: 34 additions & 0 deletions conftest.py
@@ -1,7 +1,41 @@
import importlib.util
import os
import pytest


def _kernel_wheel_available() -> bool:
    """The ``use_sea=True`` code path now routes through the Rust
    kernel via PyO3. The ``databricks_sql_kernel`` wheel is not
    yet on PyPI (built from a separate repo); CI environments
    without it should skip ``use_sea=True`` parametrized cases
    rather than fail with a hard ImportError."""
    return importlib.util.find_spec("databricks_sql_kernel") is not None


def pytest_collection_modifyitems(config, items):
    """Skip parametrized test cases that pass ``use_sea=True`` when
    the kernel wheel isn't installed.

    The existing e2e suite uses ``@pytest.mark.parametrize(
    "extra_params", [{}, {"use_sea": True}])`` to exercise both
    backends. When the kernel wheel is missing those cases die at
    ``connect()`` time with our pointed ImportError; mark them
    skipped at collection time so the CI signal stays accurate.
    """
    if _kernel_wheel_available():
        return
    skip_marker = pytest.mark.skip(
        reason="use_sea=True requires databricks-sql-kernel (not installed)"
    )
    for item in items:
        params = getattr(item, "callspec", None)
        if params is None:
            continue
        extra_params = params.params.get("extra_params")
        if isinstance(extra_params, dict) and extra_params.get("use_sea") is True:
            item.add_marker(skip_marker)


@pytest.fixture(scope="session")
def host():
    return os.getenv("DATABRICKS_SERVER_HOSTNAME")
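For context, a minimal sketch of the parametrized test shape this hook targets. The ``host`` fixture is the one defined above; the ``http_path`` and ``access_token`` fixtures and the test body are illustrative assumptions, not part of this diff:

import pytest
import databricks.sql


@pytest.mark.parametrize("extra_params", [{}, {"use_sea": True}])
def test_simple_query(host, http_path, access_token, extra_params):
    # Without the kernel wheel, the {"use_sea": True} case is marked
    # skipped by pytest_collection_modifyitems at collection time
    # instead of raising ImportError inside connect().
    with databricks.sql.connect(
        server_hostname=host,
        http_path=http_path,
        access_token=access_token,
        **extra_params,
    ) as connection:
        with connection.cursor() as cursor:
            cursor.execute("SELECT 1")
            assert cursor.fetchone()[0] == 1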
14 changes: 14 additions & 0 deletions pyproject.toml
@@ -36,6 +36,20 @@ requests-kerberos = {version = "^0.15.0", optional = true}

[tool.poetry.extras]
pyarrow = ["pyarrow"]
# `[kernel]` extra is intentionally not declared here yet.
# `databricks-sql-kernel` is built from the databricks-sql-kernel
# repo and not yet published to PyPI; declaring it as a poetry dep
# breaks `poetry lock` for every CI job. Once the wheel is on PyPI
# the extra will be added back here:
#
# databricks-sql-kernel = {version = "^0.1.0", optional = true}
# [tool.poetry.extras]
# kernel = ["databricks-sql-kernel"]
#
# Until then, install the kernel separately:
# pip install databricks-sql-kernel
# or (local dev):
# cd databricks-sql-kernel/pyo3 && maturin develop --release

[tool.poetry.group.dev.dependencies]
pytest = "^7.1.2"
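The "pointed ImportError" that conftest.py refers to would come from a guarded import wherever the PyO3 extension first loads. A hypothetical sketch of that pattern; the actual guard and message live in the kernel client module, not in this file:

try:
    import databricks_sql_kernel
except ImportError as exc:
    raise ImportError(
        "use_sea=True requires the databricks-sql-kernel wheel. "
        "Install it with `pip install databricks-sql-kernel` or build it "
        "locally with `maturin develop --release` (see pyproject.toml)."
    ) from exc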
25 changes: 25 additions & 0 deletions src/databricks/sql/backend/kernel/__init__.py
@@ -0,0 +1,25 @@
"""Backend that delegates to the Databricks SQL Kernel (Rust) via PyO3.

Routed when ``use_sea=True`` is passed to ``databricks.sql.connect``.
The module's identity is "delegates to the kernel" — not the wire
protocol the kernel happens to use today (SEA REST). The kernel may
switch its default transport (SEA REST → SEA gRPC → …) without
renaming this module.

This ``__init__`` deliberately does **not** re-export
``KernelDatabricksClient`` from ``.client``. Importing ``.client``
loads the ``databricks_sql_kernel`` PyO3 extension at module-import
time; doing that eagerly here would make ``import
databricks.sql.backend.kernel.type_mapping`` (used by tests / by
``KernelResultSet`` consumers) require the kernel wheel even when
the caller never plans to open a kernel-backed session. Callers
that need the client import it directly:

    from databricks.sql.backend.kernel.client import KernelDatabricksClient

``session.py::_create_backend`` already does this lazy import under
the ``use_sea=True`` branch.

See ``docs/designs/pysql-kernel-integration.md`` in
``databricks-sql-kernel`` for the full integration design.
"""
97 changes: 97 additions & 0 deletions src/databricks/sql/backend/kernel/auth_bridge.py
@@ -0,0 +1,97 @@
"""Translate the connector's ``AuthProvider`` into ``databricks_sql_kernel``
``Session`` auth kwargs.

This phase ships PAT only. The kernel-side PyO3 binding accepts
``auth_type='pat'``; OAuth / federation / custom credentials
providers are reserved but not yet wired in either layer. Non-PAT
auth raises ``NotSupportedError`` from this bridge so the failure
surfaces at session-open time with a clear message rather than
deep inside the kernel.

Token extraction goes through ``AuthProvider.add_headers({})``
rather than touching auth-provider-specific attributes, so the
bridge works uniformly for every PAT shape — including
``AccessTokenAuthProvider`` wrapped in ``TokenFederationProvider``
(which ``get_python_sql_connector_auth_provider`` does for every
provider it builds).
"""

from __future__ import annotations

import logging
from typing import Any, Dict, Optional

from databricks.sql.auth.authenticators import AccessTokenAuthProvider, AuthProvider
from databricks.sql.auth.token_federation import TokenFederationProvider
from databricks.sql.exc import NotSupportedError

logger = logging.getLogger(__name__)


_BEARER_PREFIX = "Bearer "


def _is_pat(auth_provider: AuthProvider) -> bool:
    """Return True iff this provider ultimately wraps an
    ``AccessTokenAuthProvider``.

    ``get_python_sql_connector_auth_provider`` always wraps the
    base provider in a ``TokenFederationProvider``, so an
    ``isinstance`` check against ``AccessTokenAuthProvider`` alone
    never matches in practice. We peek through the federation
    wrapper to find the real type.
    """
    if isinstance(auth_provider, AccessTokenAuthProvider):
        return True
    if isinstance(auth_provider, TokenFederationProvider) and isinstance(
        auth_provider.external_provider, AccessTokenAuthProvider
    ):
        return True
    return False


def _extract_bearer_token(auth_provider: AuthProvider) -> Optional[str]:
    """Pull the current bearer token out of an ``AuthProvider``.

    The connector's ``AuthProvider.add_headers`` mutates a header
    dict and writes the ``Authorization: Bearer <token>`` value.
    Going through that public surface keeps us insulated from
    provider-specific internals.

    Returns ``None`` if the provider did not write an Authorization
    header or wrote a non-Bearer scheme — neither is representable
    in the kernel's PAT auth surface.
    """
    headers: Dict[str, str] = {}
    auth_provider.add_headers(headers)
    auth = headers.get("Authorization")
    if not auth:
        return None
    if not auth.startswith(_BEARER_PREFIX):
        return None
    return auth[len(_BEARER_PREFIX) :]


def kernel_auth_kwargs(auth_provider: AuthProvider) -> Dict[str, Any]:
    """Build the kwargs passed to ``databricks_sql_kernel.Session(...)``.

    PAT (including ``TokenFederationProvider``-wrapped PAT) routes
    through the kernel's PAT path. Anything else raises
    ``NotSupportedError`` — the kernel binding doesn't accept OAuth
    today, and routing OAuth through PAT would silently break
    token refresh during long-running sessions.
    """
    if _is_pat(auth_provider):
        token = _extract_bearer_token(auth_provider)
        if not token:
            raise ValueError(
                "PAT auth provider did not produce a Bearer Authorization "
                "header; cannot route through the kernel's PAT path"
            )
        return {"auth_type": "pat", "access_token": token}

    raise NotSupportedError(
        "The kernel backend (use_sea=True) currently only supports PAT auth, "
        f"but got {type(auth_provider).__name__}. Use use_sea=False (Thrift) "
        "for OAuth / federation / custom credential providers."
    )