I'm using:
python 3.12
pandas==2.1.3
sqlalchemy==2.0.22
databricks-sql-connector==3.0.0
Windows
If I have a pandas DataFrame with a datetime64[ns] data type and use to_sql to write it into Databricks, then read_sql_table to read the same table back, it fails to handle the TIMESTAMP_NTZ data type.
Here is example code:
from os import environ
from pandas import DataFrame, Timestamp, read_sql_table
from sqlalchemy import create_engine
from sqlalchemy.event import listen
# Connection coordinates for the Databricks SQL warehouse, all taken from the
# environment (a missing variable fails fast with KeyError).
host, http_path, catalog, schema = (
    environ["DATABRICKS_HOST"],
    environ["DATABRICKS_HTTP_PATH"],  # SQL Warehouse endpoint
    environ["DATABRICKS_CATALOG"],
    environ["DATABRICKS_SCHEMA"],
)
def do_connect(dialect, conn_rec, cargs, cparams):
    """Engine ``do_connect`` listener: supply the access token at connect time.

    Injects DATABRICKS_ACCESS_TOKEN into the DBAPI connect parameters so the
    token never has to appear in the engine URL.
    """
    cparams.update(access_token=environ["DATABRICKS_ACCESS_TOKEN"])
# Build the engine for the Databricks SQL warehouse.  The access token is
# deliberately NOT embedded in the URL; the do_connect listener registered
# below supplies it at connect time.
engine = create_engine(
    f"databricks://{host}:443?http_path={http_path}&catalog={catalog}&schema={schema}"
)
# Must be registered before the first connect() so the token is available.
listen(engine, "do_connect", do_connect)
with engine.connect() as conn:
    # One datetime64[ns] column; per the report this is stored by Databricks
    # as TIMESTAMP_NTZ.
    dtf = DataFrame(
        {
            "timestamp_field": [
                Timestamp("12/31/2023 12:00pm"),
                Timestamp("1/31/2023 2:00pm"),
            ]
        }
    )
    # Start from a clean slate, write the frame, then read the table back.
    conn.exec_driver_sql("drop table if exists test_timestamp")
    dtf.to_sql("test_timestamp", conn, index=False)
    # Fails here: table reflection raises KeyError 'timestamp_ntz' inside the
    # dialect's GET_COLUMNS_TYPE_MAP (see traceback below).
    read_sql_table("test_timestamp", conn)
Here is my output:
C:\Temp>python databricks_timestamp_ntz.py
C:\Temp\databricks_timestamp_ntz.py:17: SADeprecationWarning: The dbapi() classmethod on dialect classes has been renamed to import_dbapi(). Implement an import_dbapi() classmethod directly on class <class 'databricks.sqlalchemy.base.DatabricksDialect'> to remove this warning; the old .dbapi() classmethod may be maintained for backwards compatibility.
engine = create_engine(
Traceback (most recent call last):
File "C:\Temp\databricks_timestamp_ntz.py", line 32, in <module>
read_sql_table("test_timestamp", conn)
File "C:\Program Files\Python312\Lib\site-packages\pandas\io\sql.py", line 348, in read_sql_table
table = pandas_sql.read_table(
^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python312\Lib\site-packages\pandas\io\sql.py", line 1665, in read_table
self.meta.reflect(bind=self.con, only=[table_name], views=True)
File "C:\Program Files\Python312\Lib\site-packages\sqlalchemy\sql\schema.py", line 5788, in reflect
_reflect_info = insp._get_reflection_info(
^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python312\Lib\site-packages\sqlalchemy\engine\reflection.py", line 2006, in _get_reflection_info
columns=run(
^^^^
File "C:\Program Files\Python312\Lib\site-packages\sqlalchemy\engine\reflection.py", line 1992, in run
res = meth(filter_names=_fn, **kw)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python312\Lib\site-packages\sqlalchemy\engine\reflection.py", line 921, in get_multi_columns
table_col_defs = dict(
^^^^^
File "C:\Program Files\Python312\Lib\site-packages\sqlalchemy\engine\default.py", line 1099, in _default_multi_reflect
single_tbl_method(
File "C:\Program Files\Python312\Lib\site-packages\databricks\sqlalchemy\base.py", line 150, in get_columns
row_dict = parse_column_info_from_tgetcolumnsresponse(col)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "C:\Program Files\Python312\Lib\site-packages\databricks\sqlalchemy\_parse.py", line 330, in parse_column_info_from_tgetcolumnsresponse
_col_type = GET_COLUMNS_TYPE_MAP[_raw_col_type]
~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^
KeyError: 'timestamp_ntz'
I'm using:
python 3.12
pandas==2.1.3
sqlalchemy==2.0.22
databricks-sql-connector==3.0.0
Windows
If I have a pandas
DataFrame with a datetime64[ns] data type and use to_sql to write it into Databricks, then read_sql_table to read the same table, it fails to handle the TIMESTAMP_NTZ data type. Here is example code:
Here is my output: