From 8b426d73528368396e8babad63fc1f18fdc3fa6e Mon Sep 17 00:00:00 2001
From: Tim Swast
Date: Fri, 31 May 2019 09:27:30 -0700
Subject: [PATCH] Fix breaking change. Don't require pyarrow if schema is set,
 but warn.

---
 bigquery/google/cloud/bigquery/client.py | 15 +++++++-
 bigquery/tests/unit/test_client.py       | 48 ++++++++++++++++++++++++
 2 files changed, 62 insertions(+), 1 deletion(-)

diff --git a/bigquery/google/cloud/bigquery/client.py b/bigquery/google/cloud/bigquery/client.py
index 78d718aa6a2a..9fd4c5368efa 100644
--- a/bigquery/google/cloud/bigquery/client.py
+++ b/bigquery/google/cloud/bigquery/client.py
@@ -28,7 +28,12 @@
 import os
 import tempfile
 import uuid
+import warnings
 
+try:
+    import pyarrow
+except ImportError:  # pragma: NO COVER
+    pyarrow = None
 import six
 
 from google import resumable_media
@@ -1304,9 +1309,17 @@ def load_table_from_dataframe(
         os.close(tmpfd)
 
         try:
-            if job_config.schema:
+            if pyarrow and job_config.schema:
                 _pandas_helpers.to_parquet(dataframe, job_config.schema, tmppath)
             else:
+                if job_config.schema:
+                    warnings.warn(
+                        "job_config.schema is set, but not used to assist in "
+                        "identifying correct types for data serialization. "
+                        "Please install the pyarrow package.",
+                        PendingDeprecationWarning,
+                        stacklevel=2,
+                    )
                 dataframe.to_parquet(tmppath)
 
             with open(tmppath, "rb") as parquet_file:
diff --git a/bigquery/tests/unit/test_client.py b/bigquery/tests/unit/test_client.py
index dd98f2bcce64..826916081d4d 100644
--- a/bigquery/tests/unit/test_client.py
+++ b/bigquery/tests/unit/test_client.py
@@ -20,6 +20,7 @@
 import io
 import json
 import unittest
+import warnings
 
 import mock
 import requests
@@ -5000,6 +5001,53 @@ def test_load_table_from_dataframe_w_custom_job_config(self):
         assert sent_config is job_config
         assert sent_config.source_format == job.SourceFormat.PARQUET
 
+    @unittest.skipIf(pandas is None, "Requires `pandas`")
+    @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
+    def test_load_table_from_dataframe_w_schema_wo_pyarrow(self):
+        from google.cloud.bigquery.client import _DEFAULT_NUM_RETRIES
+        from google.cloud.bigquery import job
+        from google.cloud.bigquery.schema import SchemaField
+
+        client = self._make_client()
+        records = [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
+        dataframe = pandas.DataFrame(records)
+        schema = (SchemaField("name", "STRING"), SchemaField("age", "INTEGER"))
+        job_config = job.LoadJobConfig(schema=schema)
+
+        load_patch = mock.patch(
+            "google.cloud.bigquery.client.Client.load_table_from_file", autospec=True
+        )
+        pyarrow_patch = mock.patch("google.cloud.bigquery.client.pyarrow", None)
+
+        with load_patch as load_table_from_file, pyarrow_patch, warnings.catch_warnings(
+            record=True
+        ) as warned:
+            client.load_table_from_dataframe(
+                dataframe, self.TABLE_REF, job_config=job_config, location=self.LOCATION
+            )
+
+        assert len(warned) == 1
+        warning = warned[0]
+        assert warning.category is PendingDeprecationWarning
+        assert "pyarrow" in str(warning)
+
+        load_table_from_file.assert_called_once_with(
+            client,
+            mock.ANY,
+            self.TABLE_REF,
+            num_retries=_DEFAULT_NUM_RETRIES,
+            rewind=True,
+            job_id=mock.ANY,
+            job_id_prefix=None,
+            location=self.LOCATION,
+            project=None,
+            job_config=mock.ANY,
+        )
+
+        sent_config = load_table_from_file.mock_calls[0][2]["job_config"]
+        assert sent_config.source_format == job.SourceFormat.PARQUET
+        assert tuple(sent_config.schema) == schema
+
     @unittest.skipIf(pandas is None, "Requires `pandas`")
     @unittest.skipIf(pyarrow is None, "Requires `pyarrow`")
     def test_load_table_from_dataframe_w_nulls(self):
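
For illustration, here is a minimal sketch of the caller-visible behavior
after this patch. It assumes google-cloud-bigquery (with credentials) and
pandas are installed but pyarrow is not (pandas then needs another parquet
engine, such as fastparquet, for DataFrame.to_parquet), and the dataset and
table names below are placeholders, not from the patch.

import warnings

import pandas
from google.cloud import bigquery

client = bigquery.Client()
# Placeholder identifiers; substitute a real dataset and table.
table_ref = client.dataset("my_dataset").table("monty_python")

dataframe = pandas.DataFrame(
    [{"name": "Monty", "age": 100}, {"name": "Python", "age": 60}]
)
# The schema is still accepted without pyarrow, but it is no longer used to
# pick column types during serialization.
job_config = bigquery.LoadJobConfig(
    schema=[
        bigquery.SchemaField("name", "STRING"),
        bigquery.SchemaField("age", "INTEGER"),
    ]
)

with warnings.catch_warnings(record=True) as warned:
    warnings.simplefilter("always")
    client.load_table_from_dataframe(dataframe, table_ref, job_config=job_config)

# Instead of failing on the missing pyarrow import, the client falls back to
# dataframe.to_parquet() and emits a PendingDeprecationWarning asking the
# caller to install pyarrow.
assert any(w.category is PendingDeprecationWarning for w in warned)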