From bd46c45c76c15f5eacebd9c6b52c4517e8281a42 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Fri, 20 Mar 2026 16:30:55 -0700 Subject: [PATCH 1/8] feat: Add python client api for JobStatistics.referenced_property_graphs. --- .../google/cloud/bigquery/job/query.py | 31 +++- .../google/cloud/bigquery/table.py | 157 ++++++++++++++++-- .../tests/unit/job/test_query.py | 67 ++++++-- 3 files changed, 226 insertions(+), 29 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index e82deb1ef3f4..d5318519d9b1 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -50,7 +50,7 @@ from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import _table_arg_to_table_ref -from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import TableReference, PropertyGraphReference from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery._tqdm_helpers import wait_for_query @@ -1332,6 +1332,35 @@ def referenced_tables(self): return tables + @property + def referenced_property_graphs(self): + """Return referenced property graphs from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_property_graphs + + Returns: + List[google.cloud.bigquery.table.PropertyGraphReference]: + mappings describing the property graphs, or an empty list + if the query has not yet completed. + """ + property_graphs = [] + datasets_by_project_name = {} + + for pg in self._job_statistics().get("referencedPropertyGraphs", ()): + pg_project = pg["projectId"] + + ds_id = pg["datasetId"] + pg_dataset = datasets_by_project_name.get((pg_project, ds_id)) + if pg_dataset is None: + pg_dataset = DatasetReference(pg_project, ds_id) + datasets_by_project_name[(pg_project, ds_id)] = pg_dataset + + pg_name = pg["propertyGraphId"] + property_graphs.append(PropertyGraphReference(pg_dataset, pg_name)) + + return property_graphs + @property def undeclared_query_parameters(self): """Return undeclared query parameters from job statistics, if present. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 88b673a8b7e6..5f71d1cc02f9 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -138,7 +138,7 @@ def _reference_getter(table): def _view_use_legacy_sql_getter( - table: Union["Table", "TableListItem"] + table: Union["Table", "TableListItem"], ) -> Optional[bool]: """bool: Specifies whether to execute the view with Legacy or Standard SQL. @@ -359,6 +359,131 @@ def __repr__(self): return f"TableReference({dataset_ref!r}, '{self.table_id}')" +class PropertyGraphReference: + """PropertyGraphReferences are pointers to property graphs. + + Args: + dataset_ref: A pointer to the dataset + property_graph_id: The ID of the property graph + """ + + _PROPERTY_TO_API_FIELD = { + "dataset_id": "datasetId", + "project": "projectId", + "property_graph_id": "propertyGraphId", + } + + def __init__(self, dataset_ref: "DatasetReference", property_graph_id: str): + self._properties = {} + + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["project"], + dataset_ref.project, + ) + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["dataset_id"], + dataset_ref.dataset_id, + ) + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["property_graph_id"], + property_graph_id, + ) + + @property + def project(self) -> str: + """str: Project bound to the property graph.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["project"] + ) + + @property + def dataset_id(self) -> str: + """str: ID of dataset containing the property graph.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] + ) + + @property + def property_graph_id(self) -> str: + """str: The property graph ID.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["property_graph_id"] + ) + + @classmethod + def from_string( + cls, property_graph_id: str, default_project: Optional[str] = None + ) -> "PropertyGraphReference": + """Construct a property graph reference from string. + + Args: + property_graph_id (str): + A property graph ID in standard SQL format. + default_project (Optional[str]): + The project ID to use when ``property_graph_id`` does not + include a project ID. + + Returns: + PropertyGraphReference: Property graph reference parsed from ``property_graph_id``. + """ + from google.cloud.bigquery.dataset import DatasetReference + + ( + output_project_id, + output_dataset_id, + output_property_graph_id, + ) = _helpers._parse_3_part_id( + property_graph_id, + default_project=default_project, + property_name="property_graph_id", + ) + + return cls( + DatasetReference(output_project_id, output_dataset_id), + output_property_graph_id, + ) + + @classmethod + def from_api_repr(cls, resource: dict) -> "PropertyGraphReference": + """Factory: construct a property graph reference given its API representation.""" + from google.cloud.bigquery.dataset import DatasetReference + + project = resource["projectId"] + dataset_id = resource["datasetId"] + property_graph_id = resource["propertyGraphId"] + + return cls(DatasetReference(project, dataset_id), property_graph_id) + + def to_api_repr(self) -> dict: + """Construct the API resource representation of this property graph reference.""" + return copy.deepcopy(self._properties) + + def __str__(self): + return f"{self.project}.{self.dataset_id}.{self.property_graph_id}" + + def __repr__(self): + from google.cloud.bigquery.dataset import DatasetReference + + dataset_ref = DatasetReference(self.project, self.dataset_id) + return f"PropertyGraphReference({dataset_ref!r}, '{self.property_graph_id}')" + + def __eq__(self, other): + if isinstance(other, PropertyGraphReference): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.property_graph_id == other.property_graph_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.property_graph_id)) + + class Table(_TableBase): """Tables represent a set of rows whose values correspond to a schema. @@ -452,9 +577,9 @@ def biglake_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties[ - self._PROPERTY_TO_API_FIELD["biglake_configuration"] - ] = api_repr + self._properties[self._PROPERTY_TO_API_FIELD["biglake_configuration"]] = ( + api_repr + ) @property def require_partition_filter(self): @@ -468,9 +593,9 @@ def require_partition_filter(self): @require_partition_filter.setter def require_partition_filter(self, value): - self._properties[ - self._PROPERTY_TO_API_FIELD["require_partition_filter"] - ] = value + self._properties[self._PROPERTY_TO_API_FIELD["require_partition_filter"]] = ( + value + ) @property def schema(self): @@ -568,9 +693,9 @@ def encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties[ - self._PROPERTY_TO_API_FIELD["encryption_configuration"] - ] = api_repr + self._properties[self._PROPERTY_TO_API_FIELD["encryption_configuration"]] = ( + api_repr + ) @property def created(self): @@ -845,9 +970,9 @@ def expires(self, value): if not isinstance(value, datetime.datetime) and value is not None: raise ValueError("Pass a datetime, or None") value_ms = google.cloud._helpers._millis_from_datetime(value) - self._properties[ - self._PROPERTY_TO_API_FIELD["expires"] - ] = _helpers._str_or_none(value_ms) + self._properties[self._PROPERTY_TO_API_FIELD["expires"]] = ( + _helpers._str_or_none(value_ms) + ) @property def friendly_name(self): @@ -1043,9 +1168,9 @@ def external_data_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties[ - self._PROPERTY_TO_API_FIELD["external_data_configuration"] - ] = api_repr + self._properties[self._PROPERTY_TO_API_FIELD["external_data_configuration"]] = ( + api_repr + ) @property def snapshot_definition(self) -> Optional["SnapshotDefinition"]: diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 4a6771c4639b..9e7518985196 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -680,6 +680,55 @@ def test_referenced_tables(self): self.assertEqual(remote.dataset_id, "other-dataset") self.assertEqual(remote.project, "other-project-123") + def test_referenced_property_graphs(self): + from google.cloud.bigquery.table import PropertyGraphReference + + ref_pg_resource = [ + { + "projectId": self.PROJECT, + "datasetId": "dataset", + "propertyGraphId": "pg1", + }, + { + "projectId": self.PROJECT, + "datasetId": "dataset", + "propertyGraphId": "pg2", + }, + { + "projectId": "other-project-123", + "datasetId": "other-dataset", + "propertyGraphId": "other-pg", + }, + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.referenced_property_graphs, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.referenced_property_graphs, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.referenced_property_graphs, []) + + query_stats["referencedPropertyGraphs"] = ref_pg_resource + + pg1, pg2, remote = job.referenced_property_graphs + + self.assertIsInstance(pg1, PropertyGraphReference) + self.assertEqual(pg1.property_graph_id, "pg1") + self.assertEqual(pg1.dataset_id, "dataset") + self.assertEqual(pg1.project, self.PROJECT) + + self.assertIsInstance(pg2, PropertyGraphReference) + self.assertEqual(pg2.property_graph_id, "pg2") + self.assertEqual(pg2.dataset_id, "dataset") + self.assertEqual(pg2.project, self.PROJECT) + + self.assertIsInstance(remote, PropertyGraphReference) + self.assertEqual(remote.property_graph_id, "other-pg") + self.assertEqual(remote.dataset_id, "other-dataset") + self.assertEqual(remote.project, "other-project-123") + def test_timeline(self): timeline_resource = [ { @@ -1586,12 +1635,10 @@ def test_result_with_start_index_multi_page(self): def test_result_error(self): from google.cloud import exceptions - query = textwrap.dedent( - """ + query = textwrap.dedent(""" SELECT foo, bar FROM table_baz - WHERE foo == bar""" - ) + WHERE foo == bar""") client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, query, client) @@ -1635,12 +1682,10 @@ def test_result_error(self): assert expected_line in debug_message def test_result_transport_timeout_error(self): - query = textwrap.dedent( - """ + query = textwrap.dedent(""" SELECT foo, bar FROM table_baz - WHERE foo == bar""" - ) + WHERE foo == bar""") client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, query, client) @@ -1694,12 +1739,10 @@ def test_schema(self): def test__begin_error(self): from google.cloud import exceptions - query = textwrap.dedent( - """ + query = textwrap.dedent(""" SELECT foo, bar FROM table_baz - WHERE foo == bar""" - ) + WHERE foo == bar""") client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, query, client) From 7bc21e452f4812dc7d6c57a52de64ead6c7a3f77 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Mon, 23 Mar 2026 10:29:36 -0700 Subject: [PATCH 2/8] Fix type annotation for PropertyGraphReference._properties --- packages/google-cloud-bigquery/google/cloud/bigquery/table.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 5f71d1cc02f9..aeac5b287d73 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -374,7 +374,7 @@ class PropertyGraphReference: } def __init__(self, dataset_ref: "DatasetReference", property_graph_id: str): - self._properties = {} + self._properties: Dict[str, Any] = {} _helpers._set_sub_prop( self._properties, From 569497b852e97a806fe4d1328c5471146a871d66 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Mon, 23 Mar 2026 12:32:45 -0700 Subject: [PATCH 3/8] Format with Black 23.7.0 --- .../google/cloud/bigquery/table.py | 30 +++++++++---------- .../tests/unit/job/test_query.py | 18 +++++++---- 2 files changed, 27 insertions(+), 21 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index aeac5b287d73..f6bb65a4e55b 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -577,9 +577,9 @@ def biglake_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties[self._PROPERTY_TO_API_FIELD["biglake_configuration"]] = ( - api_repr - ) + self._properties[ + self._PROPERTY_TO_API_FIELD["biglake_configuration"] + ] = api_repr @property def require_partition_filter(self): @@ -593,9 +593,9 @@ def require_partition_filter(self): @require_partition_filter.setter def require_partition_filter(self, value): - self._properties[self._PROPERTY_TO_API_FIELD["require_partition_filter"]] = ( - value - ) + self._properties[ + self._PROPERTY_TO_API_FIELD["require_partition_filter"] + ] = value @property def schema(self): @@ -693,9 +693,9 @@ def encryption_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties[self._PROPERTY_TO_API_FIELD["encryption_configuration"]] = ( - api_repr - ) + self._properties[ + self._PROPERTY_TO_API_FIELD["encryption_configuration"] + ] = api_repr @property def created(self): @@ -970,9 +970,9 @@ def expires(self, value): if not isinstance(value, datetime.datetime) and value is not None: raise ValueError("Pass a datetime, or None") value_ms = google.cloud._helpers._millis_from_datetime(value) - self._properties[self._PROPERTY_TO_API_FIELD["expires"]] = ( - _helpers._str_or_none(value_ms) - ) + self._properties[ + self._PROPERTY_TO_API_FIELD["expires"] + ] = _helpers._str_or_none(value_ms) @property def friendly_name(self): @@ -1168,9 +1168,9 @@ def external_data_configuration(self, value): api_repr = value if value is not None: api_repr = value.to_api_repr() - self._properties[self._PROPERTY_TO_API_FIELD["external_data_configuration"]] = ( - api_repr - ) + self._properties[ + self._PROPERTY_TO_API_FIELD["external_data_configuration"] + ] = api_repr @property def snapshot_definition(self) -> Optional["SnapshotDefinition"]: diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 9e7518985196..3720e2a06c13 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -1635,10 +1635,12 @@ def test_result_with_start_index_multi_page(self): def test_result_error(self): from google.cloud import exceptions - query = textwrap.dedent(""" + query = textwrap.dedent( + """ SELECT foo, bar FROM table_baz - WHERE foo == bar""") + WHERE foo == bar""" + ) client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, query, client) @@ -1682,10 +1684,12 @@ def test_result_error(self): assert expected_line in debug_message def test_result_transport_timeout_error(self): - query = textwrap.dedent(""" + query = textwrap.dedent( + """ SELECT foo, bar FROM table_baz - WHERE foo == bar""") + WHERE foo == bar""" + ) client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, query, client) @@ -1739,10 +1743,12 @@ def test_schema(self): def test__begin_error(self): from google.cloud import exceptions - query = textwrap.dedent(""" + query = textwrap.dedent( + """ SELECT foo, bar FROM table_baz - WHERE foo == bar""") + WHERE foo == bar""" + ) client = _make_client(project=self.PROJECT) job = self._make_one(self.JOB_ID, query, client) From 84499894899d60fc8272a809ffb6faf19e0d4940 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 24 Mar 2026 10:38:55 -0700 Subject: [PATCH 4/8] refactor(bigquery): Simplify referenced property graphs and remove unused from_string method --- .../google/cloud/bigquery/job/query.py | 17 ++++------ .../google/cloud/bigquery/table.py | 33 ------------------- 2 files changed, 6 insertions(+), 44 deletions(-) diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index d5318519d9b1..e85e43f6a164 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -1345,19 +1345,14 @@ def referenced_property_graphs(self): if the query has not yet completed. """ property_graphs = [] - datasets_by_project_name = {} for pg in self._job_statistics().get("referencedPropertyGraphs", ()): - pg_project = pg["projectId"] - - ds_id = pg["datasetId"] - pg_dataset = datasets_by_project_name.get((pg_project, ds_id)) - if pg_dataset is None: - pg_dataset = DatasetReference(pg_project, ds_id) - datasets_by_project_name[(pg_project, ds_id)] = pg_dataset - - pg_name = pg["propertyGraphId"] - property_graphs.append(PropertyGraphReference(pg_dataset, pg_name)) + property_graphs.append( + PropertyGraphReference( + DatasetReference(pg["projectId"], pg["datasetId"]), + pg["propertyGraphId"], + ) + ) return property_graphs diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index f6bb65a4e55b..15ee2a61d722 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -413,39 +413,6 @@ def property_graph_id(self) -> str: self._properties, self._PROPERTY_TO_API_FIELD["property_graph_id"] ) - @classmethod - def from_string( - cls, property_graph_id: str, default_project: Optional[str] = None - ) -> "PropertyGraphReference": - """Construct a property graph reference from string. - - Args: - property_graph_id (str): - A property graph ID in standard SQL format. - default_project (Optional[str]): - The project ID to use when ``property_graph_id`` does not - include a project ID. - - Returns: - PropertyGraphReference: Property graph reference parsed from ``property_graph_id``. - """ - from google.cloud.bigquery.dataset import DatasetReference - - ( - output_project_id, - output_dataset_id, - output_property_graph_id, - ) = _helpers._parse_3_part_id( - property_graph_id, - default_project=default_project, - property_name="property_graph_id", - ) - - return cls( - DatasetReference(output_project_id, output_dataset_id), - output_property_graph_id, - ) - @classmethod def from_api_repr(cls, resource: dict) -> "PropertyGraphReference": """Factory: construct a property graph reference given its API representation.""" From 810e121c4547dadb6dd6bae0f5459c918cede134 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 24 Mar 2026 10:48:08 -0700 Subject: [PATCH 5/8] fix(bigquery): Remove non-existent unit_noextras session from nox.options.sessions --- packages/google-cloud-bigquery/noxfile.py | 1 - 1 file changed, 1 deletion(-) diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index e97750051945..d029c84ac5e3 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -68,7 +68,6 @@ def wrapper(*args, **kwargs): # 'docfx' is excluded since it only needs to run in 'docs-presubmit' nox.options.sessions = [ "unit", - "unit_noextras", "mypy", "system", "snippets", From 11957a76d9bc6325834d58b16681c793582996b6 Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 24 Mar 2026 11:23:01 -0700 Subject: [PATCH 6/8] Fix test_cancel_w_custom_retry flakiness in Python 3.12 --- .../tests/unit/job/test_async_job_retry.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py b/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py index 35041aa1b965..1d1eb6eb8ae5 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py @@ -49,8 +49,11 @@ def test_cancel_w_custom_retry(global_time_lock): google.cloud.bigquery.job._JobReference(JOB_ID, PROJECT, "EU"), client ) - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, ValueError) + retry = google.api_core.retry.Retry( + predicate=lambda exc: isinstance(exc, ValueError), + initial=0.01, + maximum=0.01, + deadline=1.0, ) with mock.patch( From 2493dae58a2742f8e6f2b207f95619d8ad37a98c Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Tue, 24 Mar 2026 11:26:37 -0700 Subject: [PATCH 7/8] Fix lint error (unused import) in test_async_job_retry.py --- .../tests/unit/job/test_async_job_retry.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py b/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py index 1d1eb6eb8ae5..4d9cccf932c3 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py @@ -26,8 +26,6 @@ def test_cancel_w_custom_retry(global_time_lock): - from google.cloud.bigquery.retry import DEFAULT_RETRY - api_path = "/projects/{}/jobs/{}/cancel".format(PROJECT, JOB_ID) resource = { "jobReference": { From 7dcd20972ce4d619867c3dbdb40e367e1820031c Mon Sep 17 00:00:00 2001 From: Eric Feiveson Date: Wed, 25 Mar 2026 14:31:05 -0700 Subject: [PATCH 8/8] feat: Refactor _get_graph_schema to use QueryJob.referenced_property_graphs and fix tests --- .../bigquery_magics/bigquery.py | 35 ++--- .../tests/unit/bigquery/test_bigquery.py | 137 +++++++++++++++--- 2 files changed, 130 insertions(+), 42 deletions(-) diff --git a/packages/bigquery-magics/bigquery_magics/bigquery.py b/packages/bigquery-magics/bigquery_magics/bigquery.py index 0826010adca3..a0642e34aec3 100644 --- a/packages/bigquery-magics/bigquery_magics/bigquery.py +++ b/packages/bigquery-magics/bigquery_magics/bigquery.py @@ -635,41 +635,26 @@ def _colab_node_expansion_callback(request: dict, params_str: str): MAX_GRAPH_VISUALIZATION_QUERY_RESULT_SIZE = 100_000 -def _get_graph_name(query_text: str): - """Returns the name of the graph queried. - - Supports GRAPH only, not GRAPH_TABLE. - - Args: - query_text: The SQL query text. - - Returns: - A (dataset_id, graph_id) tuple, or None if the graph name cannot be determined. - """ - match = re.match(r"\s*GRAPH\s+(\S+)\.(\S+)", query_text, re.IGNORECASE) - if match: - (dataset_id, graph_id) = (match.group(1)), match.group(2) - if "`" in dataset_id or "`" in graph_id: - return None # Backticks in graph name not support for schema view - return (dataset_id, graph_id) - return None - - def _get_graph_schema( bq_client: bigquery.client.Client, query_text: str, query_job: bigquery.job.QueryJob ): - graph_name_result = _get_graph_name(query_text) - if graph_name_result is None: + property_graphs = query_job.referenced_property_graphs + if len(property_graphs) != 1: return None - dataset_id, graph_id = graph_name_result + + graph_ref = property_graphs[0] info_schema_query = f""" select PROPERTY_GRAPH_METADATA_JSON - FROM `{query_job.configuration.destination.project}.{dataset_id}`.INFORMATION_SCHEMA.PROPERTY_GRAPHS + FROM `{graph_ref.project}.{graph_ref.dataset_id}`.INFORMATION_SCHEMA.PROPERTY_GRAPHS WHERE PROPERTY_GRAPH_NAME = @graph_id """ job_config = bigquery.QueryJobConfig( - query_parameters=[bigquery.ScalarQueryParameter("graph_id", "STRING", graph_id)] + query_parameters=[ + bigquery.ScalarQueryParameter( + "graph_id", "STRING", graph_ref.property_graph_id + ) + ] ) job_config.use_legacy_sql = False try: diff --git a/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py b/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py index efb2c2f82bf6..3be93afdb721 100644 --- a/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py +++ b/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py @@ -241,23 +241,78 @@ def test__run_query_dry_run_without_errors_is_silent(): assert len(captured.stdout) == 0 -def test__get_graph_name(): - assert magics._get_graph_name("GRAPH foo.bar") == ("foo", "bar") - assert magics._get_graph_name("GRAPH `foo.bar`") is None - assert magics._get_graph_name("GRAPH `foo`.bar") is None - assert magics._get_graph_name("SELECT 1") is None - - def test__get_graph_schema_exception(): bq_client = mock.create_autospec(bigquery.Client, instance=True) bq_client.query.side_effect = Exception("error") query_text = "GRAPH foo.bar" query_job = mock.Mock() - query_job.configuration.destination.project = "my-project" + + graph_ref = mock.Mock() + graph_ref.project = "my-project" + graph_ref.dataset_id = "dataset" + graph_ref.property_graph_id = "graph" + query_job.referenced_property_graphs = [graph_ref] assert magics._get_graph_schema(bq_client, query_text, query_job) is None +def test__get_graph_schema_zero_references(): + bq_client = mock.create_autospec(bigquery.Client, instance=True) + query_job = mock.Mock() + query_job.referenced_property_graphs = [] + + assert magics._get_graph_schema(bq_client, "SELECT 1", query_job) is None + + +def test__get_graph_schema_two_references(): + bq_client = mock.create_autospec(bigquery.Client, instance=True) + query_job = mock.Mock() + + ref1 = mock.Mock() + ref2 = mock.Mock() + query_job.referenced_property_graphs = [ref1, ref2] + + assert magics._get_graph_schema(bq_client, "SELECT 1", query_job) is None + + +def test__get_graph_schema_success(): + bq_client = mock.create_autospec(bigquery.Client, instance=True) + query_job = mock.Mock() + + graph_ref = mock.Mock() + graph_ref.project = "my-project" + graph_ref.dataset_id = "dataset" + graph_ref.property_graph_id = "graph" + query_job.referenced_property_graphs = [graph_ref] + + mock_df = mock.MagicMock() + mock_df.shape = (1, 1) + mock_df.iloc.__getitem__.return_value = "schema_json" + bq_client.query.return_value.to_dataframe.return_value = mock_df + + with mock.patch( + "bigquery_magics.bigquery.graph_server._convert_schema" + ) as convert_mock: + convert_mock.return_value = {"nodes": [], "edges": []} + + result = magics._get_graph_schema(bq_client, "SELECT 1", query_job) + + assert result == {"nodes": [], "edges": []} + convert_mock.assert_called_once_with("schema_json") + + called_query = bq_client.query.call_args[0][0] + assert ( + "FROM `my-project.dataset`.INFORMATION_SCHEMA.PROPERTY_GRAPHS" + in called_query + ) + + called_config = bq_client.query.call_args[1]["job_config"] + called_params = called_config.query_parameters + assert len(called_params) == 1 + assert called_params[0].name == "graph_id" + assert called_params[0].value == "graph" + + @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @@ -417,6 +472,12 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present", ) def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch): + """If `spanner-graph-notebook` is not installed, the graph visualizer + widget cannot be displayed. + """ + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -468,6 +529,10 @@ def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_int_result(monkeypatch): + """Graph visualization of integer scalars is supported.""" + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -519,6 +584,10 @@ def test_bigquery_graph_int_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_str_result(monkeypatch): + """Graph visualization of string scalars is supported.""" + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -570,6 +639,10 @@ def test_bigquery_graph_str_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_json_json_result(monkeypatch): + """Graph visualization of JSON objects with valid JSON string fields is supported.""" + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -639,6 +712,9 @@ def test_bigquery_graph_json_json_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_json_result(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -758,6 +834,9 @@ def test_bigquery_graph_json_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_size_exceeds_max(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -813,6 +892,9 @@ def test_bigquery_graph_size_exceeds_max(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_size_exceeds_query_result_max(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -869,6 +951,9 @@ def test_bigquery_graph_size_exceeds_query_result_max(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_with_args_serialization(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -938,6 +1023,9 @@ def test_bigquery_graph_with_args_serialization(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_colab(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) # Mock the colab module so the code under test uses colab.register_callback(), rather than # GraphServer. sys.modules["google.colab"] = mock.Mock() @@ -1073,6 +1161,9 @@ def test_colab_node_expansion_callback(): reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present", ) def test_bigquery_graph_missing_spanner_deps(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -1142,11 +1233,17 @@ def test_add_graph_widget_with_schema(monkeypatch): query_result = pandas.DataFrame([{"id": 1}], columns=["result"]) query_text = "GRAPH my_dataset.my_graph" - query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True) + query_job = mock.Mock() query_job.configuration.destination.project = "p" query_job.configuration.destination.dataset_id = "d" query_job.configuration.destination.table_id = "t" + graph_ref = mock.Mock() + graph_ref.project = "p" + graph_ref.dataset_id = "my_dataset" + graph_ref.property_graph_id = "my_graph" + query_job.referenced_property_graphs = [graph_ref] + args = mock.Mock() args.bigquery_api_endpoint = "e" args.project = "p" @@ -1203,11 +1300,13 @@ def test_add_graph_widget_no_graph_name(monkeypatch): query_result = pandas.DataFrame([{"id": 1}], columns=["result"]) query_text = "SELECT * FROM my_dataset.my_table" - query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True) + query_job = mock.Mock() query_job.configuration.destination.project = "p" query_job.configuration.destination.dataset_id = "d" query_job.configuration.destination.table_id = "t" + query_job.referenced_property_graphs = [] + args = mock.Mock() args.bigquery_api_endpoint = "e" args.project = "p" @@ -1244,11 +1343,17 @@ def test_add_graph_widget_schema_not_found(monkeypatch): query_result = pandas.DataFrame([{"id": 1}], columns=["result"]) query_text = "GRAPH my_dataset.my_graph" - query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True) + query_job = mock.Mock() query_job.configuration.destination.project = "p" query_job.configuration.destination.dataset_id = "d" query_job.configuration.destination.table_id = "t" + graph_ref = mock.Mock() + graph_ref.project = "p" + graph_ref.dataset_id = "my_dataset" + graph_ref.property_graph_id = "my_graph" + query_job.referenced_property_graphs = [graph_ref] + args = mock.Mock() args.bigquery_api_endpoint = "e" args.project = "p" @@ -1293,9 +1398,8 @@ def test_bigquery_magic_default_connection_user_agent(): client_info_arg = conn.call_args[1].get("client_info") assert client_info_arg is not None - assert ( - client_info_arg.user_agent - == f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" + assert client_info_arg.user_agent.startswith( + f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" ) @@ -1611,9 +1715,8 @@ def warning_match(warning): assert kwargs.get("credentials") is mock_credentials client_info = kwargs.get("client_info") assert client_info is not None - assert ( - client_info.user_agent - == f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" + assert client_info.user_agent.startswith( + f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" ) query_job_mock.to_dataframe.assert_called_once_with(