diff --git a/packages/bigquery-magics/bigquery_magics/bigquery.py b/packages/bigquery-magics/bigquery_magics/bigquery.py index 0826010adca3..a0642e34aec3 100644 --- a/packages/bigquery-magics/bigquery_magics/bigquery.py +++ b/packages/bigquery-magics/bigquery_magics/bigquery.py @@ -635,41 +635,26 @@ def _colab_node_expansion_callback(request: dict, params_str: str): MAX_GRAPH_VISUALIZATION_QUERY_RESULT_SIZE = 100_000 -def _get_graph_name(query_text: str): - """Returns the name of the graph queried. - - Supports GRAPH only, not GRAPH_TABLE. - - Args: - query_text: The SQL query text. - - Returns: - A (dataset_id, graph_id) tuple, or None if the graph name cannot be determined. - """ - match = re.match(r"\s*GRAPH\s+(\S+)\.(\S+)", query_text, re.IGNORECASE) - if match: - (dataset_id, graph_id) = (match.group(1)), match.group(2) - if "`" in dataset_id or "`" in graph_id: - return None # Backticks in graph name not support for schema view - return (dataset_id, graph_id) - return None - - def _get_graph_schema( bq_client: bigquery.client.Client, query_text: str, query_job: bigquery.job.QueryJob ): - graph_name_result = _get_graph_name(query_text) - if graph_name_result is None: + property_graphs = query_job.referenced_property_graphs + if len(property_graphs) != 1: return None - dataset_id, graph_id = graph_name_result + + graph_ref = property_graphs[0] info_schema_query = f""" select PROPERTY_GRAPH_METADATA_JSON - FROM `{query_job.configuration.destination.project}.{dataset_id}`.INFORMATION_SCHEMA.PROPERTY_GRAPHS + FROM `{graph_ref.project}.{graph_ref.dataset_id}`.INFORMATION_SCHEMA.PROPERTY_GRAPHS WHERE PROPERTY_GRAPH_NAME = @graph_id """ job_config = bigquery.QueryJobConfig( - query_parameters=[bigquery.ScalarQueryParameter("graph_id", "STRING", graph_id)] + query_parameters=[ + bigquery.ScalarQueryParameter( + "graph_id", "STRING", graph_ref.property_graph_id + ) + ] ) job_config.use_legacy_sql = False try: diff --git a/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py b/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py index efb2c2f82bf6..3be93afdb721 100644 --- a/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py +++ b/packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py @@ -241,23 +241,78 @@ def test__run_query_dry_run_without_errors_is_silent(): assert len(captured.stdout) == 0 -def test__get_graph_name(): - assert magics._get_graph_name("GRAPH foo.bar") == ("foo", "bar") - assert magics._get_graph_name("GRAPH `foo.bar`") is None - assert magics._get_graph_name("GRAPH `foo`.bar") is None - assert magics._get_graph_name("SELECT 1") is None - - def test__get_graph_schema_exception(): bq_client = mock.create_autospec(bigquery.Client, instance=True) bq_client.query.side_effect = Exception("error") query_text = "GRAPH foo.bar" query_job = mock.Mock() - query_job.configuration.destination.project = "my-project" + + graph_ref = mock.Mock() + graph_ref.project = "my-project" + graph_ref.dataset_id = "dataset" + graph_ref.property_graph_id = "graph" + query_job.referenced_property_graphs = [graph_ref] assert magics._get_graph_schema(bq_client, query_text, query_job) is None +def test__get_graph_schema_zero_references(): + bq_client = mock.create_autospec(bigquery.Client, instance=True) + query_job = mock.Mock() + query_job.referenced_property_graphs = [] + + assert magics._get_graph_schema(bq_client, "SELECT 1", query_job) is None + + +def test__get_graph_schema_two_references(): + bq_client = mock.create_autospec(bigquery.Client, instance=True) + query_job = mock.Mock() + + ref1 = mock.Mock() + ref2 = mock.Mock() + query_job.referenced_property_graphs = [ref1, ref2] + + assert magics._get_graph_schema(bq_client, "SELECT 1", query_job) is None + + +def test__get_graph_schema_success(): + bq_client = mock.create_autospec(bigquery.Client, instance=True) + query_job = mock.Mock() + + graph_ref = mock.Mock() + graph_ref.project = "my-project" + graph_ref.dataset_id = "dataset" + graph_ref.property_graph_id = "graph" + query_job.referenced_property_graphs = [graph_ref] + + mock_df = mock.MagicMock() + mock_df.shape = (1, 1) + mock_df.iloc.__getitem__.return_value = "schema_json" + bq_client.query.return_value.to_dataframe.return_value = mock_df + + with mock.patch( + "bigquery_magics.bigquery.graph_server._convert_schema" + ) as convert_mock: + convert_mock.return_value = {"nodes": [], "edges": []} + + result = magics._get_graph_schema(bq_client, "SELECT 1", query_job) + + assert result == {"nodes": [], "edges": []} + convert_mock.assert_called_once_with("schema_json") + + called_query = bq_client.query.call_args[0][0] + assert ( + "FROM `my-project.dataset`.INFORMATION_SCHEMA.PROPERTY_GRAPHS" + in called_query + ) + + called_config = bq_client.query.call_args[1]["job_config"] + called_params = called_config.query_parameters + assert len(called_params) == 1 + assert called_params[0].name == "graph_id" + assert called_params[0].value == "graph" + + @pytest.mark.skipif( bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`" ) @@ -417,6 +472,12 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch): reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present", ) def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch): + """If `spanner-graph-notebook` is not installed, the graph visualizer + widget cannot be displayed. + """ + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -468,6 +529,10 @@ def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_int_result(monkeypatch): + """Graph visualization of integer scalars is supported.""" + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -519,6 +584,10 @@ def test_bigquery_graph_int_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_str_result(monkeypatch): + """Graph visualization of string scalars is supported.""" + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -570,6 +639,10 @@ def test_bigquery_graph_str_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_json_json_result(monkeypatch): + """Graph visualization of JSON objects with valid JSON string fields is supported.""" + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -639,6 +712,9 @@ def test_bigquery_graph_json_json_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_json_result(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -758,6 +834,9 @@ def test_bigquery_graph_json_result(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_size_exceeds_max(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -813,6 +892,9 @@ def test_bigquery_graph_size_exceeds_max(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_size_exceeds_query_result_max(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -869,6 +951,9 @@ def test_bigquery_graph_size_exceeds_query_result_max(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_with_args_serialization(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -938,6 +1023,9 @@ def test_bigquery_graph_with_args_serialization(monkeypatch): reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`", ) def test_bigquery_graph_colab(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) # Mock the colab module so the code under test uses colab.register_callback(), rather than # GraphServer. sys.modules["google.colab"] = mock.Mock() @@ -1073,6 +1161,9 @@ def test_colab_node_expansion_callback(): reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present", ) def test_bigquery_graph_missing_spanner_deps(monkeypatch): + monkeypatch.setattr( + "bigquery_magics.bigquery._get_graph_schema", lambda *args: None + ) globalipapp.start_ipython() ip = globalipapp.get_ipython() ip.extension_manager.load_extension("bigquery_magics") @@ -1142,11 +1233,17 @@ def test_add_graph_widget_with_schema(monkeypatch): query_result = pandas.DataFrame([{"id": 1}], columns=["result"]) query_text = "GRAPH my_dataset.my_graph" - query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True) + query_job = mock.Mock() query_job.configuration.destination.project = "p" query_job.configuration.destination.dataset_id = "d" query_job.configuration.destination.table_id = "t" + graph_ref = mock.Mock() + graph_ref.project = "p" + graph_ref.dataset_id = "my_dataset" + graph_ref.property_graph_id = "my_graph" + query_job.referenced_property_graphs = [graph_ref] + args = mock.Mock() args.bigquery_api_endpoint = "e" args.project = "p" @@ -1203,11 +1300,13 @@ def test_add_graph_widget_no_graph_name(monkeypatch): query_result = pandas.DataFrame([{"id": 1}], columns=["result"]) query_text = "SELECT * FROM my_dataset.my_table" - query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True) + query_job = mock.Mock() query_job.configuration.destination.project = "p" query_job.configuration.destination.dataset_id = "d" query_job.configuration.destination.table_id = "t" + query_job.referenced_property_graphs = [] + args = mock.Mock() args.bigquery_api_endpoint = "e" args.project = "p" @@ -1244,11 +1343,17 @@ def test_add_graph_widget_schema_not_found(monkeypatch): query_result = pandas.DataFrame([{"id": 1}], columns=["result"]) query_text = "GRAPH my_dataset.my_graph" - query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True) + query_job = mock.Mock() query_job.configuration.destination.project = "p" query_job.configuration.destination.dataset_id = "d" query_job.configuration.destination.table_id = "t" + graph_ref = mock.Mock() + graph_ref.project = "p" + graph_ref.dataset_id = "my_dataset" + graph_ref.property_graph_id = "my_graph" + query_job.referenced_property_graphs = [graph_ref] + args = mock.Mock() args.bigquery_api_endpoint = "e" args.project = "p" @@ -1293,9 +1398,8 @@ def test_bigquery_magic_default_connection_user_agent(): client_info_arg = conn.call_args[1].get("client_info") assert client_info_arg is not None - assert ( - client_info_arg.user_agent - == f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" + assert client_info_arg.user_agent.startswith( + f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" ) @@ -1611,9 +1715,8 @@ def warning_match(warning): assert kwargs.get("credentials") is mock_credentials client_info = kwargs.get("client_info") assert client_info is not None - assert ( - client_info.user_agent - == f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" + assert client_info.user_agent.startswith( + f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}" ) query_job_mock.to_dataframe.assert_called_once_with( diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py index e82deb1ef3f4..e85e43f6a164 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/job/query.py @@ -50,7 +50,7 @@ from google.cloud.bigquery.table import _EmptyRowIterator from google.cloud.bigquery.table import RangePartitioning from google.cloud.bigquery.table import _table_arg_to_table_ref -from google.cloud.bigquery.table import TableReference +from google.cloud.bigquery.table import TableReference, PropertyGraphReference from google.cloud.bigquery.table import TimePartitioning from google.cloud.bigquery._tqdm_helpers import wait_for_query @@ -1332,6 +1332,30 @@ def referenced_tables(self): return tables + @property + def referenced_property_graphs(self): + """Return referenced property graphs from job statistics, if present. + + See: + https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_property_graphs + + Returns: + List[google.cloud.bigquery.table.PropertyGraphReference]: + mappings describing the property graphs, or an empty list + if the query has not yet completed. + """ + property_graphs = [] + + for pg in self._job_statistics().get("referencedPropertyGraphs", ()): + property_graphs.append( + PropertyGraphReference( + DatasetReference(pg["projectId"], pg["datasetId"]), + pg["propertyGraphId"], + ) + ) + + return property_graphs + @property def undeclared_query_parameters(self): """Return undeclared query parameters from job statistics, if present. diff --git a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py index 88b673a8b7e6..15ee2a61d722 100644 --- a/packages/google-cloud-bigquery/google/cloud/bigquery/table.py +++ b/packages/google-cloud-bigquery/google/cloud/bigquery/table.py @@ -138,7 +138,7 @@ def _reference_getter(table): def _view_use_legacy_sql_getter( - table: Union["Table", "TableListItem"] + table: Union["Table", "TableListItem"], ) -> Optional[bool]: """bool: Specifies whether to execute the view with Legacy or Standard SQL. @@ -359,6 +359,98 @@ def __repr__(self): return f"TableReference({dataset_ref!r}, '{self.table_id}')" +class PropertyGraphReference: + """PropertyGraphReferences are pointers to property graphs. + + Args: + dataset_ref: A pointer to the dataset + property_graph_id: The ID of the property graph + """ + + _PROPERTY_TO_API_FIELD = { + "dataset_id": "datasetId", + "project": "projectId", + "property_graph_id": "propertyGraphId", + } + + def __init__(self, dataset_ref: "DatasetReference", property_graph_id: str): + self._properties: Dict[str, Any] = {} + + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["project"], + dataset_ref.project, + ) + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["dataset_id"], + dataset_ref.dataset_id, + ) + _helpers._set_sub_prop( + self._properties, + self._PROPERTY_TO_API_FIELD["property_graph_id"], + property_graph_id, + ) + + @property + def project(self) -> str: + """str: Project bound to the property graph.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["project"] + ) + + @property + def dataset_id(self) -> str: + """str: ID of dataset containing the property graph.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["dataset_id"] + ) + + @property + def property_graph_id(self) -> str: + """str: The property graph ID.""" + return _helpers._get_sub_prop( + self._properties, self._PROPERTY_TO_API_FIELD["property_graph_id"] + ) + + @classmethod + def from_api_repr(cls, resource: dict) -> "PropertyGraphReference": + """Factory: construct a property graph reference given its API representation.""" + from google.cloud.bigquery.dataset import DatasetReference + + project = resource["projectId"] + dataset_id = resource["datasetId"] + property_graph_id = resource["propertyGraphId"] + + return cls(DatasetReference(project, dataset_id), property_graph_id) + + def to_api_repr(self) -> dict: + """Construct the API resource representation of this property graph reference.""" + return copy.deepcopy(self._properties) + + def __str__(self): + return f"{self.project}.{self.dataset_id}.{self.property_graph_id}" + + def __repr__(self): + from google.cloud.bigquery.dataset import DatasetReference + + dataset_ref = DatasetReference(self.project, self.dataset_id) + return f"PropertyGraphReference({dataset_ref!r}, '{self.property_graph_id}')" + + def __eq__(self, other): + if isinstance(other, PropertyGraphReference): + return ( + self.project == other.project + and self.dataset_id == other.dataset_id + and self.property_graph_id == other.property_graph_id + ) + else: + return NotImplemented + + def __hash__(self): + return hash((self.project, self.dataset_id, self.property_graph_id)) + + class Table(_TableBase): """Tables represent a set of rows whose values correspond to a schema. diff --git a/packages/google-cloud-bigquery/noxfile.py b/packages/google-cloud-bigquery/noxfile.py index e97750051945..d029c84ac5e3 100644 --- a/packages/google-cloud-bigquery/noxfile.py +++ b/packages/google-cloud-bigquery/noxfile.py @@ -68,7 +68,6 @@ def wrapper(*args, **kwargs): # 'docfx' is excluded since it only needs to run in 'docs-presubmit' nox.options.sessions = [ "unit", - "unit_noextras", "mypy", "system", "snippets", diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py b/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py index 35041aa1b965..4d9cccf932c3 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_async_job_retry.py @@ -26,8 +26,6 @@ def test_cancel_w_custom_retry(global_time_lock): - from google.cloud.bigquery.retry import DEFAULT_RETRY - api_path = "/projects/{}/jobs/{}/cancel".format(PROJECT, JOB_ID) resource = { "jobReference": { @@ -49,8 +47,11 @@ def test_cancel_w_custom_retry(global_time_lock): google.cloud.bigquery.job._JobReference(JOB_ID, PROJECT, "EU"), client ) - retry = DEFAULT_RETRY.with_deadline(1).with_predicate( - lambda exc: isinstance(exc, ValueError) + retry = google.api_core.retry.Retry( + predicate=lambda exc: isinstance(exc, ValueError), + initial=0.01, + maximum=0.01, + deadline=1.0, ) with mock.patch( diff --git a/packages/google-cloud-bigquery/tests/unit/job/test_query.py b/packages/google-cloud-bigquery/tests/unit/job/test_query.py index 4a6771c4639b..3720e2a06c13 100644 --- a/packages/google-cloud-bigquery/tests/unit/job/test_query.py +++ b/packages/google-cloud-bigquery/tests/unit/job/test_query.py @@ -680,6 +680,55 @@ def test_referenced_tables(self): self.assertEqual(remote.dataset_id, "other-dataset") self.assertEqual(remote.project, "other-project-123") + def test_referenced_property_graphs(self): + from google.cloud.bigquery.table import PropertyGraphReference + + ref_pg_resource = [ + { + "projectId": self.PROJECT, + "datasetId": "dataset", + "propertyGraphId": "pg1", + }, + { + "projectId": self.PROJECT, + "datasetId": "dataset", + "propertyGraphId": "pg2", + }, + { + "projectId": "other-project-123", + "datasetId": "other-dataset", + "propertyGraphId": "other-pg", + }, + ] + client = _make_client(project=self.PROJECT) + job = self._make_one(self.JOB_ID, self.QUERY, client) + self.assertEqual(job.referenced_property_graphs, []) + + statistics = job._properties["statistics"] = {} + self.assertEqual(job.referenced_property_graphs, []) + + query_stats = statistics["query"] = {} + self.assertEqual(job.referenced_property_graphs, []) + + query_stats["referencedPropertyGraphs"] = ref_pg_resource + + pg1, pg2, remote = job.referenced_property_graphs + + self.assertIsInstance(pg1, PropertyGraphReference) + self.assertEqual(pg1.property_graph_id, "pg1") + self.assertEqual(pg1.dataset_id, "dataset") + self.assertEqual(pg1.project, self.PROJECT) + + self.assertIsInstance(pg2, PropertyGraphReference) + self.assertEqual(pg2.property_graph_id, "pg2") + self.assertEqual(pg2.dataset_id, "dataset") + self.assertEqual(pg2.project, self.PROJECT) + + self.assertIsInstance(remote, PropertyGraphReference) + self.assertEqual(remote.property_graph_id, "other-pg") + self.assertEqual(remote.dataset_id, "other-dataset") + self.assertEqual(remote.project, "other-project-123") + def test_timeline(self): timeline_resource = [ {