Skip to content
35 changes: 10 additions & 25 deletions packages/bigquery-magics/bigquery_magics/bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -635,41 +635,26 @@ def _colab_node_expansion_callback(request: dict, params_str: str):
MAX_GRAPH_VISUALIZATION_QUERY_RESULT_SIZE = 100_000


def _get_graph_name(query_text: str):
"""Returns the name of the graph queried.
Supports GRAPH only, not GRAPH_TABLE.
Args:
query_text: The SQL query text.
Returns:
A (dataset_id, graph_id) tuple, or None if the graph name cannot be determined.
"""
match = re.match(r"\s*GRAPH\s+(\S+)\.(\S+)", query_text, re.IGNORECASE)
if match:
(dataset_id, graph_id) = (match.group(1)), match.group(2)
if "`" in dataset_id or "`" in graph_id:
return None # Backticks in graph name not support for schema view
return (dataset_id, graph_id)
return None


def _get_graph_schema(
bq_client: bigquery.client.Client, query_text: str, query_job: bigquery.job.QueryJob
):
graph_name_result = _get_graph_name(query_text)
if graph_name_result is None:
property_graphs = query_job.referenced_property_graphs
if len(property_graphs) != 1:
return None
dataset_id, graph_id = graph_name_result

graph_ref = property_graphs[0]

info_schema_query = f"""
select PROPERTY_GRAPH_METADATA_JSON
FROM `{query_job.configuration.destination.project}.{dataset_id}`.INFORMATION_SCHEMA.PROPERTY_GRAPHS
FROM `{graph_ref.project}.{graph_ref.dataset_id}`.INFORMATION_SCHEMA.PROPERTY_GRAPHS
WHERE PROPERTY_GRAPH_NAME = @graph_id
"""
job_config = bigquery.QueryJobConfig(
query_parameters=[bigquery.ScalarQueryParameter("graph_id", "STRING", graph_id)]
query_parameters=[
bigquery.ScalarQueryParameter(
"graph_id", "STRING", graph_ref.property_graph_id
)
]
)
job_config.use_legacy_sql = False
try:
Expand Down
137 changes: 120 additions & 17 deletions packages/bigquery-magics/tests/unit/bigquery/test_bigquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,23 +241,78 @@ def test__run_query_dry_run_without_errors_is_silent():
assert len(captured.stdout) == 0


def test__get_graph_name():
    """Plain names are parsed; backticked names and non-GRAPH queries give None."""
    assert magics._get_graph_name("GRAPH foo.bar") == ("foo", "bar")

    for unsupported in ("GRAPH `foo.bar`", "GRAPH `foo`.bar", "SELECT 1"):
        assert magics._get_graph_name(unsupported) is None


def test__get_graph_schema_exception():
    """`_get_graph_schema` returns None when `bq_client.query` raises."""
    failing_client = mock.create_autospec(bigquery.Client, instance=True)
    failing_client.query.side_effect = Exception("error")

    property_graph = mock.Mock(
        project="my-project", dataset_id="dataset", property_graph_id="graph"
    )

    job = mock.Mock()
    job.configuration.destination.project = "my-project"
    job.referenced_property_graphs = [property_graph]

    schema = magics._get_graph_schema(failing_client, "GRAPH foo.bar", job)

    assert schema is None


def test__get_graph_schema_zero_references():
    """No schema is returned when the job references no property graphs."""
    client = mock.create_autospec(bigquery.Client, instance=True)
    job = mock.Mock(referenced_property_graphs=[])

    schema = magics._get_graph_schema(client, "SELECT 1", job)

    assert schema is None


def test__get_graph_schema_two_references():
    """No schema is returned when the job references more than one property graph."""
    client = mock.create_autospec(bigquery.Client, instance=True)
    job = mock.Mock(referenced_property_graphs=[mock.Mock(), mock.Mock()])

    schema = magics._get_graph_schema(client, "SELECT 1", job)

    assert schema is None


def test__get_graph_schema_success():
    """A single referenced property graph yields the converted schema.

    Also verifies that the INFORMATION_SCHEMA query is addressed to the
    referenced graph's project and dataset, and that the graph id is passed
    as the sole ``graph_id`` query parameter.
    """
    client = mock.create_autospec(bigquery.Client, instance=True)

    property_graph = mock.Mock(
        project="my-project", dataset_id="dataset", property_graph_id="graph"
    )
    job = mock.Mock(referenced_property_graphs=[property_graph])

    # Stand-in for a 1x1 dataframe whose only cell is the schema JSON.
    frame = mock.MagicMock()
    frame.shape = (1, 1)
    frame.iloc.__getitem__.return_value = "schema_json"
    client.query.return_value.to_dataframe.return_value = frame

    patch_target = "bigquery_magics.bigquery.graph_server._convert_schema"
    with mock.patch(patch_target) as convert_mock:
        convert_mock.return_value = {"nodes": [], "edges": []}
        schema = magics._get_graph_schema(client, "SELECT 1", job)

    assert schema == {"nodes": [], "edges": []}
    convert_mock.assert_called_once_with("schema_json")

    # call_args unpacks into (positional args, keyword args).
    positional, keyword = client.query.call_args
    assert (
        "FROM `my-project.dataset`.INFORMATION_SCHEMA.PROPERTY_GRAPHS"
        in positional[0]
    )

    params = keyword["job_config"].query_parameters
    assert len(params) == 1
    assert params[0].name == "graph_id"
    assert params[0].value == "graph"


@pytest.mark.skipif(
bigquery_storage is None, reason="Requires `google-cloud-bigquery-storage`"
)
Expand Down Expand Up @@ -417,6 +472,12 @@ def test_bigquery_magic_without_optional_arguments(monkeypatch):
reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present",
)
def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch):
"""If `spanner-graph-notebook` is not installed, the graph visualizer
widget cannot be displayed.
"""
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -468,6 +529,10 @@ def test_bigquery_graph_spanner_graph_notebook_missing(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_int_result(monkeypatch):
"""Graph visualization of integer scalars is supported."""
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -519,6 +584,10 @@ def test_bigquery_graph_int_result(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_str_result(monkeypatch):
"""Graph visualization of string scalars is supported."""
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -570,6 +639,10 @@ def test_bigquery_graph_str_result(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_json_json_result(monkeypatch):
"""Graph visualization of JSON objects with valid JSON string fields is supported."""
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -639,6 +712,9 @@ def test_bigquery_graph_json_json_result(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_json_result(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -758,6 +834,9 @@ def test_bigquery_graph_json_result(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_size_exceeds_max(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -813,6 +892,9 @@ def test_bigquery_graph_size_exceeds_max(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_size_exceeds_query_result_max(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -869,6 +951,9 @@ def test_bigquery_graph_size_exceeds_query_result_max(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_with_args_serialization(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -938,6 +1023,9 @@ def test_bigquery_graph_with_args_serialization(monkeypatch):
reason="Requires `spanner-graph-notebook` and `google-cloud-bigquery-storage`",
)
def test_bigquery_graph_colab(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
# Mock the colab module so the code under test uses colab.register_callback(), rather than
# GraphServer.
sys.modules["google.colab"] = mock.Mock()
Expand Down Expand Up @@ -1073,6 +1161,9 @@ def test_colab_node_expansion_callback():
reason="Requires `spanner-graph-notebook` to be missing and `google-cloud-bigquery-storage` to be present",
)
def test_bigquery_graph_missing_spanner_deps(monkeypatch):
monkeypatch.setattr(
"bigquery_magics.bigquery._get_graph_schema", lambda *args: None
)
globalipapp.start_ipython()
ip = globalipapp.get_ipython()
ip.extension_manager.load_extension("bigquery_magics")
Expand Down Expand Up @@ -1142,11 +1233,17 @@ def test_add_graph_widget_with_schema(monkeypatch):
query_result = pandas.DataFrame([{"id": 1}], columns=["result"])
query_text = "GRAPH my_dataset.my_graph"

query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True)
query_job = mock.Mock()
query_job.configuration.destination.project = "p"
query_job.configuration.destination.dataset_id = "d"
query_job.configuration.destination.table_id = "t"

graph_ref = mock.Mock()
graph_ref.project = "p"
graph_ref.dataset_id = "my_dataset"
graph_ref.property_graph_id = "my_graph"
query_job.referenced_property_graphs = [graph_ref]
Comment on lines +1236 to +1245
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

medium

Using mock.Mock() is less safe than mock.create_autospec as it doesn't enforce the mocked object's interface. It's better to stick with create_autospec for better test robustness. You can mock the referenced_property_graphs property by setting the underlying _properties dictionary, which is how the property gets its data. This is more aligned with how the actual QueryJob object works.

This feedback also applies to test_add_graph_widget_no_graph_name and test_add_graph_widget_schema_not_found.

Suggested change
query_job = mock.Mock()
query_job.configuration.destination.project = "p"
query_job.configuration.destination.dataset_id = "d"
query_job.configuration.destination.table_id = "t"
graph_ref = mock.Mock()
graph_ref.project = "p"
graph_ref.dataset_id = "my_dataset"
graph_ref.property_graph_id = "my_graph"
query_job.referenced_property_graphs = [graph_ref]
query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True)
query_job.configuration.destination.project = "p"
query_job.configuration.destination.dataset_id = "d"
query_job.configuration.destination.table_id = "t"
graph_ref_resource = {
"projectId": "p",
"datasetId": "my_dataset",
"propertyGraphId": "my_graph",
}
query_job._properties = {
"statistics": {
"query": {"referencedPropertyGraphs": [graph_ref_resource]}
}
}


args = mock.Mock()
args.bigquery_api_endpoint = "e"
args.project = "p"
Expand Down Expand Up @@ -1203,11 +1300,13 @@ def test_add_graph_widget_no_graph_name(monkeypatch):
query_result = pandas.DataFrame([{"id": 1}], columns=["result"])
query_text = "SELECT * FROM my_dataset.my_table"

query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True)
query_job = mock.Mock()
query_job.configuration.destination.project = "p"
query_job.configuration.destination.dataset_id = "d"
query_job.configuration.destination.table_id = "t"

query_job.referenced_property_graphs = []

args = mock.Mock()
args.bigquery_api_endpoint = "e"
args.project = "p"
Expand Down Expand Up @@ -1244,11 +1343,17 @@ def test_add_graph_widget_schema_not_found(monkeypatch):
query_result = pandas.DataFrame([{"id": 1}], columns=["result"])
query_text = "GRAPH my_dataset.my_graph"

query_job = mock.create_autospec(bigquery.job.QueryJob, instance=True)
query_job = mock.Mock()
query_job.configuration.destination.project = "p"
query_job.configuration.destination.dataset_id = "d"
query_job.configuration.destination.table_id = "t"

graph_ref = mock.Mock()
graph_ref.project = "p"
graph_ref.dataset_id = "my_dataset"
graph_ref.property_graph_id = "my_graph"
query_job.referenced_property_graphs = [graph_ref]

args = mock.Mock()
args.bigquery_api_endpoint = "e"
args.project = "p"
Expand Down Expand Up @@ -1293,9 +1398,8 @@ def test_bigquery_magic_default_connection_user_agent():

client_info_arg = conn.call_args[1].get("client_info")
assert client_info_arg is not None
assert (
client_info_arg.user_agent
== f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}"
assert client_info_arg.user_agent.startswith(
f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}"
)


Expand Down Expand Up @@ -1611,9 +1715,8 @@ def warning_match(warning):
assert kwargs.get("credentials") is mock_credentials
client_info = kwargs.get("client_info")
assert client_info is not None
assert (
client_info.user_agent
== f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}"
assert client_info.user_agent.startswith(
f"ipython-{IPython.__version__} bigquery-magics/{bigquery_magics.__version__}"
)

query_job_mock.to_dataframe.assert_called_once_with(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
from google.cloud.bigquery.table import _EmptyRowIterator
from google.cloud.bigquery.table import RangePartitioning
from google.cloud.bigquery.table import _table_arg_to_table_ref
from google.cloud.bigquery.table import TableReference
from google.cloud.bigquery.table import TableReference, PropertyGraphReference
from google.cloud.bigquery.table import TimePartitioning
from google.cloud.bigquery._tqdm_helpers import wait_for_query

Expand Down Expand Up @@ -1332,6 +1332,30 @@ def referenced_tables(self):

return tables

@property
def referenced_property_graphs(self):
    """Return the property graphs referenced by the query, per job statistics.

    See:
    https://cloud.google.com/bigquery/docs/reference/rest/v2/Job#JobStatistics2.FIELDS.referenced_property_graphs

    Returns:
        List[google.cloud.bigquery.table.PropertyGraphReference]:
            one reference per ``referencedPropertyGraphs`` entry in the job
            statistics, or an empty list if there is no such entry (for
            example, if the query has not yet completed).
    """
    resources = self._job_statistics().get("referencedPropertyGraphs", ())
    return [
        PropertyGraphReference(
            DatasetReference(resource["projectId"], resource["datasetId"]),
            resource["propertyGraphId"],
        )
        for resource in resources
    ]

@property
def undeclared_query_parameters(self):
"""Return undeclared query parameters from job statistics, if present.
Expand Down
Loading
Loading