From 0385b83d26f8a066051fe7140fd340e4d477df26 Mon Sep 17 00:00:00 2001 From: zkan Date: Sat, 18 Sep 2021 19:24:46 +0700 Subject: [PATCH 1/5] Install google-cloud-bigquery :bear: --- scripts/requirements.txt | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 scripts/requirements.txt diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..f52de1a --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,22 @@ +cachetools==4.2.2 +certifi==2021.5.30 +charset-normalizer==2.0.6 +google-api-core==2.0.1 +google-auth==2.1.0 +google-cloud-bigquery==2.26.0 +google-cloud-core==2.0.0 +google-crc32c==1.1.2 +google-resumable-media==2.0.2 +googleapis-common-protos==1.53.0 +grpcio==1.40.0 +idna==3.2 +packaging==21.0 +proto-plus==1.19.0 +protobuf==3.18.0 +pyasn1==0.4.8 +pyasn1-modules==0.2.8 +pyparsing==2.4.7 +requests==2.26.0 +rsa==4.7.2 +six==1.16.0 +urllib3==1.26.6 From d82d496c56a26709c80898c834189f3237edb855 Mon Sep 17 00:00:00 2001 From: zkan Date: Sat, 18 Sep 2021 19:25:07 +0700 Subject: [PATCH 2/5] Install google-api-python-client :bear: --- scripts/requirements.txt | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/scripts/requirements.txt b/scripts/requirements.txt index f52de1a..76c1442 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -2,13 +2,16 @@ cachetools==4.2.2 certifi==2021.5.30 charset-normalizer==2.0.6 google-api-core==2.0.1 +google-api-python-client==2.21.0 google-auth==2.1.0 +google-auth-httplib2==0.1.0 google-cloud-bigquery==2.26.0 google-cloud-core==2.0.0 google-crc32c==1.1.2 google-resumable-media==2.0.2 googleapis-common-protos==1.53.0 grpcio==1.40.0 +httplib2==0.19.1 idna==3.2 packaging==21.0 proto-plus==1.19.0 @@ -19,4 +22,5 @@ pyparsing==2.4.7 requests==2.26.0 rsa==4.7.2 six==1.16.0 +uritemplate==3.0.1 urllib3==1.26.6 From 8469585c9eb5ff69d29d2c21128a3a544b6ad717 Mon Sep 17 00:00:00 2001 From: zkan Date: Sat, 18 Sep 2021 20:15:17 +0700 Subject: [PATCH 3/5] Ignore *.swp :bear: --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 894a44c..9f8ddea 100644 --- a/.gitignore +++ b/.gitignore @@ -102,3 +102,6 @@ venv.bak/ # mypy .mypy_cache/ + +# Vim +*.swp From bb44cd3eda10627ec8a72e7739d1b84dfeedf38d Mon Sep 17 00:00:00 2001 From: zkan Date: Sat, 18 Sep 2021 20:17:38 +0700 Subject: [PATCH 4/5] Extract list of BigQuery table's fields :bear: --- scripts/conf.json | 6 ++++++ scripts/extract.py | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 scripts/conf.json create mode 100644 scripts/extract.py diff --git a/scripts/conf.json b/scripts/conf.json new file mode 100644 index 0000000..1deb716 --- /dev/null +++ b/scripts/conf.json @@ -0,0 +1,6 @@ +{ + "project": "", + "dataset": "", + "bigquery_credential_secret": { + } +} diff --git a/scripts/extract.py b/scripts/extract.py new file mode 100644 index 0000000..5cb9b73 --- /dev/null +++ b/scripts/extract.py @@ -0,0 +1,32 @@ +import json + +from google.cloud import bigquery +from google.oauth2 import service_account + + +with open("conf.json", "r") as f: + config = json.load(f) + +bigquery_credential_secret = config["bigquery_credential_secret"] +client = bigquery.Client( + credentials=service_account.Credentials.from_service_account_info( + bigquery_credential_secret, + ), + project=config["project"], +) + +tables = client.list_tables(config["dataset"]) +for each in tables: + print(each.table_id) + +query = f""" + SELECT * FROM `{config["dataset"]}.INFORMATION_SCHEMA.COLUMNS` +""" +query_job = client.query(query) +rows = query_job.result() +for each in rows: + # columns: + # table_catalog, table_schema, table_name, column_name, ordinal_position, is_nullable, + # data_type, is_generated, generation_expression, is_stored, is_hidden, is_updatable, + # is_system_defined, is_partitioning_column, clustering_ordinal_position + print(each.values()) From 35c202f543be511ac9274039cdee3a06c8f97607 Mon Sep 17 00:00:00 2001 From: zkan Date: Sat, 23 Oct 2021 20:45:30 +0700 Subject: [PATCH 5/5] Extract BigQuery client creation to func :bear: --- scripts/extract.py | 57 ++++++++++++++++++++++------------------- scripts/test_extract.py | 40 +++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+), 26 deletions(-) create mode 100644 scripts/test_extract.py diff --git a/scripts/extract.py b/scripts/extract.py index 5cb9b73..347f8e0 100644 --- a/scripts/extract.py +++ b/scripts/extract.py @@ -1,32 +1,37 @@ import json +from typing import Dict from google.cloud import bigquery from google.oauth2 import service_account -with open("conf.json", "r") as f: - config = json.load(f) - -bigquery_credential_secret = config["bigquery_credential_secret"] -client = bigquery.Client( - credentials=service_account.Credentials.from_service_account_info( - bigquery_credential_secret, - ), - project=config["project"], -) - -tables = client.list_tables(config["dataset"]) -for each in tables: - print(each.table_id) - -query = f""" - SELECT * FROM `{config["dataset"]}.INFORMATION_SCHEMA.COLUMNS` -""" -query_job = client.query(query) -rows = query_job.result() -for each in rows: - # columns: - # table_catalog, table_schema, table_name, column_name, ordinal_position, is_nullable, - # data_type, is_generated, generation_expression, is_stored, is_hidden, is_updatable, - # is_system_defined, is_partitioning_column, clustering_ordinal_position - print(each.values()) +def get_bigquery_client(bq_credential_secret: Dict[str, str], bq_project_id: str): + return bigquery.Client( + credentials=service_account.Credentials.from_service_account_info(bq_credential_secret), + project=bq_project_id, + ) + + +if __name__ == "__main__": + with open("conf.json", "r") as f: + config = json.load(f) + + bigquery_credential_secret = config["bigquery_credential_secret"] + bq_project_id = config["project"] + client = get_bigquery_client(bigquery_credential_secret, bq_project_id) + + tables = client.list_tables(config["dataset"]) + for each in tables: + print(each.table_id) + + query = f""" + SELECT * FROM `{config["dataset"]}.INFORMATION_SCHEMA.COLUMNS` + """ + query_job = client.query(query) + rows = query_job.result() + for each in rows: + # columns: + # table_catalog, table_schema, table_name, column_name, ordinal_position, is_nullable, + # data_type, is_generated, generation_expression, is_stored, is_hidden, is_updatable, + # is_system_defined, is_partitioning_column, clustering_ordinal_position + print(each.values()) diff --git a/scripts/test_extract.py b/scripts/test_extract.py new file mode 100644 index 0000000..8289139 --- /dev/null +++ b/scripts/test_extract.py @@ -0,0 +1,40 @@ +from contextlib import ExitStack +from unittest.mock import patch + +from extract import get_bigquery_client + + +def test_bigquery_service_should_create_bigquery_client_with_defined_arguments(): + with ExitStack() as stack: + mock_client = stack.enter_context(patch("extract.bigquery.Client")) + mock_service_account = stack.enter_context( + patch("extract.service_account.Credentials.from_service_account_info") + ) + + bq_credential_secret = { + "type": "service_account", + "project_id": "project-something", + } + bq_project_id = "bq_project_id" + get_bigquery_client(bq_credential_secret, bq_project_id) + + mock_service_account.assert_called_once_with(bq_credential_secret) + mock_client.assert_called_once_with(credentials=mock_service_account.return_value, project=bq_project_id) + + +def test_bigquery_service_should_get_bigquery_client(): + with ExitStack() as stack: + mock_client = stack.enter_context(patch("extract.bigquery.Client")) + mock_service_account = stack.enter_context( + patch("extract.service_account.Credentials.from_service_account_info") + ) + expected = mock_client.return_value = "bigquery_client_object" + + bq_credential_secret = { + "type": "service_account", + "project_id": "project-something", + } + bq_project_id = "bq_project_id" + bigquery_client = get_bigquery_client(bq_credential_secret, bq_project_id) + + assert bigquery_client == expected