diff --git a/.gitignore b/.gitignore
index 894a44c..9f8ddea 100644
--- a/.gitignore
+++ b/.gitignore
@@ -102,3 +102,6 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+
+# Vim
+*.swp
diff --git a/scripts/conf.json b/scripts/conf.json
new file mode 100644
index 0000000..1deb716
--- /dev/null
+++ b/scripts/conf.json
@@ -0,0 +1,6 @@
+{
+    "project": "",
+    "dataset": "",
+    "bigquery_credential_secret": {
+    }
+}
diff --git a/scripts/extract.py b/scripts/extract.py
new file mode 100644
index 0000000..347f8e0
--- /dev/null
+++ b/scripts/extract.py
@@ -0,0 +1,50 @@
+import json
+from typing import Dict
+
+from google.cloud import bigquery
+from google.oauth2 import service_account
+
+
+def get_bigquery_client(
+    bq_credential_secret: Dict[str, str], bq_project_id: str
+) -> bigquery.Client:
+    """Build a BigQuery client authenticated with service-account info.
+
+    Args:
+        bq_credential_secret: Service-account info mapping, handed to
+            ``service_account.Credentials.from_service_account_info``.
+        bq_project_id: Project passed to ``bigquery.Client`` as ``project``.
+
+    Returns:
+        The object returned by ``bigquery.Client``.
+    """
+    credentials = service_account.Credentials.from_service_account_info(bq_credential_secret)
+    return bigquery.Client(credentials=credentials, project=bq_project_id)
+
+
+def main() -> None:
+    """Print the configured dataset's table ids, then every row of its column metadata."""
+    # JSON text is UTF-8; do not depend on the platform's locale encoding.
+    with open("conf.json", "r", encoding="utf-8") as f:
+        config = json.load(f)
+
+    client = get_bigquery_client(config["bigquery_credential_secret"], config["project"])
+    dataset = config["dataset"]
+
+    for table in client.list_tables(dataset):
+        print(table.table_id)
+
+    # NOTE(review): dataset is spliced into the SQL text; it must come from trusted config.
+    query = f"""
+    SELECT * FROM `{dataset}.INFORMATION_SCHEMA.COLUMNS`
+    """
+    for row in client.query(query).result():
+        # columns:
+        # table_catalog, table_schema, table_name, column_name, ordinal_position, is_nullable,
+        # data_type, is_generated, generation_expression, is_stored, is_hidden, is_updatable,
+        # is_system_defined, is_partitioning_column, clustering_ordinal_position
+        print(row.values())
+
+
+if __name__ == "__main__":
+    main()
diff --git a/scripts/requirements.txt b/scripts/requirements.txt
new file mode 100644
index 0000000..76c1442
--- /dev/null
+++ b/scripts/requirements.txt
@@ -0,0 +1,26 @@
+cachetools==4.2.2
+certifi==2021.5.30
+charset-normalizer==2.0.6
+google-api-core==2.0.1
+google-api-python-client==2.21.0
+google-auth==2.1.0
+google-auth-httplib2==0.1.0
+google-cloud-bigquery==2.26.0
+google-cloud-core==2.0.0
+google-crc32c==1.1.2
+google-resumable-media==2.0.2
+googleapis-common-protos==1.53.0
+grpcio==1.40.0
+httplib2==0.19.1
+idna==3.2
+packaging==21.0
+proto-plus==1.19.0
+protobuf==3.18.0
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pyparsing==2.4.7
+requests==2.26.0
+rsa==4.7.2
+six==1.16.0
+uritemplate==3.0.1
+urllib3==1.26.6
diff --git a/scripts/test_extract.py b/scripts/test_extract.py
new file mode 100644
index 0000000..8289139
--- /dev/null
+++ b/scripts/test_extract.py
@@ -0,0 +1,44 @@
+from contextlib import ExitStack
+from unittest.mock import patch
+
+from extract import get_bigquery_client
+
+CLIENT_TARGET = "extract.bigquery.Client"
+CREDENTIALS_TARGET = "extract.service_account.Credentials.from_service_account_info"
+
+
+def test_bigquery_service_should_create_bigquery_client_with_defined_arguments():
+    """The secret goes to the credentials factory; its result and the project go to Client."""
+    secret = {
+        "type": "service_account",
+        "project_id": "project-something",
+    }
+    project = "bq_project_id"
+
+    with ExitStack() as stack:
+        client_cls = stack.enter_context(patch(CLIENT_TARGET))
+        from_info = stack.enter_context(patch(CREDENTIALS_TARGET))
+
+        get_bigquery_client(secret, project)
+
+        from_info.assert_called_once_with(secret)
+        client_cls.assert_called_once_with(credentials=from_info.return_value, project=project)
+
+
+def test_bigquery_service_should_get_bigquery_client():
+    """Whatever the patched bigquery.Client returns is handed back to the caller."""
+    secret = {
+        "type": "service_account",
+        "project_id": "project-something",
+    }
+    expected = "bigquery_client_object"
+
+    with ExitStack() as stack:
+        client_cls = stack.enter_context(patch(CLIENT_TARGET))
+        # Patched so the real credentials factory is never invoked.
+        stack.enter_context(patch(CREDENTIALS_TARGET))
+        client_cls.return_value = expected
+
+        bigquery_client = get_bigquery_client(secret, "bq_project_id")
+
+        assert bigquery_client == expected