From 5f91984a0f570c20d3b172568190bb40d5f76e40 Mon Sep 17 00:00:00 2001 From: Daniel Giles Date: Thu, 13 Nov 2025 13:33:32 -0500 Subject: [PATCH 1/8] Added a json parser method to the Datalink mixins --- pyvo/dal/adhoc.py | 168 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) diff --git a/pyvo/dal/adhoc.py b/pyvo/dal/adhoc.py index 42d7d515..b377f71c 100644 --- a/pyvo/dal/adhoc.py +++ b/pyvo/dal/adhoc.py @@ -396,6 +396,48 @@ def iter_datalinks(self, preserve_order=False): yield from self._iter_datalinks_from_dlblock( preserve_order=preserve_order) + def iter_parse_json_params( + self, + colname: str="cloud_access", + key: str="aws", + verbose: bool=False, + **match_params + ): + + for irow, record in enumerate(self): + access_points = record.parse_json_params( + colname=colname, + key=key, + verbose=verbose, + **match_params + ) + access_points.add_column([irow]*len(access_points), name="record_row", index=0) + if irow == 0: + new_table = access_points + else: + for row in access_points.iterrows(): + new_table.add_row(row) + + return new_table + + def iter_get_cloud_params( + self, + colname: str="cloud_access", + provider: str="aws", + verbose: bool=False, + **match_params + ): + for irow, record in enumerate(self): + # do the json parsing + cloud_params = record.get_cloud_params(colname, provider, verbose, **match_params) + cloud_params.add_column([irow]*len(cloud_params), name="record_row", index=0) + if irow == 0: + new_table = cloud_params + else: + for row in cloud_params.iterrows(): + new_table.add_row(row) + + return new_table class DatalinkRecordMixin: """ @@ -445,6 +487,110 @@ def getdataset(self, timeout=None): # this should go to Record.getdataset() return super().getdataset(timeout=timeout) + def parse_json_params( + self, + colname: str="cloud_access", + key: str="aws", + verbose: bool=False, + **match_params + ): + """Parse information stored as JSON by key + + Parameters + ---------- + colname: str + name of column to 
search in + provider: str, optional + name of data provider: only 'aws' is presently supported. + verbose: bool + If True, print progress and debug text. + + Return + ------ + A dict or a list of dict of parameters for every row in products + + """ + import json + + # init results table (avoiding adding import of astropy.table.Table) + new_table = TableElement(VOTableFile()).to_table() + + if verbose: + print(f'searching for and processing json column {colname}') + + try: + jsontxt = self[colname] + jsonDict = json.loads(jsontxt) + if key not in jsonDict and verbose: + print(f'No key "{key}" found for record' + 'in column "{colname}"') + else: + p_params = jsonDict[key] + checks = [] + for k, value in match_params.items(): + checks.append(p_params.getitem(k, value) == value) + + if all(checks): + if not isinstance(p_params, list): + p_params = [p_params] + colnames = list(p_params[0].keys()) + colvals = [[] for _ in colnames] + for ppar in p_params: + for idx, val in enumerate(ppar.values()): + colvals[idx].append(val) + new_table.add_columns(cols=colvals, names=colnames) + + except KeyError: + # no json column, return empty list + if verbose: + print(f'No column {colname} found for record.') + + return new_table + + def get_cloud_params(self, colname="cloud_access", provider="aws", verbose=False, **match_params): + """Parse information stored as JSON by key + + Parameters + ---------- + colname: str + name of column to search in + provider: str, optional + name of data provider: only 'aws' is presently supported. + verbose: bool + If True, print progress and debug text. 
+ + Return + ------ + An astropy Table with parameters for every row in the datalinks + + """ + dl_results = self.getdatalink() + products = dl_results.bysemantics("#this") + + + for irow, row in enumerate(products): + # if no colname column, there is nothing to do + try: + access_points = row.parse_json_params( + colname=colname, + key=provider, + verbose=verbose, + **match_params + ) + access_points.add_column([irow]*len(access_points), name="datalink_row", index=0) + if irow == 0: + new_table = access_points + else: + for row in access_points.iterrows(): + new_table.add_row(row) + except KeyError: + # no json column, continue + if verbose: + print(f'No column {colname} found for row {irow}') + new_table = TableElement(VOTableFile()).to_table() + continue + + return new_table class DatalinkService(DALService, AvailabilityMixin, CapabilityMixin): """ @@ -839,6 +985,28 @@ def from_result_url(cls, result_url, *, session=None, original_row=None): res.original_row = original_row return res + def get_cloud_params(self, colname="cloud_access", provider="aws", verbose=False, **match_params): + products = list(self.bysemantics("#this")) + rows_access_points = [[] for i in range(len(products))] + + for irow, row in enumerate(products): + # if no colname column, there is nothing to do + try: + access_points = row.parse_json_params( + colname=colname, + key=provider, + verbose=verbose, + **match_params + ) + rows_access_points[irow].append(access_points) + except KeyError: + # no json column, continue + if verbose: + print(f'No column {colname} found for row {irow}') + continue + + return rows_access_points + class SodaRecordMixin: """ From 36370b3c7876f2d643305c91a97a21b9b5202246 Mon Sep 17 00:00:00 2001 From: Daniel Giles Date: Fri, 21 Nov 2025 16:43:09 -0500 Subject: [PATCH 2/8] Upated iter method to use iter_datalinks, added documentation and tests --- docs/dal/index.rst | 9 +++ pyvo/dal/adhoc.py | 102 +++++++++++++++++++++++++++----- pyvo/dal/tests/test_datalink.py | 73 
+++++++++++++++++++++++ 3 files changed, 170 insertions(+), 14 deletions(-) diff --git a/docs/dal/index.rst b/docs/dal/index.rst index 3d2b566b..7f05dd77 100644 --- a/docs/dal/index.rst +++ b/docs/dal/index.rst @@ -869,6 +869,15 @@ DatalinkResults using >>> next(datalink.bysemantics("#this")).content_type 'application/fits' +As data becomes available from different cloud providers, some services are +including cloud access information in the associated Datalink in JSON format. +The ``get_cloud_params`` and ``iter_get_cloud_params`` fucntions for +Records and Results respectively return an ``astropy.Table`` with the +parameters specified to access data via the cloud service provider specified. + +More generic functions, ``parse_json_params`` and ``iter_parse_json_params``, +act directly on the Record or Results object and can parse JSON columns with +a given column name, key, and optionally parameters to match. Server-side processing ---------------------- diff --git a/pyvo/dal/adhoc.py b/pyvo/dal/adhoc.py index b377f71c..6dff390d 100644 --- a/pyvo/dal/adhoc.py +++ b/pyvo/dal/adhoc.py @@ -403,7 +403,28 @@ def iter_parse_json_params( verbose: bool=False, **match_params ): + """ + Iterate over all Records in a DalResult and return parsed json parameters. + + Parameters + ---------- + colname : str, optional + The column containing JSON to be parsed, by default "cloud_access" + key : str, optional + The key to filter JSON results by, by default "aws" + verbose : bool, optional + Whether to print progress and errors, by default False + **match_params : str, optional + Any further parameters to match on. + Returns + ------- + astropy.Table + A table containing the JSON parameters separated into columns, each + row corresponding to a matching JSON entry for each DataLinkRecord + for each row of the original DalResult. 
+ + """ for irow, record in enumerate(self): access_points = record.parse_json_params( colname=colname, @@ -427,9 +448,55 @@ def iter_get_cloud_params( verbose: bool=False, **match_params ): - for irow, record in enumerate(self): + """ + Iterate over all Records in a DalResult and return parsed cloud parameters. + + Parameters + ---------- + colname : str, optional + The column containing JSON to be parsed, by default "cloud_access" + provider : str, optional + The key to filter JSON results by, by default "aws" + verbose : bool, optional + Whether to print progress and errors, by default False + **match_params : str, optional + Any further parameters to match on. + + Returns + ------- + astropy.Table + A table containing the JSON parameters separated into columns, each + row corresponding to matching JSON entries from each Record. + + """ + for irow, dl_results in enumerate(self.iter_datalinks()): + + products = dl_results.bysemantics("#this") + + for jrow, row in enumerate(products): + # if no colname column, there is nothing to do + try: + access_points = row.parse_json_params( + colname=colname, + key=provider, + verbose=verbose, + **match_params + ) + access_points.add_column([jrow]*len(access_points), name="datalink_row", index=0) + if jrow == 0: + new_dl_table = access_points + else: + for row in access_points.iterrows(): + new_dl_table.add_row(row) + except KeyError: + # no json column, continue + if verbose: + print(f'No column {colname} found for row {irow}, datalink {jrow}') + new_dl_table = TableElement(VOTableFile()).to_table() + continue + # do the json parsing - cloud_params = record.get_cloud_params(colname, provider, verbose, **match_params) + cloud_params = access_points cloud_params.add_column([irow]*len(cloud_params), name="record_row", index=0) if irow == 0: new_table = cloud_params @@ -498,16 +565,20 @@ def parse_json_params( Parameters ---------- - colname: str - name of column to search in - provider: str, optional - name of data provider: 
only 'aws' is presently supported. - verbose: bool - If True, print progress and debug text. - - Return - ------ - A dict or a list of dict of parameters for every row in products + colname : str, optional + The column containing JSON to be parsed, by default "cloud_access" + key : str, optional + The key to filter JSON results by, by default "aws" + verbose : bool, optional + Whether to print progress and errors, by default False + **match_params : str, optional + Any further parameters to match on. + + Returns + ------- + astropy.Table + A table containing the JSON parameters separated into columns, each + row representing a matching JSON entry. """ import json @@ -548,7 +619,7 @@ def parse_json_params( return new_table def get_cloud_params(self, colname="cloud_access", provider="aws", verbose=False, **match_params): - """Parse information stored as JSON by key + """Parse cloud information stored as JSON by provider Parameters ---------- @@ -558,10 +629,13 @@ def get_cloud_params(self, colname="cloud_access", provider="aws", verbose=False name of data provider: only 'aws' is presently supported. verbose: bool If True, print progress and debug text. + **match_params Return ------ - An astropy Table with parameters for every row in the datalinks + astropy.Table + A table containing the JSON parameters separated into columns, + each row being a unique JSON entry and/or from a different DatalinkRecord. 
""" dl_results = self.getdatalink() diff --git a/pyvo/dal/tests/test_datalink.py b/pyvo/dal/tests/test_datalink.py index 559d2c92..14a40b9c 100644 --- a/pyvo/dal/tests/test_datalink.py +++ b/pyvo/dal/tests/test_datalink.py @@ -4,10 +4,12 @@ Tests for pyvo.dal.datalink """ from functools import partial +from io import BytesIO import re import pytest +from astropy.io.votable import parse, writeto, from_table import pyvo as vo from pyvo.dal.adhoc import DatalinkResults, DALServiceError from pyvo.dal.sia2 import SIA2Results @@ -74,6 +76,21 @@ def callback(request, context): ) as matcher: yield matcher +@pytest.fixture() +def datalink_cloud(mocker): + def callback(request, context): + dl_base = parse('pyvo/dal/tests/data/datalink/datalink.xml') + dl_base_table = dl_base.get_first_table().to_table() + cloud_access_str = '{"aws": {"bucket_name": "test", "key":"path/to/cloudfile.fits", "region": "us-west-2"}}' + dl_base_table.add_column([cloud_access_str]*4, name='cloud_access') + out = BytesIO() + writeto(from_table(dl_base_table), out) + return out.getvalue() + + with mocker.register_uri( + 'GET', 'http://example.com/datalink-cloud.xml', content=callback + ) as matcher: + yield matcher @pytest.fixture() def obscore_datalink(mocker): @@ -336,3 +353,59 @@ def test_no_datalink(): result = results[0] with pytest.raises(DALServiceError, match="No datalink found for record."): result.getdatalink() + +@pytest.mark.filterwarnings("ignore::astropy.io.votable.exceptions.E02") +@pytest.mark.usefixtures('datalink_cloud') +class TestJsonColumns: + """Tests for producing datalinks from tables containing links to + datalink documents. 
+ """ + + res = testing.create_dalresults([ + {"name": "access_url", "datatype": "char", "arraysize": "*", + "ucd": "meta.ref.url"}, + {"name": "access_format", "datatype": "char", "arraysize": "*", + "utype": "meta.code.mime"}, + {"name": "cloud_access", "datatype": "char", "arraysize": "*"},], + [("http://example.com/datalink-cloud.xml", + "application/x-votable+xml;content=datalink", + '{"aws": {"bucket_name": "test", "key":"path/to/file1.fits", "region": "us-west-2"}}',), + ("http://example.com/datalink-cloud.xml", + "application/x-votable+xml;content=datalink", + '{"aws": {"bucket_name": "test", "key":"path/to/file2.fits", "region": "us-west-2"}}',)], + resultsClass=SIA2Results + ) + def test_record_w_json(self): + + + parsed_json_matches = self.res[0].parse_json_params("cloud_access", "aws") + assert parsed_json_matches[0]["bucket_name"] == "test" + assert parsed_json_matches[0]["key"] == "path/to/file1.fits" + assert parsed_json_matches[0]["region"] == "us-west-2" + + def test_iter_json(self): + + parsed_json_matches = self.res.iter_parse_json_params("cloud_access", "aws") + assert parsed_json_matches[0]["record_row"] == 0 + assert parsed_json_matches[0]["bucket_name"] == "test" + assert parsed_json_matches[0]["key"] == "path/to/file1.fits" + assert parsed_json_matches[0]["region"] == "us-west-2" + assert parsed_json_matches[1]["record_row"] == 1 + assert parsed_json_matches[1]["key"] == "path/to/file2.fits" + + def test_datalink_json(self): + parsed_cloud_params = self.res[0].get_cloud_params("cloud_access", "aws") + assert parsed_cloud_params[0]["bucket_name"] == "test" + assert parsed_cloud_params[0]["key"] == "path/to/cloudfile.fits" + assert parsed_cloud_params[0]["region"] == "us-west-2" + + def test_iter_datalink_json(self): + parsed_json_matches = self.res.iter_get_cloud_params("cloud_access", "aws") + assert parsed_json_matches[0]["record_row"] == 0 + assert parsed_json_matches[0]["datalink_row"] == 0 + assert parsed_json_matches[0]["bucket_name"] 
== "test" + assert parsed_json_matches[0]["key"] == "path/to/cloudfile.fits" + assert parsed_json_matches[0]["region"] == "us-west-2" + assert parsed_json_matches[1]["record_row"] == 1 + assert parsed_json_matches[1]["datalink_row"] == 0 + assert parsed_json_matches[1]["key"] == "path/to/cloudfile.fits" \ No newline at end of file From b8a6b583eb4e32f9128fb327440e4d7ae466ad90 Mon Sep 17 00:00:00 2001 From: Daniel Giles Date: Mon, 24 Nov 2025 15:28:49 -0500 Subject: [PATCH 3/8] Upated documentation, removed duplicate code, fixed formatting --- docs/dal/index.rst | 4 +++- pyvo/dal/adhoc.py | 22 ---------------------- pyvo/dal/tests/test_datalink.py | 2 +- 3 files changed, 4 insertions(+), 24 deletions(-) diff --git a/docs/dal/index.rst b/docs/dal/index.rst index 7f05dd77..858c99e3 100644 --- a/docs/dal/index.rst +++ b/docs/dal/index.rst @@ -869,9 +869,11 @@ DatalinkResults using >>> next(datalink.bysemantics("#this")).content_type 'application/fits' +Cloud Access Information [subject to change] +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ As data becomes available from different cloud providers, some services are including cloud access information in the associated Datalink in JSON format. -The ``get_cloud_params`` and ``iter_get_cloud_params`` fucntions for +The ``get_cloud_params`` and ``iter_get_cloud_params`` functions for Records and Results respectively return an ``astropy.Table`` with the parameters specified to access data via the cloud service provider specified. 
diff --git a/pyvo/dal/adhoc.py b/pyvo/dal/adhoc.py index 6dff390d..779dc1d8 100644 --- a/pyvo/dal/adhoc.py +++ b/pyvo/dal/adhoc.py @@ -1059,28 +1059,6 @@ def from_result_url(cls, result_url, *, session=None, original_row=None): res.original_row = original_row return res - def get_cloud_params(self, colname="cloud_access", provider="aws", verbose=False, **match_params): - products = list(self.bysemantics("#this")) - rows_access_points = [[] for i in range(len(products))] - - for irow, row in enumerate(products): - # if no colname column, there is nothing to do - try: - access_points = row.parse_json_params( - colname=colname, - key=provider, - verbose=verbose, - **match_params - ) - rows_access_points[irow].append(access_points) - except KeyError: - # no json column, continue - if verbose: - print(f'No column {colname} found for row {irow}') - continue - - return rows_access_points - class SodaRecordMixin: """ diff --git a/pyvo/dal/tests/test_datalink.py b/pyvo/dal/tests/test_datalink.py index 14a40b9c..695efdaf 100644 --- a/pyvo/dal/tests/test_datalink.py +++ b/pyvo/dal/tests/test_datalink.py @@ -408,4 +408,4 @@ def test_iter_datalink_json(self): assert parsed_json_matches[0]["region"] == "us-west-2" assert parsed_json_matches[1]["record_row"] == 1 assert parsed_json_matches[1]["datalink_row"] == 0 - assert parsed_json_matches[1]["key"] == "path/to/cloudfile.fits" \ No newline at end of file + assert parsed_json_matches[1]["key"] == "path/to/cloudfile.fits" From e0dcec8117c3a120da921e21db3eaf2b7d5dc90d Mon Sep 17 00:00:00 2001 From: Daniel Giles Date: Wed, 3 Dec 2025 14:01:15 -0500 Subject: [PATCH 4/8] docstring updates --- pyvo/dal/adhoc.py | 63 ++++++++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 28 deletions(-) diff --git a/pyvo/dal/adhoc.py b/pyvo/dal/adhoc.py index 779dc1d8..350a780d 100644 --- a/pyvo/dal/adhoc.py +++ b/pyvo/dal/adhoc.py @@ -398,8 +398,8 @@ def iter_datalinks(self, preserve_order=False): def iter_parse_json_params( 
self, + json_key: str, colname: str="cloud_access", - key: str="aws", verbose: bool=False, **match_params ): @@ -408,14 +408,14 @@ def iter_parse_json_params( Parameters ---------- + json_key : str + The primary key by which to filter JSON results. colname : str, optional - The column containing JSON to be parsed, by default "cloud_access" - key : str, optional - The key to filter JSON results by, by default "aws" + The column containing JSON to be parsed, by default "cloud_access". verbose : bool, optional - Whether to print progress and errors, by default False + Whether to print progress and errors, by default False. **match_params : str, optional - Any further parameters to match on. + Further parameters on which to match beyond `json_key`. Returns ------- @@ -428,7 +428,7 @@ def iter_parse_json_params( for irow, record in enumerate(self): access_points = record.parse_json_params( colname=colname, - key=key, + json_key=json_key, verbose=verbose, **match_params ) @@ -443,8 +443,8 @@ def iter_parse_json_params( def iter_get_cloud_params( self, + provider: str, colname: str="cloud_access", - provider: str="aws", verbose: bool=False, **match_params ): @@ -453,14 +453,14 @@ def iter_get_cloud_params( Parameters ---------- + provider : str + Name of the data provider. colname : str, optional - The column containing JSON to be parsed, by default "cloud_access" - provider : str, optional - The key to filter JSON results by, by default "aws" + The column containing cloud access JSON, by default "cloud_access". verbose : bool, optional - Whether to print progress and errors, by default False + Whether to print debug text, by default False. **match_params : str, optional - Any further parameters to match on. + Further parameters on which to match beyond `provider`. 
Returns ------- @@ -478,7 +478,7 @@ def iter_get_cloud_params( try: access_points = row.parse_json_params( colname=colname, - key=provider, + json_key=provider, verbose=verbose, **match_params ) @@ -556,8 +556,8 @@ def getdataset(self, timeout=None): def parse_json_params( self, + json_key: str, colname: str="cloud_access", - key: str="aws", verbose: bool=False, **match_params ): @@ -565,14 +565,14 @@ def parse_json_params( Parameters ---------- + json_key : str + The primary key by which to filter JSON results colname : str, optional The column containing JSON to be parsed, by default "cloud_access" - key : str, optional - The key to filter JSON results by, by default "aws" verbose : bool, optional Whether to print progress and errors, by default False **match_params : str, optional - Any further parameters to match on. + Further parameters on which to match beyond `json_key`. Returns ------- @@ -592,11 +592,11 @@ def parse_json_params( try: jsontxt = self[colname] jsonDict = json.loads(jsontxt) - if key not in jsonDict and verbose: - print(f'No key "{key}" found for record' + if json_key not in jsonDict and verbose: + print(f'No key "{json_key}" found for record' 'in column "{colname}"') else: - p_params = jsonDict[key] + p_params = jsonDict[json_key] checks = [] for k, value in match_params.items(): checks.append(p_params.getitem(k, value) == value) @@ -618,18 +618,25 @@ def parse_json_params( return new_table - def get_cloud_params(self, colname="cloud_access", provider="aws", verbose=False, **match_params): + def get_cloud_params( + self, + provider: str, + colname: str = "cloud_access", + verbose: bool = False, + **match_params + ): """Parse cloud information stored as JSON by provider Parameters ---------- - colname: str - name of column to search in - provider: str, optional - name of data provider: only 'aws' is presently supported. - verbose: bool - If True, print progress and debug text. 
+ provider: str + Name of the data provider + colname: str, optional + The column name containing the cloud access JSON, by default "cloud_access" + verbose: bool, optional + If True, print progress and debug text, by default False **match_params + Further parameters on which to match beyond `provider`. Return ------ From f69127ca75105e6e2d13d99d46798ce346b92292 Mon Sep 17 00:00:00 2001 From: Daniel Giles Date: Wed, 3 Dec 2025 15:15:21 -0500 Subject: [PATCH 5/8] added changelog entry --- CHANGES.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.rst b/CHANGES.rst index 06e81a8d..5d232a77 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -3,7 +3,8 @@ Enhancements and Fixes ---------------------- - +- Added functionality to the DatalinkRecordMixin and the DatalinkResultsMixin + to handle basic parsing of JSON entries Deprecations and Removals From 2eb66b4df336475af56f339ceea4b57194247179 Mon Sep 17 00:00:00 2001 From: Daniel Giles Date: Fri, 5 Dec 2025 16:39:42 -0500 Subject: [PATCH 6/8] added column guesser based on utype and ucd, changed parse_json to static function --- pyvo/dal/adhoc.py | 150 ++++++++++++-------------------- pyvo/dal/tests/test_datalink.py | 60 ++++++++----- 2 files changed, 97 insertions(+), 113 deletions(-) diff --git a/pyvo/dal/adhoc.py b/pyvo/dal/adhoc.py index 350a780d..1c983596 100644 --- a/pyvo/dal/adhoc.py +++ b/pyvo/dal/adhoc.py @@ -6,6 +6,7 @@ import warnings import copy import requests +import json from .query import DALResults, DALQuery, DALService, Record from .exceptions import DALServiceError @@ -396,51 +397,6 @@ def iter_datalinks(self, preserve_order=False): yield from self._iter_datalinks_from_dlblock( preserve_order=preserve_order) - def iter_parse_json_params( - self, - json_key: str, - colname: str="cloud_access", - verbose: bool=False, - **match_params - ): - """ - Iterate over all Records in a DalResult and return parsed json parameters. 
- - Parameters - ---------- - json_key : str - The primary key by which to filter JSON results. - colname : str, optional - The column containing JSON to be parsed, by default "cloud_access". - verbose : bool, optional - Whether to print progress and errors, by default False. - **match_params : str, optional - Further parameters on which to match beyond `json_key`. - - Returns - ------- - astropy.Table - A table containing the JSON parameters separated into columns, each - row corresponding to a matching JSON entry for each DataLinkRecord - for each row of the original DalResult. - - """ - for irow, record in enumerate(self): - access_points = record.parse_json_params( - colname=colname, - json_key=json_key, - verbose=verbose, - **match_params - ) - access_points.add_column([irow]*len(access_points), name="record_row", index=0) - if irow == 0: - new_table = access_points - else: - for row in access_points.iterrows(): - new_table.add_row(row) - - return new_table - def iter_get_cloud_params( self, provider: str, @@ -475,9 +431,10 @@ def iter_get_cloud_params( for jrow, row in enumerate(products): # if no colname column, there is nothing to do - try: + jsontxt = row._guess_cloud_column(colname=colname) + if jsontxt: access_points = row.parse_json_params( - colname=colname, + json_txt=jsontxt, json_key=provider, verbose=verbose, **match_params @@ -488,15 +445,14 @@ def iter_get_cloud_params( else: for row in access_points.iterrows(): new_dl_table.add_row(row) - except KeyError: + else: # no json column, continue if verbose: - print(f'No column {colname} found for row {irow}, datalink {jrow}') + print(f'No column {colname} found for Results row {irow}, datalink row {jrow}') new_dl_table = TableElement(VOTableFile()).to_table() - continue # do the json parsing - cloud_params = access_points + cloud_params = new_dl_table cloud_params.add_column([irow]*len(cloud_params), name="record_row", index=0) if irow == 0: new_table = cloud_params @@ -554,10 +510,10 @@ def 
getdataset(self, timeout=None): # this should go to Record.getdataset() return super().getdataset(timeout=timeout) + @staticmethod def parse_json_params( - self, + json_txt: str, json_key: str, - colname: str="cloud_access", verbose: bool=False, **match_params ): @@ -565,10 +521,10 @@ def parse_json_params( Parameters ---------- + json_txt : str + Text interpreted as JSON json_key : str The primary key by which to filter JSON results - colname : str, optional - The column containing JSON to be parsed, by default "cloud_access" verbose : bool, optional Whether to print progress and errors, by default False **match_params : str, optional @@ -581,43 +537,52 @@ def parse_json_params( row representing a matching JSON entry. """ - import json # init results table (avoiding adding import of astropy.table.Table) new_table = TableElement(VOTableFile()).to_table() - - if verbose: - print(f'searching for and processing json column {colname}') - try: - jsontxt = self[colname] - jsonDict = json.loads(jsontxt) - if json_key not in jsonDict and verbose: - print(f'No key "{json_key}" found for record' - 'in column "{colname}"') - else: - p_params = jsonDict[json_key] - checks = [] - for k, value in match_params.items(): - checks.append(p_params.getitem(k, value) == value) - - if all(checks): - if not isinstance(p_params, list): - p_params = [p_params] - colnames = list(p_params[0].keys()) - colvals = [[] for _ in colnames] - for ppar in p_params: - for idx, val in enumerate(ppar.values()): - colvals[idx].append(val) - new_table.add_columns(cols=colvals, names=colnames) - - except KeyError: - # no json column, return empty list - if verbose: - print(f'No column {colname} found for record.') + jsonDict = json.loads(json_txt) + if json_key not in jsonDict and verbose: + print(f'No key "{json_key}" found in json_txt given.') + else: + p_params = jsonDict[json_key] + checks = [] + for k, value in match_params.items(): + checks.append(p_params.getitem(k, value) == value) + + if 
all(checks): + if not isinstance(p_params, list): + p_params = [p_params] + colnames = list(p_params[0].keys()) + colvals = [[] for _ in colnames] + for ppar in p_params: + for idx, val in enumerate(ppar.values()): + colvals[idx].append(val) + new_table.add_columns(cols=colvals, names=colnames) return new_table + def _guess_cloud_column(self, colname="cloud_access"): + """returns a guess for a URI to a data product in row. + + This tries a few heuristics based on how cloud access or records might + be marked up. This will return None if row does not look as if + it contained a cloud access column. + """ + if hasattr(self, colname): + return getattr(self, colname) + + if colname in self: + return self[colname] + + cloud_access = self.getbyutype("adhoc:cloudstorage") + if cloud_access: + return cloud_access + + cloud_access = self.getbyucd("meta.ref.cloudstorage") + if cloud_access: + return cloud_access + def get_cloud_params( self, provider: str, @@ -651,10 +616,11 @@ def get_cloud_params( for irow, row in enumerate(products): # if no colname column, there is nothing to do - try: + cloud_json = row._guess_cloud_column(colname=colname) + if cloud_json: access_points = row.parse_json_params( - colname=colname, - key=provider, + json_txt=cloud_json, + json_key=provider, verbose=verbose, **match_params ) @@ -664,12 +630,12 @@ def get_cloud_params( else: for row in access_points.iterrows(): new_table.add_row(row) - except KeyError: - # no json column, continue + else: + # no json column, return None if verbose: print(f'No column {colname} found for row {irow}') - new_table = TableElement(VOTableFile()).to_table() - continue + new_table = None + break return new_table diff --git a/pyvo/dal/tests/test_datalink.py b/pyvo/dal/tests/test_datalink.py index 695efdaf..e865f5a5 100644 --- a/pyvo/dal/tests/test_datalink.py +++ b/pyvo/dal/tests/test_datalink.py @@ -77,18 +77,38 @@ def callback(request, context): yield matcher @pytest.fixture() -def datalink_cloud(mocker): +def 
datalink_cloud1(mocker): def callback(request, context): dl_base = parse('pyvo/dal/tests/data/datalink/datalink.xml') dl_base_table = dl_base.get_first_table().to_table() cloud_access_str = '{"aws": {"bucket_name": "test", "key":"path/to/cloudfile.fits", "region": "us-west-2"}}' dl_base_table.add_column([cloud_access_str]*4, name='cloud_access') out = BytesIO() - writeto(from_table(dl_base_table), out) + votable = from_table(dl_base_table) + votable.get_first_table().get_field_by_id("cloud_access").utype = "adhoc:cloudstorage" + writeto(votable, out) return out.getvalue() with mocker.register_uri( - 'GET', 'http://example.com/datalink-cloud.xml', content=callback + 'GET', 'http://example.com/datalink-cloud1.xml', content=callback + ) as matcher: + yield matcher + +@pytest.fixture() +def datalink_cloud2(mocker): + def callback(request, context): + dl_base = parse('pyvo/dal/tests/data/datalink/datalink.xml') + dl_base_table = dl_base.get_first_table().to_table() + cloud_access_str = '{"aws": {"bucket_name": "test", "key":"path/to/cloudfile.fits", "region": "us-west-2"}}' + dl_base_table.add_column([cloud_access_str]*4, name='cloud_access') + out = BytesIO() + votable = from_table(dl_base_table) + votable.get_first_table().get_field_by_id("cloud_access").ucd = "meta.ref.cloudstorage" + writeto(votable, out) + return out.getvalue() + + with mocker.register_uri( + 'GET', 'http://example.com/datalink-cloud2.xml', content=callback ) as matcher: yield matcher @@ -355,7 +375,7 @@ def test_no_datalink(): result.getdatalink() @pytest.mark.filterwarnings("ignore::astropy.io.votable.exceptions.E02") -@pytest.mark.usefixtures('datalink_cloud') +@pytest.mark.usefixtures('datalink_cloud1', 'datalink_cloud2') class TestJsonColumns: """Tests for producing datalinks from tables containing links to datalink documents. 
@@ -366,41 +386,39 @@ class TestJsonColumns: "ucd": "meta.ref.url"}, {"name": "access_format", "datatype": "char", "arraysize": "*", "utype": "meta.code.mime"}, - {"name": "cloud_access", "datatype": "char", "arraysize": "*"},], - [("http://example.com/datalink-cloud.xml", + {"name": "cloud_access", "datatype": "char", "arraysize": "*", + "utype": "adhoc:cloudstorage", "ucd": "meta.ref.cloudstorage"},], + [("http://example.com/datalink-cloud1.xml", "application/x-votable+xml;content=datalink", '{"aws": {"bucket_name": "test", "key":"path/to/file1.fits", "region": "us-west-2"}}',), - ("http://example.com/datalink-cloud.xml", + ("http://example.com/datalink-cloud2.xml", "application/x-votable+xml;content=datalink", '{"aws": {"bucket_name": "test", "key":"path/to/file2.fits", "region": "us-west-2"}}',)], resultsClass=SIA2Results ) def test_record_w_json(self): - - parsed_json_matches = self.res[0].parse_json_params("cloud_access", "aws") + jsontxt = '{"aws": {"bucket_name": "test", "key":"path/to/file1.fits", "region": "us-west-2"}}' + parsed_json_matches = self.res[0].parse_json_params(json_txt=jsontxt, json_key="aws") assert parsed_json_matches[0]["bucket_name"] == "test" assert parsed_json_matches[0]["key"] == "path/to/file1.fits" assert parsed_json_matches[0]["region"] == "us-west-2" - def test_iter_json(self): - - parsed_json_matches = self.res.iter_parse_json_params("cloud_access", "aws") - assert parsed_json_matches[0]["record_row"] == 0 - assert parsed_json_matches[0]["bucket_name"] == "test" - assert parsed_json_matches[0]["key"] == "path/to/file1.fits" - assert parsed_json_matches[0]["region"] == "us-west-2" - assert parsed_json_matches[1]["record_row"] == 1 - assert parsed_json_matches[1]["key"] == "path/to/file2.fits" - def test_datalink_json(self): - parsed_cloud_params = self.res[0].get_cloud_params("cloud_access", "aws") + parsed_cloud_params = self.res[0].get_cloud_params(provider="aws", colname="cloud_access") assert 
parsed_cloud_params[0]["bucket_name"] == "test" assert parsed_cloud_params[0]["key"] == "path/to/cloudfile.fits" assert parsed_cloud_params[0]["region"] == "us-west-2" + parsed2 = self.res[0].get_cloud_params(provider="aws", colname="bad_col_name") + assert parsed_cloud_params == parsed2 + + parsed3 = self.res[1].get_cloud_params(provider="aws", colname="bad_col_name") + parsed4 = self.res[1].get_cloud_params(provider="aws", colname="cloud_access") + assert parsed3 == parsed4 + def test_iter_datalink_json(self): - parsed_json_matches = self.res.iter_get_cloud_params("cloud_access", "aws") + parsed_json_matches = self.res.iter_get_cloud_params(provider="aws", colname="cloud_access") assert parsed_json_matches[0]["record_row"] == 0 assert parsed_json_matches[0]["datalink_row"] == 0 assert parsed_json_matches[0]["bucket_name"] == "test" From 7989c73a05a0fe9db0a1faaeeb3cbb147006891a Mon Sep 17 00:00:00 2001 From: Daniel Giles Date: Thu, 18 Dec 2025 16:24:44 -0800 Subject: [PATCH 7/8] Fixed handling of multiple entries for a single provider, improved tests, fixed changelog entry --- CHANGES.rst | 2 +- pyvo/dal/adhoc.py | 36 ++++++++++++++++-------- pyvo/dal/tests/test_datalink.py | 49 +++++++++++++++++++++++++++++---- 3 files changed, 68 insertions(+), 19 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 5d232a77..2677454d 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,7 +4,7 @@ Enhancements and Fixes ---------------------- - Added functionality to the DatalinkRecordMixin and the DatalinkResultsMixin - to handle basic parsing of JSON entries + to handle basic parsing of JSON entries [#709] Deprecations and Removals diff --git a/pyvo/dal/adhoc.py b/pyvo/dal/adhoc.py index 1c983596..22546b6d 100644 --- a/pyvo/dal/adhoc.py +++ b/pyvo/dal/adhoc.py @@ -540,25 +540,37 @@ def parse_json_params( # init results table (avoiding adding import of astropy.table.Table) new_table = TableElement(VOTableFile()).to_table() - + jsonDict = json.loads(json_txt) if json_key not in 
jsonDict and verbose: print(f'No key "{json_key}" found in json_txt given.') else: - p_params = jsonDict[json_key] + # Expected format is a dictionary of providers as keys, with lists + # of dictionaries, for example: + # {"json_key1": [{"param1": p1val1, "param2": p2val1}, + # {"param1": p1val2, "param2": p2val2}] + # "json_key2": [{"param1": p1val3, "param2": p2val3}] + # } + jkey_params = jsonDict[json_key] + if isinstance(jkey_params, dict): + jkey_params = [jkey_params] checks = [] - for k, value in match_params.items(): - checks.append(p_params.getitem(k, value) == value) - - if all(checks): - if not isinstance(p_params, list): - p_params = [p_params] - colnames = list(p_params[0].keys()) - colvals = [[] for _ in colnames] - for ppar in p_params: - for idx, val in enumerate(ppar.values()): + col_init = False + for params in jkey_params: + for k, value in match_params.items(): + checks.append(params.get(k, value) == value) + + if all(checks): + if not col_init: + colnames = list(params.keys()) + colvals = [[] for _ in colnames] + col_init = True + for idx, val in enumerate(params.values()): colvals[idx].append(val) + try: new_table.add_columns(cols=colvals, names=colnames) + except UnboundLocalError: + pass return new_table diff --git a/pyvo/dal/tests/test_datalink.py b/pyvo/dal/tests/test_datalink.py index e865f5a5..70807c01 100644 --- a/pyvo/dal/tests/test_datalink.py +++ b/pyvo/dal/tests/test_datalink.py @@ -99,7 +99,9 @@ def datalink_cloud2(mocker): def callback(request, context): dl_base = parse('pyvo/dal/tests/data/datalink/datalink.xml') dl_base_table = dl_base.get_first_table().to_table() - cloud_access_str = '{"aws": {"bucket_name": "test", "key":"path/to/cloudfile.fits", "region": "us-west-2"}}' + cloud_access_str = ('{"aws": ' + '[{"bucket_name": "test", "key": "path/to/cloudfile.fits", "region": "us-west-2"}, ' + '{"bucket_name": "test", "key": "path/to/cloudfile2.fits", "region": "us-west-2"}]}') dl_base_table.add_column([cloud_access_str]*4, 
name='cloud_access') out = BytesIO() votable = from_table(dl_base_table) @@ -375,7 +377,7 @@ def test_no_datalink(): result.getdatalink() @pytest.mark.filterwarnings("ignore::astropy.io.votable.exceptions.E02") -@pytest.mark.usefixtures('datalink_cloud1', 'datalink_cloud2') +@pytest.mark.usefixtures('datalink_cloud1', 'datalink_cloud2', 'datalink_product') class TestJsonColumns: """Tests for producing datalinks from tables containing links to datalink documents. @@ -393,7 +395,10 @@ class TestJsonColumns: '{"aws": {"bucket_name": "test", "key":"path/to/file1.fits", "region": "us-west-2"}}',), ("http://example.com/datalink-cloud2.xml", "application/x-votable+xml;content=datalink", - '{"aws": {"bucket_name": "test", "key":"path/to/file2.fits", "region": "us-west-2"}}',)], + '{"aws": {"bucket_name": "test", "key":"path/to/file2.fits", "region": "us-west-2"}}',), + ("http://example.com/datalink.xml", + "application/x-votable+xml;content=datalink", + '{"aws": {"bucket_name": "test", "key":"path/to/file2.fits", "region": "us-west-2"}}',),], resultsClass=SIA2Results ) def test_record_w_json(self): @@ -404,6 +409,17 @@ def test_record_w_json(self): assert parsed_json_matches[0]["key"] == "path/to/file1.fits" assert parsed_json_matches[0]["region"] == "us-west-2" + def test_extra_key(self): + # Check that giving extra kwargs matches parameters + jsontxt = '{"aws": {"bucket_name": "test", "key":"path/to/file1.fits", "region": "us-west-2"}}' + parsed_json_matches0 = self.res[0].parse_json_params(json_txt=jsontxt, json_key="aws") + parsed_json_matches1 = self.res[0].parse_json_params(json_txt=jsontxt, json_key="aws", region="us-west-2") + + assert parsed_json_matches0 == parsed_json_matches1 + + parsed_json_matches2 = self.res[0].parse_json_params(json_txt=jsontxt, json_key="aws", region="us-west-1") + assert len(parsed_json_matches2) == 0 + def test_datalink_json(self): parsed_cloud_params = self.res[0].get_cloud_params(provider="aws", colname="cloud_access") assert 
# Check that guessing by ucd or utype works when given a non-existent column name
parsed_json_matches[2]["record_row"] == 1 + assert parsed_json_matches[2]["datalink_row"] == 0 + assert parsed_json_matches[2]["key"] == "path/to/cloudfile2.fits" + From 2afc2291c169cb7e971e67afae8b5e893c2ff3ab Mon Sep 17 00:00:00 2001 From: Daniel Giles Date: Fri, 19 Dec 2025 11:42:01 -0800 Subject: [PATCH 8/8] Formatting --- pyvo/dal/adhoc.py | 37 +++++++++--------- pyvo/dal/tests/test_datalink.py | 67 +++++++++++++++++++-------------- 2 files changed, 58 insertions(+), 46 deletions(-) diff --git a/pyvo/dal/adhoc.py b/pyvo/dal/adhoc.py index 22546b6d..155d3afc 100644 --- a/pyvo/dal/adhoc.py +++ b/pyvo/dal/adhoc.py @@ -400,8 +400,8 @@ def iter_datalinks(self, preserve_order=False): def iter_get_cloud_params( self, provider: str, - colname: str="cloud_access", - verbose: bool=False, + colname: str = "cloud_access", + verbose: bool = False, **match_params ): """ @@ -416,7 +416,7 @@ def iter_get_cloud_params( verbose : bool, optional Whether to print debug text, by default False. **match_params : str, optional - Further parameters on which to match beyond `provider`. + Further parameters on which to match beyond provider. Returns ------- @@ -430,7 +430,7 @@ def iter_get_cloud_params( products = dl_results.bysemantics("#this") for jrow, row in enumerate(products): - # if no colname column, there is nothing to do + # if no colname column, there is nothing to do jsontxt = row._guess_cloud_column(colname=colname) if jsontxt: access_points = row.parse_json_params( @@ -438,7 +438,7 @@ def iter_get_cloud_params( json_key=provider, verbose=verbose, **match_params - ) + ) access_points.add_column([jrow]*len(access_points), name="datalink_row", index=0) if jrow == 0: new_dl_table = access_points @@ -462,6 +462,7 @@ def iter_get_cloud_params( return new_table + class DatalinkRecordMixin: """ Mixin for record classes, providing functionallity for datalink. 
@@ -514,11 +515,11 @@ def getdataset(self, timeout=None): def parse_json_params( json_txt: str, json_key: str, - verbose: bool=False, + verbose: bool = False, **match_params - ): + ): """Parse information stored as JSON by key - + Parameters ---------- json_txt : str @@ -528,7 +529,7 @@ def parse_json_params( verbose : bool, optional Whether to print progress and errors, by default False **match_params : str, optional - Further parameters on which to match beyond `json_key`. + Further parameters on which to match beyond json_key. Returns ------- @@ -540,7 +541,7 @@ def parse_json_params( # init results table (avoiding adding import of astropy.table.Table) new_table = TableElement(VOTableFile()).to_table() - + jsonDict = json.loads(json_txt) if json_key not in jsonDict and verbose: print(f'No key "{json_key}" found in json_txt given.') @@ -591,7 +592,7 @@ def _guess_cloud_column(self, colname="cloud_access"): if cloud_access: return cloud_access - cloud_access = self.getbyucd("meta.ref.cloudstorage") + cloud_access = self.getbyucd("meta.ref.cloudstorage") if cloud_access: return cloud_access @@ -601,9 +602,9 @@ def get_cloud_params( colname: str = "cloud_access", verbose: bool = False, **match_params - ): + ): """Parse cloud information stored as JSON by provider - + Parameters ---------- provider: str @@ -613,8 +614,8 @@ def get_cloud_params( verbose: bool, optional If True, print progress and debug text, by default False **match_params - Further parameters on which to match beyond `provider`. - + Further parameters on which to match beyond provider. 
+ Return ------ astropy.Table @@ -625,9 +626,8 @@ def get_cloud_params( dl_results = self.getdatalink() products = dl_results.bysemantics("#this") - for irow, row in enumerate(products): - # if no colname column, there is nothing to do + # if no colname column, there is nothing to do cloud_json = row._guess_cloud_column(colname=colname) if cloud_json: access_points = row.parse_json_params( @@ -635,7 +635,7 @@ def get_cloud_params( json_key=provider, verbose=verbose, **match_params - ) + ) access_points.add_column([irow]*len(access_points), name="datalink_row", index=0) if irow == 0: new_table = access_points @@ -651,6 +651,7 @@ def get_cloud_params( return new_table + class DatalinkService(DALService, AvailabilityMixin, CapabilityMixin): """ a representation of a Datalink service diff --git a/pyvo/dal/tests/test_datalink.py b/pyvo/dal/tests/test_datalink.py index 70807c01..a921a4a6 100644 --- a/pyvo/dal/tests/test_datalink.py +++ b/pyvo/dal/tests/test_datalink.py @@ -76,12 +76,14 @@ def callback(request, context): ) as matcher: yield matcher + @pytest.fixture() def datalink_cloud1(mocker): - def callback(request, context): + def callback(request, context): dl_base = parse('pyvo/dal/tests/data/datalink/datalink.xml') dl_base_table = dl_base.get_first_table().to_table() - cloud_access_str = '{"aws": {"bucket_name": "test", "key":"path/to/cloudfile.fits", "region": "us-west-2"}}' + cloud_access_str = ('{"aws": {"bucket_name": "test", ' + '"key":"path/to/cloudfile.fits", "region": "us-west-2"}}') dl_base_table.add_column([cloud_access_str]*4, name='cloud_access') out = BytesIO() votable = from_table(dl_base_table) @@ -94,14 +96,17 @@ def callback(request, context): ) as matcher: yield matcher + @pytest.fixture() def datalink_cloud2(mocker): - def callback(request, context): + def callback(request, context): dl_base = parse('pyvo/dal/tests/data/datalink/datalink.xml') dl_base_table = dl_base.get_first_table().to_table() cloud_access_str = ('{"aws": ' - '[{"bucket_name": 
"test", "key": "path/to/cloudfile.fits", "region": "us-west-2"}, ' - '{"bucket_name": "test", "key": "path/to/cloudfile2.fits", "region": "us-west-2"}]}') + '[{"bucket_name": "test", ' + '"key": "path/to/cloudfile.fits", "region": "us-west-2"}, ' + '{"bucket_name": "test", ' + '"key": "path/to/cloudfile2.fits", "region": "us-west-2"}]}') dl_base_table.add_column([cloud_access_str]*4, name='cloud_access') out = BytesIO() votable = from_table(dl_base_table) @@ -114,6 +119,7 @@ def callback(request, context): ) as matcher: yield matcher + @pytest.fixture() def obscore_datalink(mocker): def callback(request, context): @@ -376,31 +382,32 @@ def test_no_datalink(): with pytest.raises(DALServiceError, match="No datalink found for record."): result.getdatalink() + @pytest.mark.filterwarnings("ignore::astropy.io.votable.exceptions.E02") @pytest.mark.usefixtures('datalink_cloud1', 'datalink_cloud2', 'datalink_product') class TestJsonColumns: - """Tests for producing datalinks from tables containing links to - datalink documents. + """Tests for parsing JSON in Records and Results columns. 
""" res = testing.create_dalresults([ - {"name": "access_url", "datatype": "char", "arraysize": "*", - "ucd": "meta.ref.url"}, - {"name": "access_format", "datatype": "char", "arraysize": "*", - "utype": "meta.code.mime"}, - {"name": "cloud_access", "datatype": "char", "arraysize": "*", - "utype": "adhoc:cloudstorage", "ucd": "meta.ref.cloudstorage"},], - [("http://example.com/datalink-cloud1.xml", - "application/x-votable+xml;content=datalink", - '{"aws": {"bucket_name": "test", "key":"path/to/file1.fits", "region": "us-west-2"}}',), - ("http://example.com/datalink-cloud2.xml", - "application/x-votable+xml;content=datalink", - '{"aws": {"bucket_name": "test", "key":"path/to/file2.fits", "region": "us-west-2"}}',), - ("http://example.com/datalink.xml", - "application/x-votable+xml;content=datalink", - '{"aws": {"bucket_name": "test", "key":"path/to/file2.fits", "region": "us-west-2"}}',),], - resultsClass=SIA2Results - ) + {"name": "access_url", "datatype": "char", "arraysize": "*", + "ucd": "meta.ref.url"}, + {"name": "access_format", "datatype": "char", "arraysize": "*", + "utype": "meta.code.mime"}, + {"name": "cloud_access", "datatype": "char", "arraysize": "*", + "utype": "adhoc:cloudstorage", "ucd": "meta.ref.cloudstorage"},], + [("http://example.com/datalink-cloud1.xml", + "application/x-votable+xml;content=datalink", + '{"aws": {"bucket_name": "test", "key":"path/to/file1.fits", "region": "us-west-2"}}',), + ("http://example.com/datalink-cloud2.xml", + "application/x-votable+xml;content=datalink", + '{"aws": {"bucket_name": "test", "key":"path/to/file2.fits", "region": "us-west-2"}}',), + ("http://example.com/datalink.xml", + "application/x-votable+xml;content=datalink", + '{"aws": {"bucket_name": "test", "key":"path/to/file2.fits", "region": "us-west-2"}}',),], + resultsClass=SIA2Results + ) + def test_record_w_json(self): jsontxt = '{"aws": {"bucket_name": "test", "key":"path/to/file1.fits", "region": "us-west-2"}}' @@ -413,11 +420,14 @@ def 
test_extra_key(self): # Check that giving extra kwargs matches parameters jsontxt = '{"aws": {"bucket_name": "test", "key":"path/to/file1.fits", "region": "us-west-2"}}' parsed_json_matches0 = self.res[0].parse_json_params(json_txt=jsontxt, json_key="aws") - parsed_json_matches1 = self.res[0].parse_json_params(json_txt=jsontxt, json_key="aws", region="us-west-2") + parsed_json_matches1 = self.res[0].parse_json_params(json_txt=jsontxt, + json_key="aws", region="us-west-2") assert parsed_json_matches0 == parsed_json_matches1 - parsed_json_matches2 = self.res[0].parse_json_params(json_txt=jsontxt, json_key="aws", region="us-west-1") + parsed_json_matches2 = self.res[0].parse_json_params(json_txt=jsontxt, + json_key="aws", + region="us-west-1") assert len(parsed_json_matches2) == 0 def test_datalink_json(self): @@ -444,7 +454,9 @@ def test_existing_colname(self): def no_column_in_datalink(self, capsys): # All column guesses come back empty - parsed_cloud_params = self.res[1].get_cloud_params(provider="aws", colname="cloud_access", verbose=True) + parsed_cloud_params = self.res[1].get_cloud_params(provider="aws", + colname="cloud_access", + verbose=True) assert "No column cloud_access" in capsys assert parsed_cloud_params is None @@ -463,4 +475,3 @@ def test_iter_datalink_json(self): assert parsed_json_matches[2]["record_row"] == 1 assert parsed_json_matches[2]["datalink_row"] == 0 assert parsed_json_matches[2]["key"] == "path/to/cloudfile2.fits" -