66import warnings
77import copy
88import requests
9+ import json
910
1011from .query import DALResults , DALQuery , DALService , Record
1112from .exceptions import DALServiceError
@@ -396,51 +397,6 @@ def iter_datalinks(self, preserve_order=False):
396397 yield from self ._iter_datalinks_from_dlblock (
397398 preserve_order = preserve_order )
398399
399- def iter_parse_json_params (
400- self ,
401- json_key : str ,
402- colname : str = "cloud_access" ,
403- verbose : bool = False ,
404- ** match_params
405- ):
406- """
407- Iterate over all Records in a DalResult and return parsed json parameters.
408-
409- Parameters
410- ----------
411- json_key : str
412- The primary key by which to filter JSON results.
413- colname : str, optional
414- The column containing JSON to be parsed, by default "cloud_access".
415- verbose : bool, optional
416- Whether to print progress and errors, by default False.
417- **match_params : str, optional
418- Further parameters on which to match beyond `json_key`.
419-
420- Returns
421- -------
422- astropy.Table
423- A table containing the JSON parameters separated into columns, each
424- row corresponding to a matching JSON entry for each DataLinkRecord
425- for each row of the original DalResult.
426-
427- """
428- for irow , record in enumerate (self ):
429- access_points = record .parse_json_params (
430- colname = colname ,
431- json_key = json_key ,
432- verbose = verbose ,
433- ** match_params
434- )
435- access_points .add_column ([irow ]* len (access_points ), name = "record_row" , index = 0 )
436- if irow == 0 :
437- new_table = access_points
438- else :
439- for row in access_points .iterrows ():
440- new_table .add_row (row )
441-
442- return new_table
443-
444400 def iter_get_cloud_params (
445401 self ,
446402 provider : str ,
@@ -475,9 +431,10 @@ def iter_get_cloud_params(
475431
476432 for jrow , row in enumerate (products ):
477433 # if no colname column, there is nothing to do
478- try :
434+ jsontxt = row ._guess_cloud_column (colname = colname )
435+ if jsontxt :
479436 access_points = row .parse_json_params (
480- colname = colname ,
437+ json_txt = jsontxt ,
481438 json_key = provider ,
482439 verbose = verbose ,
483440 ** match_params
@@ -488,15 +445,14 @@ def iter_get_cloud_params(
488445 else :
489446 for row in access_points .iterrows ():
490447 new_dl_table .add_row (row )
491- except KeyError :
448+ else :
492449 # no json column, continue
493450 if verbose :
494- print (f'No column { colname } found for row { irow } , datalink { jrow } ' )
451+ print (f'No column { colname } found for Results row { irow } , datalink row { jrow } ' )
495452 new_dl_table = TableElement (VOTableFile ()).to_table ()
496- continue
497453
498454 # do the json parsing
499- cloud_params = access_points
455+ cloud_params = new_dl_table
500456 cloud_params .add_column ([irow ]* len (cloud_params ), name = "record_row" , index = 0 )
501457 if irow == 0 :
502458 new_table = cloud_params
@@ -554,21 +510,21 @@ def getdataset(self, timeout=None):
554510 # this should go to Record.getdataset()
555511 return super ().getdataset (timeout = timeout )
556512
513+ @staticmethod
557514 def parse_json_params (
558- self ,
515+ json_txt : str ,
559516 json_key : str ,
560- colname : str = "cloud_access" ,
561517 verbose : bool = False ,
562518 ** match_params
563519 ):
564520 """Parse information stored as JSON by key
565521
566522 Parameters
567523 ----------
524+ json_txt : str
525+ Text interpreted as JSON
568526 json_key : str
569527 The primary key by which to filter JSON results
570- colname : str, optional
571- The column containing JSON to be parsed, by default "cloud_access"
572528 verbose : bool, optional
573529 Whether to print progress and errors, by default False
574530 **match_params : str, optional
@@ -581,43 +537,52 @@ def parse_json_params(
581537 row representing a matching JSON entry.
582538
583539 """
584- import json
585540
586541 # init results table (avoiding adding import of astropy.table.Table)
587542 new_table = TableElement (VOTableFile ()).to_table ()
588-
589- if verbose :
590- print (f'searching for and processing json column { colname } ' )
591543
592- try :
593- jsontxt = self [colname ]
594- jsonDict = json .loads (jsontxt )
595- if json_key not in jsonDict and verbose :
596- print (f'No key "{ json_key } " found for record'
597- 'in column "{colname}"' )
598- else :
599- p_params = jsonDict [json_key ]
600- checks = []
601- for k , value in match_params .items ():
602- checks .append (p_params .getitem (k , value ) == value )
603-
604- if all (checks ):
605- if not isinstance (p_params , list ):
606- p_params = [p_params ]
607- colnames = list (p_params [0 ].keys ())
608- colvals = [[] for _ in colnames ]
609- for ppar in p_params :
610- for idx , val in enumerate (ppar .values ()):
611- colvals [idx ].append (val )
612- new_table .add_columns (cols = colvals , names = colnames )
613-
614- except KeyError :
615- # no json column, return empty list
616- if verbose :
617- print (f'No column { colname } found for record.' )
544+ jsonDict = json .loads (json_txt )
545+ if json_key not in jsonDict and verbose :
546+ print (f'No key "{ json_key } " found in json_txt given.' )
547+ else :
548+ p_params = jsonDict [json_key ]
549+ checks = []
550+ for k , value in match_params .items ():
551+ checks .append (p_params .getitem (k , value ) == value )
552+
553+ if all (checks ):
554+ if not isinstance (p_params , list ):
555+ p_params = [p_params ]
556+ colnames = list (p_params [0 ].keys ())
557+ colvals = [[] for _ in colnames ]
558+ for ppar in p_params :
559+ for idx , val in enumerate (ppar .values ()):
560+ colvals [idx ].append (val )
561+ new_table .add_columns (cols = colvals , names = colnames )
618562
619563 return new_table
620564
565+ def _guess_cloud_column (self , colname = "cloud_access" ):
566+ """returns a guess for a URI to a data product in row.
567+
568+ This tries a few heuristics based on how cloud access or records might
569+ be marked up. This will return None if row does not look as if
570+ it contained a cloud access column.
571+ """
572+ if hasattr (self , colname ):
573+ return getattr (self , colname )
574+
575+ if colname in self :
576+ return self [colname ]
577+
578+ cloud_access = self .getbyutype ("adhoc:cloudstorage" )
579+ if cloud_access :
580+ return cloud_access
581+
582+ cloud_access = self .getbyucd ("meta.ref.cloudstorage" )
583+ if cloud_access :
584+ return cloud_access
585+
621586 def get_cloud_params (
622587 self ,
623588 provider : str ,
@@ -651,10 +616,11 @@ def get_cloud_params(
651616
652617 for irow , row in enumerate (products ):
653618 # if no colname column, there is nothing to do
654- try :
619+ cloud_json = row ._guess_cloud_column (colname = colname )
620+ if cloud_json :
655621 access_points = row .parse_json_params (
656- colname = colname ,
657- key = provider ,
622+ json_txt = cloud_json ,
623+ json_key = provider ,
658624 verbose = verbose ,
659625 ** match_params
660626 )
@@ -664,12 +630,12 @@ def get_cloud_params(
664630 else :
665631 for row in access_points .iterrows ():
666632 new_table .add_row (row )
667- except KeyError :
668- # no json column, continue
633+ else :
634+ # no json column, return None
669635 if verbose :
670636 print (f'No column { colname } found for row { irow } ' )
671- new_table = TableElement ( VOTableFile ()). to_table ()
672- continue
637+ new_table = None
638+ break
673639
674640 return new_table
675641
0 commit comments