From a58d1c74f80b8dcdba06769781924bfe706b39a5 Mon Sep 17 00:00:00 2001 From: Nalini Ganapati Date: Wed, 22 Jan 2025 21:52:13 -0800 Subject: [PATCH 1/4] Add field class/type/length to info output for list-fields --- examples/genomicsdb_query | 65 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 60 insertions(+), 5 deletions(-) diff --git a/examples/genomicsdb_query b/examples/genomicsdb_query index 04724d8..e41fa65 100755 --- a/examples/genomicsdb_query +++ b/examples/genomicsdb_query @@ -116,8 +116,66 @@ def parse_callset_json_for_split_row_ranges(callset_file, chunk_size): return split_row_ranges +def print_fields(key, val): + if "vcf_field_class" not in val: + val["vcf_field_class"] = ["FILTER"] + if "length" not in val: + val["length"] = "1" + for idx in range(len(val["vcf_field_class"])): + field_val = val["vcf_field_class"][idx] + if isinstance(val["type"], list): + if idx < len(val["type"]): + field_type = val["type"][idx] + else: + field_type = val["type"][0] + else: + field_type = val["type"] + if isinstance(val["length"], list): + if idx < len(val["length"]): + field_length = val["length"][idx] + else: + field_length = val["length"][0] + if "variable_length_descriptor" in field_length: + field_length = field_length["variable_length_descriptor"] + else: + field_length = val["length"] + if field_type == "int": + field_type = "Integer" + elif field_type == "float": + field_type = "Float" + elif field_type == "char": + if field_length.lower() == "var": + field_type = "String" + else: + field_type = "Char" + print(f"{key:<20} {field_val:10} {field_type:10} {field_length}") + + +def parse_vidmap_json_and_print_fields(vidmap_file): + vidmap = json.loads(genomicsdb.read_entire_file(vidmap_file)) + fields = vidmap["fields"] + print(f"{'Field':20} {'Class':10} {'Type':10} {'Length'}") + print(f"{'-----':20} {'-----':10} {'----':10} {'------'}") + if isinstance(fields, list): + {print_fields(field["name"], field) for field in fields} + else: # Old style vidmap 
json + for key, val in fields.items(): + # breakpoint() + print_fields(key, val) + abbreviations = { + "A": "Number of alternate alleles", + "R": "Number of alleles (including reference allele)", + "G": "Number of possible genotypes", + "PP or P": "Ploidy", + "VAR or var": "variable length", + } + print("--") + print("Abbreviations : ") + {print(f" {k}: {v}") for k, v in abbreviations.items()} + + def parse_vidmap_json_for_attributes(vidmap_file, attributes=None): - if attributes is None: + if attributes is None or len(attributes) == 0: return ["GT"] else: vidmap = json.loads(genomicsdb.read_entire_file(vidmap_file)) @@ -126,8 +184,6 @@ def parse_vidmap_json_for_attributes(vidmap_file, attributes=None): fields = [field["name"] for field in fields] else: # Old style vidmap json fields = fields.keys() - if len(attributes) == 0: - return fields attributes = attributes.replace(" ", "").split(",") not_found = [attribute for attribute in attributes if attribute not in fields] if len(not_found) > 0: @@ -322,8 +378,7 @@ def setup(): # List fields if args.list_fields: - fields = parse_vidmap_json_for_attributes(vidmap_file, attributes="") - print(*fields, sep="\n") + parse_vidmap_json_and_print_fields(vidmap_file) sys.exit(0) intervals = args.interval From fcff6c028c4742b555c9272ae96b4f3342191dce Mon Sep 17 00:00:00 2001 From: Nalini Ganapati Date: Wed, 22 Jan 2025 22:20:12 -0800 Subject: [PATCH 2/4] Rename variables --- examples/genomicsdb_query | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/genomicsdb_query b/examples/genomicsdb_query index e41fa65..e746632 100755 --- a/examples/genomicsdb_query +++ b/examples/genomicsdb_query @@ -122,7 +122,7 @@ def print_fields(key, val): if "length" not in val: val["length"] = "1" for idx in range(len(val["vcf_field_class"])): - field_val = val["vcf_field_class"][idx] + field_class = val["vcf_field_class"][idx] if isinstance(val["type"], list): if idx < len(val["type"]): field_type = val["type"][idx] 
@@ -148,7 +148,7 @@ def print_fields(key, val): field_type = "String" else: field_type = "Char" - print(f"{key:<20} {field_val:10} {field_type:10} {field_length}") + print(f"{key:<20} {field_class:10} {field_type:10} {field_length}") def parse_vidmap_json_and_print_fields(vidmap_file): @@ -171,7 +171,7 @@ def parse_vidmap_json_and_print_fields(vidmap_file): } print("--") print("Abbreviations : ") - {print(f" {k}: {v}") for k, v in abbreviations.items()} + {print(f" {key}: {val}") for key, val in abbreviations.items()} def parse_vidmap_json_for_attributes(vidmap_file, attributes=None): From f14853e1f0fb7899ea958cce5e4a29bc3093ea89 Mon Sep 17 00:00:00 2001 From: Nalini Ganapati Date: Wed, 22 Jan 2025 22:24:51 -0800 Subject: [PATCH 3/4] Place link to documentation of abbreviations --- examples/genomicsdb_query | 2 ++ 1 file changed, 2 insertions(+) diff --git a/examples/genomicsdb_query b/examples/genomicsdb_query index e746632..790f535 100755 --- a/examples/genomicsdb_query +++ b/examples/genomicsdb_query @@ -162,6 +162,8 @@ def parse_vidmap_json_and_print_fields(vidmap_file): for key, val in fields.items(): # breakpoint() print_fields(key, val) + # See https://github.com/GenomicsDB/GenomicsDB/wiki/Importing-VCF-data-into-GenomicsDB + # for description of lengths in vid mapping files abbreviations = { "A": "Number of alternate alleles", "R": "Number of alleles (including reference allele)", From 8cd0c71a32e1c1e00abdc83acf29f7d5b606664b Mon Sep 17 00:00:00 2001 From: Nalini Ganapati Date: Wed, 22 Jan 2025 23:13:10 -0800 Subject: [PATCH 4/4] Remove commented-out breakpoint() call --- examples/genomicsdb_query | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/genomicsdb_query b/examples/genomicsdb_query index 790f535..05d3445 100755 --- a/examples/genomicsdb_query +++ b/examples/genomicsdb_query @@ -160,7 +160,6 @@ def parse_vidmap_json_and_print_fields(vidmap_file): {print_fields(field["name"], field) for field in fields} else: # Old style vidmap json for key, val in fields.items(): - # 
breakpoint() print_fields(key, val) # See https://github.com/GenomicsDB/GenomicsDB/wiki/Importing-VCF-data-into-GenomicsDB # for description of lengths in vid mapping files