"""Load UniProt-derived Parquet files into Delta tables (create or upsert).

For every entry of ``file_table_mapping`` the Parquet file is read from the
``bucket_name`` bucket with Spark.  When a Delta table already exists at the
location listed in ``existing_delta_locations`` the rows are merged into it
(matched rows are updated, unmatched rows are inserted); otherwise a new
Delta table is written to that location and registered as
``<namespace>.<table_name>``.

NOTE(review): ``get_spark_session`` and ``get_minio_client`` are neither
defined nor imported in this file; presumably the runtime environment
provides them -- confirm before running this as a plain script.
"""

# Janaka E

# File-to-table mapping
file_table_mapping = {
    "janaka_db-source/sp_proteins_Jan8_parquet/feature_x_protein.parquet": "feature_x_protein",
    "janaka_db-source/sp_proteins_Jan8_parquet/protein_table.parquet": "protein",
    "janaka_db-source/sp_proteins_Jan8_parquet/name_table.parquet": "name",
    "janaka_db-source/sp_proteins_Jan8_parquet/identifier_table.parquet": "identifier",
    "janaka_db-source/sp_proteins_Jan8_parquet/association_table.parquet": "association",
}

# Bucket and namespace information
bucket_name = "cdm-lake"
namespace = "janaka_db"

# Existing Delta table locations (aligned with the metastore)
existing_delta_locations = {
    "protein": "janaka_db-deltalake/protein_table_delta",
    "feature_x_protein": "janaka_db-deltalake/feature_x_protein_delta",
    "name": "janaka_db-deltalake/name_table_delta",
    "identifier": "janaka_db-deltalake/identifier_table_delta",
    "association": "janaka_db-deltalake/association_table_delta",
}

# Key columns used to match incoming rows against existing rows.  Each table
# lists alternatives in order of preference.  The converters under
# uniprot_utils write the protein key as ``protein_id`` (TrEMBL converter) or
# ``protein_hash`` (SwissProt TSV converter); their association tables hold it
# in ``subject`` and have no ``protein_id`` column at all.
merge_key_candidates = {
    "feature_x_protein": [("protein_id", "feature_id"), ("protein_hash", "feature_id")],
    "association": [("subject", "ontology_id")],
}
# Every other table is matched on its protein key alone.
default_merge_key_candidates = [("protein_id",), ("protein_hash",)]


def build_merge_condition(table_name, columns=None):
    """Return the Delta MERGE condition for *table_name*.

    Args:
        table_name: Logical table name (a value of ``file_table_mapping``).
        columns: Optional iterable of column names available on both sides of
            the merge.  When given, the first key alternative fully contained
            in it is used; when omitted, the first alternative is used as is.

    Returns:
        A SQL condition comparing the ``existing`` and ``updates`` aliases on
        every key column, joined with ``AND``.

    Raises:
        ValueError: If *columns* is given and contains none of the key
            alternatives for the table.
    """
    candidates = merge_key_candidates.get(table_name, default_merge_key_candidates)
    if columns is None:
        keys = candidates[0]
    else:
        available = set(columns)
        keys = next((alt for alt in candidates if available.issuperset(alt)), None)
        if keys is None:
            raise ValueError(
                f"No usable merge key for table '{table_name}': expected one of "
                f"{candidates}, available columns are {sorted(available)}"
            )
    return " AND ".join(f"existing.{col} = updates.{col}" for col in keys)


def load_parquet_into_delta(spark, file_name, table_name):
    """Create or upsert the Delta table for one Parquet file, then preview it.

    Args:
        spark: Active Spark session.
        file_name: Object key of the Parquet file inside ``bucket_name``.
        table_name: Logical table name; must be a key of
            ``existing_delta_locations``.
    """
    # Deferred so that importing this module does not require Delta Lake.
    from delta.tables import DeltaTable

    # Derive paths
    parquet_file_path = f"s3a://{bucket_name}/{file_name}"
    delta_table_path = existing_delta_locations[table_name]
    delta_table_s3_path = f"s3a://{bucket_name}/{delta_table_path}"
    spark_table = f"{namespace}.{table_name}"

    print(f"Processing file: {file_name} -> Table: {table_name}")

    # Load the Parquet file into Spark
    df_spark = spark.read.parquet(parquet_file_path)
    print(f"Loaded Parquet file from {parquet_file_path} into Spark DataFrame.")

    if DeltaTable.isDeltaTable(spark, delta_table_s3_path):
        # Upsert: the key must exist in the stored table and in the new data.
        delta_table = DeltaTable.forPath(spark, delta_table_s3_path)
        existing_columns = set(delta_table.toDF().columns)
        shared_columns = [col for col in df_spark.columns if col in existing_columns]
        merge_condition = build_merge_condition(table_name, shared_columns)

        (
            delta_table.alias("existing")
            .merge(df_spark.alias("updates"), merge_condition)
            .whenMatchedUpdateAll()
            .whenNotMatchedInsertAll()
            .execute()
        )
        print(f"Table '{table_name}' updated successfully with new data.")
    else:
        # Create a new Delta table
        (
            df_spark.write.mode("overwrite")
            .format("delta")
            .option("path", delta_table_s3_path)
            .saveAsTable(spark_table)
        )
        print(f"New table '{table_name}' created successfully in namespace '{namespace}'.")

    # Verify the table contents
    result = spark.sql(f"SELECT * FROM {spark_table} LIMIT 5")
    result.show(truncate=False)
    print(f"Verification successful for table: {table_name}.")


def main():
    """Process every mapped file; return the names of the tables that failed."""
    # Imports kept from the original file header (currently unused), deferred
    # to run time so the helpers above stay importable without Spark/MinIO.
    from pyspark.sql import SparkSession  # noqa: F401
    from minio import Minio  # noqa: F401
    from minio.error import S3Error  # noqa: F401

    # Initialize Spark session and MinIO client
    spark = get_spark_session()
    # Kept from the original: the client is initialised although nothing below uses it.
    minio_client = get_minio_client()  # noqa: F841

    failed_tables = []
    for file_name, table_name in file_table_mapping.items():
        try:
            load_parquet_into_delta(spark, file_name, table_name)
        except Exception as e:
            # Best effort: report the failure and continue with the next table.
            print(f"Error processing file {file_name} for table {table_name}: {e}")
            failed_tables.append(table_name)

    if failed_tables:
        print(f"Finished with errors in {len(failed_tables)} table(s): {', '.join(failed_tables)}")
    return failed_tables


if __name__ == "__main__":
    main()
import pandas as pd
from io import StringIO

# This code parses the Uniprot .dat dumps into a tab delimited file
# Janaka E

# Cross-reference databases exported as one "; "-joined ID column each, in the
# order they appear in the output (the GO and Proteomes columns follow them).
_XREF_ID_COLUMNS = (
    "EMBL", "RefSeq", "GeneID", "PDB", "KEGG", "Reactome",
    "HGNC", "STRING", "BioCyc", "Pfam", "InterPro",
)


def _record_to_row(record):
    """Flatten one parsed record into the column -> value dict of a TSV row.

    Missing values are written as the string "NULL".
    """
    # Evidence codes attached to the record's features.
    evidence_codes = []
    for feature in record.features:
        qualifiers = feature.qualifiers
        if isinstance(qualifiers, dict) and "evidence" in qualifiers:
            value = qualifiers["evidence"]
            evidence_codes.extend(value if isinstance(value, list) else [value])

    # One "authors. title. location. PubMed:id" string per reference.
    publications = []
    for ref in record.references:
        authors = ref.authors.strip() if ref.authors else "Unknown Authors"
        title = ref.title.strip() if ref.title else "No Title"
        location = ref.location.strip() if ref.location else "No Journal Info"
        pubmed = ""
        for db, id_ in ref.references:
            if db.lower() == "pubmed":
                pubmed = f"PubMed:{id_}"
        publications.append(f"{authors}. {title}. {location}. {pubmed}".strip())

    # Walk the cross-references once instead of once per output column.
    ids_by_db = {}
    go_terms = []
    for xref in record.cross_references:
        db, id_, *_ = xref
        ids_by_db.setdefault(db, []).append(id_)
        if db == "GO" and len(xref) >= 3:
            go_terms.append(f"{xref[1]} ({xref[2]})")

    def xref_ids(db):
        return "; ".join(ids_by_db.get(db, ())) or "NULL"

    row = {
        "Entry": record.accessions[0] if record.accessions else "NULL",
        "Entry Name": record.entry_name if record.entry_name else "NULL",
        "Reviewed": "Reviewed" if record.data_class == "Reviewed" else "Unreviewed",
        "Protein names": record.description if record.description else "NULL",
        "Gene Names": "; ".join(map(str, record.gene_name)) if record.gene_name else "NULL",
        "Organism": record.organism if record.organism else "NULL",
        "Taxonomy": "; ".join(map(str, record.organism_classification)) if record.organism_classification else "NULL",
        "Length": len(record.sequence) if record.sequence else "NULL",
        "Sequence": record.sequence if record.sequence else "NULL",
        "PE": record.protein_existence if hasattr(record, "protein_existence") and record.protein_existence else "NULL",
    }
    for db in _XREF_ID_COLUMNS:
        row[db] = xref_ids(db)
    row["GO"] = "; ".join(map(str, go_terms)) if go_terms else "NULL"
    row["Proteomes"] = xref_ids("Proteomes")
    row["Keywords"] = "; ".join(map(str, record.keywords)) if record.keywords else "NULL"
    row["Evidence Codes"] = "; ".join(map(str, evidence_codes)) if evidence_codes else "NULL"
    row["Publications"] = "; ".join(publications) if publications else "NULL"
    return row


def parse_swissprot_file_in_chunks(file_path, output_file, chunk_size=5000000, record_parser=None):
    """Convert a UniProt flat file into a tab-delimited file, chunk by chunk.

    Lines are buffered until the buffer holds at least *chunk_size* lines and
    the current line starts with "//" (so a chunk never ends inside an entry).
    Each buffer is parsed and its rows are written to *output_file*.

    Args:
        file_path: Path of the UniProt text file to read.
        output_file: Path of the TSV to write.  It is (re)created by the first
            chunk that yields rows and appended to afterwards; it is left
            untouched when no rows are produced at all.
        chunk_size: Minimum number of lines per chunk.
        record_parser: Optional callable taking a text stream and returning an
            iterable of records; defaults to ``Bio.SwissProt.parse``.
    """
    if record_parser is None:
        # Deferred so a custom parser can be used without Biopython installed.
        from Bio import SwissProt
        record_parser = SwissProt.parse

    # The header is tied to the first successful write rather than to the
    # chunk number: a first chunk without usable records must not cause later
    # chunks to be appended header-less (possibly onto a stale file).
    header_written = False

    def write_to_file(records):
        """Write processed records to the output file."""
        nonlocal header_written
        df = pd.DataFrame(records)
        df.to_csv(
            output_file,
            sep="\t",
            index=False,
            mode="a" if header_written else "w",
            header=not header_written,
        )
        header_written = True

    def process_buffer(buffer):
        """Process lines in the buffer and write to the output file."""
        records = []
        try:
            for record in record_parser(StringIO("".join(buffer))):
                try:
                    records.append(_record_to_row(record))
                except Exception as e:
                    # A single bad record is reported and skipped.
                    print(f"Error parsing record: {e}")
        except Exception as e:
            # A parser failure ends this buffer; rows collected so far are kept.
            print(f"Error processing buffer: {e}")

        if records:
            write_to_file(records)

    # Main logic to read in chunks
    chunk_number = 1
    line_buffer = []
    with open(file_path, "r") as handle:
        for line in handle:
            line_buffer.append(line)
            if line.startswith("//") and len(line_buffer) >= chunk_size:
                print(f"Processing chunk {chunk_number}, lines read so far: {len(line_buffer)}")
                process_buffer(line_buffer)
                line_buffer = []  # Clear buffer
                chunk_number += 1

    # Process any remaining lines
    if line_buffer:
        print(f"Processing final chunk, total lines read: {len(line_buffer)}")
        process_buffer(line_buffer)

    print(f"Parsed data saved to {output_file}")


# Input and output file paths
# File paths are written to match the file paths in Sequoia -
#input_file = "/home/janakae/scratch/Uniprot/Trembl/uniprot_trembl.dat" # Path to your UniProt text file
#output_file = "/home/janakae/scratch/Uniprot/Trembl/Full_parsed_trembl_data.tsv"

#Test files in the repo
input_file = "/uniprotTest/uniprotTest.dat" # Path to your UniProt text file
output_file = "/uniprotTest/Full_parsed_swissprot_data_test.tsv"

# Parse the UniProt/SwissProt file in chunks
if __name__ == "__main__":
    parse_swissprot_file_in_chunks(input_file, output_file)
import ast
import hashlib
import os
import uuid

import pandas as pd

# This code converts the tab-delimited file parsed from the TrEMBL .dat dump
# into per-table Parquet files.
# Given the large size of the TrEMBL data, the input is processed in chunks and
# every chunk produces its own set of Parquet files in the output directory.
# Janaka E

# Column layouts are fixed up front so that a chunk producing no rows for a
# table still writes a file with the same columns as every other chunk.
ASSOCIATION_COLUMNS = ["subject", "ontology_id", "publications", "evidence_type"]
FEATURE_X_PROTEIN_COLUMNS = ["protein_id", "feature_id", "protocol_id"]


# Function to calculate SHA256 hash for sequences
def calculate_hash(sequence):
    """Return the SHA256 hex digest of *sequence*, or None when it is null."""
    return hashlib.sha256(sequence.encode('utf-8')).hexdigest() if pd.notnull(sequence) else None


# Function to generate UUID
def generate_uuid():
    """Return a new random UUID as a string."""
    return str(uuid.uuid4())


def extract_source(gene_names):
    """Return the first ORF name found in a 'Gene Names' cell, else "NULL".

    The cell is expected to hold the text of a Python dict literal; anything
    that is empty, "NULL", unparsable or lacks a non-empty 'ORFNames' list
    yields "NULL".
    """
    if gene_names == "NULL" or not gene_names.strip():
        return "NULL"
    try:
        parsed_data = ast.literal_eval(gene_names)
    except (ValueError, SyntaxError):
        return "NULL"
    if isinstance(parsed_data, dict) and 'ORFNames' in parsed_data:
        orf_names = parsed_data.get('ORFNames', [])
        if isinstance(orf_names, list) and len(orf_names) > 0:
            return orf_names[0]
    return "NULL"


def parse_ontologies(row):
    """Return the ontology IDs of one row: the KEGG cell (prefixed) and GO IDs."""
    ontologies = []
    if row['KEGG'] != "NULL":
        ontologies.append(f"KEGG: {row['KEGG']}")
    if row['GO'] != "NULL":
        # Each GO entry looks like "GO:0016020 (C:membrane)"; keep the ID only.
        ontologies.extend(term.split(' ')[0] for term in row['GO'].split('; ') if term)
    return ontologies


def build_chunk_tables(chunk, hash_to_uuid_map):
    """Build the five output tables for one chunk of the parsed TSV.

    Args:
        chunk: DataFrame holding one chunk of the parsed TSV (string columns).
        hash_to_uuid_map: Mapping of sequence hash -> protein UUID shared by
            all chunks; it is updated in place with the hashes first seen here.

    Returns:
        ``(tables, new_count)`` where *tables* maps the output file stem to its
        DataFrame (in write order) and *new_count* is the number of hashes
        added to *hash_to_uuid_map*.
    """
    # Treat missing values as NULL.  Because this runs before hashing, rows
    # without a sequence are all hashed as the text "NULL".
    chunk = chunk.fillna("NULL")

    # Calculate SHA256 hash for sequences
    chunk['hash'] = chunk['Sequence'].apply(calculate_hash)

    # Remove duplicate sequences based on the hash.
    # NOTE(review): this only de-duplicates within the chunk; a sequence already
    # seen in an earlier chunk reuses its UUID but is emitted again -- confirm
    # that is intended.
    unique_chunk = chunk.drop_duplicates(subset=['hash'])

    # Map UUIDs to each unique hash
    new_uuids = {hash_val: generate_uuid() for hash_val in unique_chunk['hash'] if hash_val not in hash_to_uuid_map}
    hash_to_uuid_map.update(new_uuids)
    protein_ids = unique_chunk['hash'].map(hash_to_uuid_map)

    protein_data = pd.DataFrame({
        'protein_id': protein_ids,
        'name': unique_chunk['Entry'],
        'length': pd.to_numeric(unique_chunk['Length'], errors='coerce').fillna(0).astype(int),
        'sequence': unique_chunk['Sequence'],
        'hash': unique_chunk['hash'],
        'description': unique_chunk['Protein names']
    })

    name_data = pd.DataFrame({
        'protein_id': protein_ids,
        'name': unique_chunk['Entry'],
        'entry': unique_chunk['Entry Name'],
        'source': unique_chunk['Gene Names'].apply(extract_source),
        'description': unique_chunk['Protein names']
    })

    identifier_data = pd.DataFrame({
        'protein_id': protein_ids,
        'identifier': unique_chunk['Entry Name'],
        'source': unique_chunk['Entry'].apply(lambda x: f"https://www.uniprot.org/uniprotkb/{x}/entry"),
        'description': unique_chunk['Protein names']
    })

    association_rows = []
    feature_rows = []
    for _, row in unique_chunk.iterrows():
        protein_id = hash_to_uuid_map[row['hash']]

        # One association row per ontology term of the protein.
        for ontology in parse_ontologies(row):
            association_rows.append({
                'subject': protein_id,
                'ontology_id': ontology,
                'publications': row['Publications'],
                'evidence_type': row['Evidence Codes']
            })

        # One feature row per GeneID, or a single NULL row when there is none.
        gene_ids = row['GeneID'] if row['GeneID'] != "NULL" else None
        if gene_ids:
            for gene_id in gene_ids.split("; "):  # Handle multiple GeneIDs
                feature_rows.append({
                    'protein_id': protein_id,
                    'feature_id': gene_id.strip(),
                    'protocol_id': "SwissProt/NCBI"
                })
        else:
            feature_rows.append({
                'protein_id': protein_id,
                'feature_id': "NULL",
                'protocol_id': "SwissProt/NCBI"
            })

    tables = {
        "protein_table": protein_data,
        "name_table": name_data,
        "identifier_table": identifier_data,
        "association_table": pd.DataFrame(association_rows, columns=ASSOCIATION_COLUMNS),
        "feature_x_protein": pd.DataFrame(feature_rows, columns=FEATURE_X_PROTEIN_COLUMNS),
    }
    return tables, len(new_uuids)


# Function to process each chunk
def process_chunk(chunk, hash_to_uuid_map, output_dir, chunk_number):
    """Build the tables for *chunk* and write them as Parquet files.

    Files are written to *output_dir* (created when missing) as
    ``<table>_chunk_<chunk_number>.parquet``.

    Returns:
        The number of sequence hashes newly added to *hash_to_uuid_map*.
    """
    tables, new_count = build_chunk_tables(chunk, hash_to_uuid_map)

    # Create output directory if it doesn't exist
    os.makedirs(output_dir, exist_ok=True)

    for stem, table in tables.items():
        table.to_parquet(os.path.join(output_dir, f"{stem}_chunk_{chunk_number}.parquet"), index=False)

    return new_count


# Input and output file paths
input_file = "Full_parsed_trembl_data.tsv"
chunk_size = 1000000  # Number of TSV rows handed to process_chunk at a time
output_dir = "output_parquet_files"

# Initialize hash-to-UUID mapping
hash_to_uuid_map = {}


def main():
    """Process the input file in chunks."""
    rows_read = 0
    reader = pd.read_csv(input_file, sep='\t', chunksize=chunk_size, dtype=str)
    for chunk_number, chunk in enumerate(reader, start=1):
        # Count the rows actually read; the last chunk is usually shorter.
        rows_read += len(chunk)
        print(f"Processing chunk {chunk_number}, lines read so far: {rows_read}")
        process_chunk(chunk, hash_to_uuid_map, output_dir, chunk_number)

    print(f"Tables generated and saved in {output_dir}.")


if __name__ == "__main__":
    main()
import ast  # Safe evaluation of strings to Python literals
import hashlib

import pandas as pd

## This code utilizes the Full_parsed_swissprot_data.tsv that was generated from uniprotDatFileParser.py
## and generates the CDM schema compliant parquet files and sample tsv files for each table type

# Column layouts of the row-built tables, fixed so that an input without any
# association/feature rows still produces tables with the expected columns.
ASSOCIATION_COLUMNS = ["subject", "ontology_id", "predicate", "publications", "evidence_type"]
FEATURE_X_PROTEIN_COLUMNS = ["protein_hash", "feature_id", "protocol_id"]


# Function to calculate SHA256 hash for sequences
def calculate_hash(sequence):
    """Return the SHA256 hex digest of *sequence*, or None when it is missing.

    pandas reads the "NULL" placeholder of the parsed TSV as NaN, so a missing
    sequence arrives here as a float and must not be encoded.
    """
    if pd.isna(sequence):
        return None
    return hashlib.sha256(sequence.encode('utf-8')).hexdigest()


def deduplicate_by_sequence(data):
    """Return *data* with a 'hash' column, one row per distinct sequence.

    Rows without a sequence cannot be keyed by hash and are skipped (their
    count is printed).  The input frame is not modified.
    """
    hashed = data.assign(hash=data['Sequence'].apply(calculate_hash))
    missing = hashed['hash'].isna()
    if missing.any():
        print(f"Skipping {int(missing.sum())} row(s) without a sequence.")
    # Remove duplicate sequences based on the hash
    return hashed[~missing].drop_duplicates(subset=['hash'])


# 1. Generate the 'protein' table
def build_protein_table(unique_data):
    """Return the 'protein' table, keyed by the sequence hash."""
    return pd.DataFrame({
        'protein_hash': unique_data['hash'],  # Use hash as the primary key
        'name': unique_data['Entry'],
        # Numeric even when the file was read as text; unparsable lengths become 0.
        'length': pd.to_numeric(unique_data['Length'], errors='coerce').fillna(0).astype(int),
        'sequence': unique_data['Sequence'],
        'description': unique_data['Protein names']
    })


# 2. Generate the 'name' table
def extract_source(gene_names):
    """Return the first ORF name found in a 'Gene Names' cell, else "NULL".

    The cell is expected to hold the text of a Python dict literal; anything
    that is missing, blank, unparsable or lacks a non-empty 'ORFNames' list
    yields "NULL".
    """
    if pd.isna(gene_names) or not gene_names.strip():
        return "NULL"
    try:
        parsed_data = ast.literal_eval(gene_names)
    except (ValueError, SyntaxError):
        return "NULL"
    if isinstance(parsed_data, dict) and 'ORFNames' in parsed_data:
        orf_names = parsed_data.get('ORFNames', [])
        if isinstance(orf_names, list) and len(orf_names) > 0:
            return orf_names[0]
    return "NULL"


def build_name_table(unique_data):
    """Return the 'name' table."""
    return pd.DataFrame({
        'protein_hash': unique_data['hash'],  # Use hash as the foreign key
        'name': unique_data['Entry'],
        'entry': unique_data['Entry Name'],
        'source': unique_data['Gene Names'].apply(extract_source),
        'description': unique_data['Protein names']
    })


# 3. Generate the 'identifier' table
def build_identifier_table(unique_data):
    """Return the 'identifier' table; 'source' is the entry's UniProt URL."""
    return pd.DataFrame({
        'protein_hash': unique_data['hash'],  # Use hash as the foreign key
        'identifier': unique_data['Entry Name'],
        'source': unique_data['Entry'].apply(lambda x: f"https://www.uniprot.org/uniprotkb/{x}/entry"),
        'description': unique_data['Protein names']
    })


# 4. Generate the 'association' table
def parse_ontologies(row):
    """Return the ontology IDs of one row: the KEGG cell (prefixed) and GO IDs."""
    ontologies = []
    if not pd.isna(row['KEGG']):
        ontologies.append(f"KEGG: {row['KEGG']}")
    if not pd.isna(row['GO']):
        # Each GO entry looks like "GO:0016020 (C:membrane)"; keep the ID only.
        ontologies.extend(term.split(' ')[0] for term in row['GO'].split('; ') if term)
    return ontologies


def build_association_table(unique_data):
    """Return the 'association' table: one row per protein/ontology-term pair."""
    rows = []
    for _, row in unique_data.iterrows():
        for ontology in parse_ontologies(row):
            rows.append({
                'subject': row['hash'],  # Use hash as the foreign key
                'ontology_id': ontology,
                'predicate': row['Entry'],  # Protein name as the predicate
                'publications': row['Publications'] if not pd.isna(row['Publications']) else "NULL",
                'evidence_type': row['Evidence Codes'] if not pd.isna(row['Evidence Codes']) else "NULL"
            })
    return pd.DataFrame(rows, columns=ASSOCIATION_COLUMNS)


# 5. Generate the 'feature_x_protein' table
def build_feature_x_protein_table(unique_data):
    """Return the 'feature_x_protein' table.

    A protein gets one row per GeneID, or a single row with feature_id "NULL"
    when it has none.
    """
    rows = []
    for _, row in unique_data.iterrows():
        gene_ids = row['GeneID']
        if pd.isna(gene_ids) or gene_ids == "NULL":
            feature_ids = ["NULL"]
        else:
            # str() guards against a GeneID column that was parsed as integers.
            feature_ids = [gene_id.strip() for gene_id in str(gene_ids).split("; ")]  # Handle multiple GeneIDs
        for feature_id in feature_ids:
            rows.append({
                'protein_hash': row['hash'],  # Use hash as the foreign key
                'feature_id': feature_id,
                'protocol_id': "SwissProt/NCBI"
            })
    return pd.DataFrame(rows, columns=FEATURE_X_PROTEIN_COLUMNS)


def save_table(table, stem):
    """Save *table* as ``<stem>.parquet`` plus a 20-row ``<stem>_sample.tsv`` for QA/QC."""
    table.to_parquet(f"{stem}.parquet", index=False)
    table.head(20).to_csv(f"{stem}_sample.tsv", sep='\t', index=False)


# Load the parsed_swissprot_data.tsv file - a sample file that has the "Full_parsed_swissprot_data.tsv" is located in uniprotTest folder
input_file = "Full_parsed_swissprot_data.tsv"


def main():
    """Read ``input_file`` and write every CDM table next to the script."""
    # Read every column as text: numeric-looking cells such as GeneID must stay
    # strings so that multi-value cells and single IDs are handled alike.
    data = pd.read_csv(input_file, sep='\t', dtype=str)
    unique_data = deduplicate_by_sequence(data)

    save_table(build_protein_table(unique_data), "protein_table")
    save_table(build_name_table(unique_data), "name_table")
    save_table(build_identifier_table(unique_data), "identifier_table")
    save_table(build_association_table(unique_data), "association_table")
    save_table(build_feature_x_protein_table(unique_data), "feature_x_protein")

    print("Tables generated and saved as Parquet and QA/QC TSV files.")


if __name__ == "__main__":
    main()
Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 256 MAFSAEDVLKEYDRRRRMEALLLSLYYPNDRKLLDYKEWSPPRVQVECPKAPVEWNNPPSEKGLIVGHFSGIKYKGEKAQASEVDVNKMCCWVSKFKDAMRRYQGIQTCKIPGKVLSDLDAKIKAYNLTVEGVEGFVRYSRVTKQHVAAFLKELRHSKQYENVNLIHYILTDKRVDIQHLEKDLVKDFKALVESAHRMRQGHMINVKYILYQLLKKHGHGPDGPDILTVKTGSKGVLYDDSFRKIYTDLGWKFTPL 4 AY548484 YP_031579.1 2947773 NULL vg:2947773 NULL NULL NULL NULL PF04947 IPR007031 GO:0046782 (P:regulation of viral transcription) UP000008770 Activator; Reference proteome; Transcription; Transcription regulation NULL Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. PubMed:15165820 +Q6GZX3 002L_FRG3G Reviewed RecName: Full=Uncharacterized protein 002L; {'ORFNames': ['FV3-002L']} Frog virus 3 (isolate Goorha) (FV-3). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 320 MSIIGATRLQNDKSDTYSAGPCYAGGCSAFTPRGTCGKDWDLGEQTCASGFCTSQPLCARIKKTQVCGLRYSSKGKDPLVSAEWDSRGAPYVRCTYDADLIDTQAQVDQFVSMFGESPSLAERYCMRGVKNTAGELVSRVSSDADPAGGWCRKWYSAHRGPDQDAALGSFCIKNPGAADCKCINRASDPVYQKVKTLHAYPDQCWYVPCAADVGELKMGTQRDTPTNCPTQVCQIVFNMLDDGSVTMDDVKNTINCDFSKYVPPPPPPKPTPPTPPTPPTPPTPPTPPTPPTPRPVHNRKVMFFVAGAVLVAILISTVRW 4 AY548484 YP_031580.1 2947774 NULL vg:2947774 NULL NULL NULL NULL PF03003 IPR004251 GO:0033644 (C:host cell membrane); GO:0016020 (C:membrane) UP000008770 Host membrane; Membrane; Reference proteome; Transmembrane; Transmembrane helix ECO:0000255; ECO:0000256|SAM:MobiDB-lite Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. 
PubMed:15165820 +Q197F8 002R_IIV3 Reviewed RecName: Full=Uncharacterized protein 002R; {'ORFNames': ['IIV3-002R']} Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus 458 MASNTVSAQGGSNRPVRDFSNIQDVAQFLLFDPIWNEQPGSIVPWKMNREQALAERYPELQTSEPSEDYSGPVESLELLPLEIKLDIMQYLSWEQISWCKHPWLWTRWYKDNVVRVSAITFEDFQREYAFPEKIQEIHFTDTRAEEIKAILETTPNVTRLVIRRIDDMNYNTHGDLGLDDLEFLTHLMVEDACGFTDFWAPSLTHLTIKNLDMHPRWFGPVMDGIKSMQSTLKYLYIFETYGVNKPFVQWCTDNIETFYCTNSYRYENVPRPIYVWVLFQEDEWHGYRVEDNKFHRRYMYSTILHKRDTDWVENNPLKTPAQVEMYKFLLRISQLNRDGTGYESDSDPENEHFDDESFSSGEEDSSDEDDPTWAPDSDDSDWETETEEEPSVAARILEKGKLTITNLMKSLGFKPKPKKIQSIDRYFCSLDSNYNSEDEDFEYDSDSEDDDSDSEDDC 4 DQ643392 YP_654574.1 4156251 NULL vg:4156251 NULL NULL NULL NULL NULL NULL NULL UP000001358 Reference proteome ECO:0000256|SAM:MobiDB-lite; ECO:0000256|SAM:MobiDB-lite; ECO:0000256|SAM:MobiDB-lite; ECO:0000256|SAM:MobiDB-lite Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., Kutish G.F., Rock D.L.. Genome of invertebrate iridescent virus type 3 (mosquito iridescent virus).. J. Virol. 80:8439-8449(2006).. PubMed:16912294 +Q197F7 003L_IIV3 Reviewed RecName: Full=Uncharacterized protein 003L; {'ORFNames': ['IIV3-003L']} Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus 156 MYQAINPCPQSWYGSPQLEREIVCKMSGAPHYPNYYPVHPNALGGAWFDTSLNARSLTTTPSLTTCTPPSLAACTPPTSLGMVDSPPHINPPRRIGTLCFDFGSAKSPQRCECVASDRPSTTSNTAPDTYRLLITNSKTRKNNYGTCRLEPLTYGI 4 DQ643392 YP_654575.1 4156252 NULL vg:4156252 NULL NULL NULL NULL NULL NULL NULL UP000001358 Reference proteome NULL Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., Kutish G.F., Rock D.L.. Genome of invertebrate iridescent virus type 3 (mosquito iridescent virus).. J. Virol. 
80:8439-8449(2006).. PubMed:16912294 +Q6GZX2 003R_FRG3G Reviewed RecName: Full=Uncharacterized protein 3R; Flags: Precursor; {'ORFNames': ['FV3-003R']} Frog virus 3 (isolate Goorha) (FV-3). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 438 MARPLLGKTSSVRRRLESLSACSIFFFLRKFCQKMASLVFLNSPVYQMSNILLTERRQVDRAMGGSDDDGVMVVALSPSDFKTVLGSALLAVERDMVHVVPKYLQTPGILHDMLVLLTPIFGEALSVDMSGATDVMVQQIATAGFVDVDPLHSSVSWKDNVSCPVALLAVSNAVRTMMGQPCQVTLIIDVGTQNILRDLVNLPVEMSGDLQVMAYTKDPLGKVPAVGVSVFDSGSVQKGDAHSVGAPDGLVSFHTHPVSSAVELNYHAGWPSNVDMSSLLTMKNLMHVVVAEEGLWTMARTLSMQRLTKVLTDAEKDVMRAAAFNLFLPLNELRVMGTKDSNNKSLKTYFEVFETFTIGALMKHSGVTPTAFVDRRWLDNTIYHMGFIPWGRDMRFVVEYDLDGTNPFLNTVPTLMSVKRKAKIQEMFDNMVSRMVTS 3 AY548484 YP_031581.1 2947775 NULL vg:2947775 NULL NULL NULL NULL NULL NULL NULL UP000008770 Reference proteome; Signal ECO:0000255 Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. PubMed:15165820 +Q6GZX1 004R_FRG3G Reviewed RecName: Full=Uncharacterized protein 004R; {'ORFNames': ['FV3-004R']} Frog virus 3 (isolate Goorha) (FV-3). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 60 MNAKYDTDQGVGRMLFLGTIGLAVVVGGLMAYGYYYDGKTPSSGTSFHTASPSFSSRYRY 4 AY548484 YP_031582.1 2947776 NULL vg:2947776 NULL NULL NULL NULL NULL NULL GO:0033644 (C:host cell membrane); GO:0016020 (C:membrane) UP000008770 Host membrane; Membrane; Reference proteome; Transmembrane; Transmembrane helix ECO:0000255; ECO:0000256|SAM:MobiDB-lite; ECO:0000256|SAM:MobiDB-lite Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. 
PubMed:15165820 +Q197F5 005L_IIV3 Reviewed RecName: Full=Uncharacterized protein 005L; Flags: Precursor; {'ORFNames': ['IIV3-005L']} Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus 217 MRYTVLIALQGALLLLLLIDDGQGQSPYPYPGMPCNSSRQCGLGTCVHSRCAHCSSDGTLCSPEDPTMVWPCCPESSCQLVVGLPSLVNHYNCLPNQCTDSSQCPGGFGCMTRRSKCELCKADGEACNSPYLDWRKDKECCSGYCHTEARGLEGVCIDPKKIFCTPKNPWQLAPYPPSYHQPTTLRPPTSLYDSWLMSGFLVKSTTAPSTQEEEDDY 3 DQ643392 YP_654577.1 4156254 NULL vg:4156254 NULL NULL NULL NULL NULL NULL NULL UP000001358 Reference proteome; Signal ECO:0000255 Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., Kutish G.F., Rock D.L.. Genome of invertebrate iridescent virus type 3 (mosquito iridescent virus).. J. Virol. 80:8439-8449(2006).. PubMed:16912294 +Q6GZX0 005R_FRG3G Reviewed RecName: Full=Uncharacterized protein 005R; {'ORFNames': ['FV3-005R']} Frog virus 3 (isolate Goorha) (FV-3). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 204 MQNPLPEVMSPEHDKRTTTPMSKEANKFIRELDKKPGDLAVVSDFVKRNTGKRLPIGKRSNLYVRICDLSGTIYMGETFILESWEELYLPEPTKMEVLGTLESCCGIPPFPEWIVMVGEDQCVYAYGDEEILLFAYSVKQLVEEGIQETGISYKYPDDISDVDEEVLQQDEEIQKIRKKTREFVDKDAQEFQDFLNSLDASLLS 4 AY548484 YP_031583.1 2947777 NULL vg:2947777 NULL NULL NULL NULL PF02393 IPR003360 NULL UP000008770 Reference proteome ECO:0000256|SAM:MobiDB-lite Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. PubMed:15165820 +Q91G88 006L_IIV6 Reviewed RecName: Full=Putative KilA-N domain-containing protein 006L; {'ORFNames': ['IIV6-006L']} Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus). 
Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Betairidovirinae; Iridovirus 352 MDSLNEVCYEQIKGTFYKGLFGDFPLIVDKKTGCFNATKLCVLGGKRFVDWNKTLRSKKLIQYYETRCDIKTESLLYEIKGDNNDEITKQITGTYLPKEFILDIASWISVEFYDKCNNIIINYFVNEYKTMDKKTLQSKINEVEEKMQKLLNEKEEELQEKNDKIDELILFSKRMEEDRKKDREMMIKQEKMLRELGIHLEDVSSQNNELIEKVDEQVEQNAVLNFKIDNIQNKLEIAVEDRAPQPKQNLKRERFILLKRNDDYYPYYTIRAQDINARSALKRQKNLYNEVSVLLDLTCHPNSKTLYVRVKDELKQKGVVFNLCKVSISNSKINEEELIKAMETINDEKRDV 3 AF303741 NP_149469.1 1733056 NULL vg:1733056 NULL NULL NULL NULL PF12299; PF04383 IPR022549; IPR018004; IPR017880 NULL UP000001359 Coiled coil; Reference proteome ECO:0000255|PROSITE-ProRule:PRU00631; ECO:0000255 Jakob N.J., Mueller K., Bahr U., Darai G.. Analysis of the first complete DNA sequence of an invertebrate iridovirus: coding strategy of the genome of Chilo iridescent virus.. Virology 286:182-196(2001).. PubMed:11448171; Eaton H.E., Metcalf J., Penny E., Tcherepanov V., Upton C., Brunetti C.R.. Comparative genomic analysis of the family Iridoviridae: re-annotating and defining the core set of iridovirus genes.. Virol. J. 4:11-11(2007).. PubMed:17239238 +Q6GZW9 006R_FRG3G Reviewed RecName: Full=Uncharacterized protein 006R; {'ORFNames': ['FV3-006R']} Frog virus 3 (isolate Goorha) (FV-3). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 75 MYKMYFLKDQKFSLSGTIRINDKTQSEYGSVWCPGLSITGLHHDAIDHNMFEEMETEIIEYLGPWVQAEYRRIKG 4 AY548484 YP_031584.1 2947778 NULL vg:2947778 NULL NULL NULL NULL NULL NULL NULL UP000008770 Reference proteome NULL Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. PubMed:15165820 +Q6GZW8 007R_FRG3G Reviewed RecName: Full=Uncharacterized protein 007R; {'ORFNames': ['FV3-007R']} Frog virus 3 (isolate Goorha) (FV-3). 
Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 128 MRSIKPLRCCNAHGRHVSQEYGRCTLLLFREKLFLQTGLVCNKQCNAPNNDGAESKHHGIHHGSRGALALRGAGVHLLASAALGPRVLAGLVPTGRSVQGSVGQCGRVAQIGRARDVAARKQESYCEK 4 AY548484 YP_031585.1 2947779 NULL vg:2947779 NULL NULL NULL NULL NULL NULL NULL UP000008770 Reference proteome NULL Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. PubMed:15165820 +Q197F3 007R_IIV3 Reviewed RecName: Full=Uncharacterized protein 007R; {'ORFNames': ['IIV3-007R']} Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus 447 MEAKNITIDNTTYNFFKFYNINQPLTNLKYLNSERLCFSNAVMGKIVDDASTITITYHRVYFGISGPKPRQVADLGEYYDVNELLNYDTYTKTQEFAQKYNSLVKPTIDAKNWSGNELVLLVGNEWYCKTFGKAGSKNVFLYNMIPTIYRDEPQHQEQILKKFMFFNATKNVEQNPNFLDNVPEEYYHLLLPKSWVEKNLSDKYRKIMETEHKPLVFSCEPAFSFGLCRNTQDKNESYQLSLCLYEREKPRDAEIVWAAKYDELAAMVRDYLKKTPEFKKYRSFISCMKGLSWKNNEIGDKDGPKLYPKVIFNRKKGEFVTIFTKDDDVEPETIEDPRTILDRRCVVQAALRLESVFVHNKVAIQLRINDVLISEWKEASSKPQPLILRRHRFTKPSSSVAKSTSPSLRNSGSDESDLNQSDSDKEDERVVPVPKTKRIVKTVKLPN 4 DQ643392 YP_654579.1 4156256 NULL vg:4156256 NULL NULL NULL NULL PF10927 IPR024416 NULL UP000001358 Reference proteome ECO:0000256|SAM:MobiDB-lite; ECO:0000256|SAM:MobiDB-lite; ECO:0000256|SAM:MobiDB-lite Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., Kutish G.F., Rock D.L.. Genome of invertebrate iridescent virus type 3 (mosquito iridescent virus).. J. Virol. 80:8439-8449(2006).. PubMed:16912294 +Q197F2 008L_IIV3 Reviewed RecName: Full=Uncharacterized protein 008L; {'ORFNames': ['IIV3-008L']} Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). 
Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus 347 MSFKVYDPIAELIATQFPTSNPDLQIINNDVLVVSPHKITLPMGPQNAGDVTNKAYVDQAVMSAAVPVASSTTVGTIQMAGDLEGSSGTNPIIAANKITLNKLQKIGPKMVIGNPNSDWNNTQEIELDSSFRIVDNRLNAGIVPISSTDPNKSNTVIPAPQQNGLFYLDSSGRVWVWAEHYYKCITPSRYISKWMGVGDFQELTVGQSVMWDSGRPSIETVSTQGLEVEWISSTNFTLSSLYLIPIVVKVTICIPLLGQPDQMAKFVLYSVSSAQQPRTGIVLTTDSSRSSAPIVSEYITVNWFEPKSYSVQLKEVNSDSGTTVTICSDKWLANPFLDCWITIEEVG 4 DQ643392 YP_654580.1 4156257 NULL vg:4156257 NULL NULL NULL NULL PF19264 IPR045571 NULL UP000001358 Reference proteome NULL Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., Kutish G.F., Rock D.L.. Genome of invertebrate iridescent virus type 3 (mosquito iridescent virus).. J. Virol. 80:8439-8449(2006).. PubMed:16912294 +Q6GZW6 009L_FRG3G Reviewed RecName: Full=Putative helicase 009L; EC=3.6.4.-; {'ORFNames': ['FV3-009L']} Frog virus 3 (isolate Goorha) (FV-3). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 948 
MDTSPYDFLKLYPWLSRGEADKGTLLDAFPGETFEQSLASDVAMRRAVQDDPAFGHQKLVETFLSEDTPYRELLLFHAPGTGKTCTVVSVAERAKEKGLTRGCIVLARGAALLRNFLHELVFNCGTGGRYIPEGYADMGDQERTRKMRKAVSSYYQFRTYETFAKSVATMSAEAIRARYDRFVIVMDEVHHLRSVQAEGVNTYSAISRFLRTVRGCVKMLLTGTPMTNEPGELADVLNLILPQDKTIRPEDGIFSNSGDLLKPDELAERVRGRVSYLKAARPDAGLTFAGEVLGGTGMTHLRLVRLEMSAFQSDAYASAWDQDAGDRNIFSNSRQCSLAVMPDRRWGSAAEARNPSQVRRMAGQNLAEYSVKYDYLVRVASSSPKTFAYCEYVNGSGLSLLSDILLANGWRRATGRETTPGKRFALLTASQKNIHKIVQRFNHEDNVDGAYISLLLGSRVVAEGLTFKEVRHTVILTPHWNYTETAQAIARSWRAGSHDRLKARGEAVAVTVHRLVAVPRGRDTPRSIDSDMYAVSEVKDKRIKAVERILMTSAADCSLLRSRNLYPSEFDGSRECEYGRCAYRCSNVSVEPGPLPALLGASAAEAVAQVRLDGGGDPAIMKVDMSTLWAEVTAGRRYVNRWGDGAVLRAEGGRLELSAPYGSSEEGRWGDFYKTRNLCYAKMDQDHLRADDLRDSLPQEVEELLTVSPVETIGETASAMPQEVATAILMACVQARADGKTLNVVRRDALLDFYKGFYAMGPSGWTVWLHARGANAKVYDGRRWNPADEDTLEFLAARSAKFTDTRIGYYGLYNPNLKDFCIRDVTQGKRDKVDLRKLTVGRRCVDWDQRTLVHIVARLMKIDGRRDFMPHATLREMRELAEQDPLHEPSDLTSKEACRRFLFWTQKGDNKFRRQDICKAMEKWFIENDLMEDNFDCGHQHKRRGKFA 4 AY548484 YP_031587.1 2947781 NULL vg:2947781 NULL NULL NULL NULL PF00271; PF04851 IPR006935; IPR014001; IPR001650; IPR027417; IPR038718; IPR050496 GO:0005524 (F:ATP binding); GO:0003677 (F:DNA binding); GO:0004386 (F:helicase activity); GO:0016787 (F:hydrolase activity) UP000008770 ATP-binding; Helicase; Hydrolase; Nucleotide-binding; Reference proteome ECO:0000255|PROSITE-ProRule:PRU00541; ECO:0000255|PROSITE-ProRule:PRU00542; ECO:0000255|PROSITE-ProRule:PRU00541 Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. PubMed:15165820 +Q91G85 009R_IIV6 Reviewed RecName: Full=Uncharacterized protein 009R; Flags: Precursor; {'ORFNames': ['IIV6-009R']} Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus). 
Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Betairidovirinae; Iridovirus 85 MIKLFCVLAAFISINSACQSSHQQREEFTVATYHSSSICTTYCYSNCVVASQHKGLNVESYTCDKPDPYGRETVCKCTLIKCHDI 3 AF303741 NP_149472.1 1733381 NULL vg:1733381 NULL NULL NULL NULL NULL NULL NULL UP000001359 Reference proteome; Signal ECO:0000255 Jakob N.J., Mueller K., Bahr U., Darai G.. Analysis of the first complete DNA sequence of an invertebrate iridovirus: coding strategy of the genome of Chilo iridescent virus.. Virology 286:182-196(2001).. PubMed:11448171; Eaton H.E., Metcalf J., Penny E., Tcherepanov V., Upton C., Brunetti C.R.. Comparative genomic analysis of the family Iridoviridae: re-annotating and defining the core set of iridovirus genes.. Virol. J. 4:11-11(2007).. PubMed:17239238 +Q6GZW5 010R_FRG3G Reviewed RecName: Full=Uncharacterized protein 010R; {'ORFNames': ['FV3-010R']} Frog virus 3 (isolate Goorha) (FV-3). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 137 MKMDTDCRHWIVLASVPVLTVLAFKGEGALALAGLLVMAAVAMYRDRTEKKYSAARAPSPIAGHKTAYVTDPSAFAAGTVPVYPAPSNMGSDRFEGWVGGVLTGVGSSHLDHRKFAERQLVDRREKMVGYGWTKSFF 4 AY548484 YP_031588.1 2947782 NULL vg:2947782 NULL NULL NULL NULL NULL NULL GO:0033644 (C:host cell membrane); GO:0016020 (C:membrane) UP000008770 Host membrane; Membrane; Reference proteome; Transmembrane; Transmembrane helix ECO:0000255 Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. PubMed:15165820 +Q197E9 011L_IIV3 Reviewed RecName: Full=Uncharacterized protein 011L; {'ORFNames': ['IIV3-011L']} Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). 
Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus 751 MMESPKYKKSTCSVTNLGGTCILPQKGATAPKAKDVSPELLVNKMDNLCQDWARTRNEYNKVHIEQAPTDSYFGVVHSHTPKKKYTSRDSDSEPEATSTRRSATAQRAANLKSSPVDQWSTTPPQPQPQPAAPTVKKTCASSPPAALSVKRTCTSPPPPPVLIDDDTGEDAFYDTNDPDIFYDIENGVSELETEGPKRPVYYQRNIRYPIDGSVPQESEQWYDPIDDEFLASSGDVVSLEPSPIAAFQPTPPKTVQFVPMPEEIIVPPPPPPKTVVDEGVQAMPYTVDQMIQTDFEESPLLANVNLRTIPIEEVNPNFSPVLMQDMVRDSFVFGTVAQRVMASQRVKQFFKELIEQDVSLAGRMCMDSGSPQLNLYNSLMGVKLLYRWRSSTTFYRAIVPEIDEPVQVMQDVLSSSEWAKFDSQAGIPPKMVYIHYKLLNDLVKTLICPNFQLTHAALVCVDCRPEAVGSDGLQDGRQRRCSNLVSEYHEMTLEDLFNTIKPADLNAKNIILSVLFQMLYAVATVQKQFGMGGLFANADSVHVRRIQPGGFWHYTVNGLRYSVPNYGYLVILTNFTDVVNYRPDFATTRYFGRRQAKVVPTRNWYKFVPFTTRYRPFVTVDPITQAKTTAYAPNPPTEGITINEFYKDSSDLRPSVPVDLNDMITFPVPEFHLTICRLFSFFSKFYDSNFIGNDPFVRNLVDRYSQPFEFPDVYWPEDGVSRVLACYTIEEIYPNWVDGDTDYVIESYNLD 4 DQ643392 YP_654583.1 4156260 NULL vg:4156260 NULL NULL NULL NULL NULL NULL NULL UP000001358 Reference proteome ECO:0000256|SAM:MobiDB-lite; ECO:0000256|SAM:MobiDB-lite; ECO:0000256|SAM:MobiDB-lite Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., Kutish G.F., Rock D.L.. Genome of invertebrate iridescent virus type 3 (mosquito iridescent virus).. J. Virol. 80:8439-8449(2006).. PubMed:16912294 +Q6GZW4 011R_FRG3G Reviewed RecName: Full=Uncharacterized protein 011R; {'ORFNames': ['FV3-011R']} Frog virus 3 (isolate Goorha) (FV-3). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 70 MTSVKTIAMLAMLVIVAALIYMGYRTFTSMQSKLNELESRVNAPQLRPPVMSPIVPLNFIESEDLDKELD 4 AY548484 YP_031589.1 2947783 NULL vg:2947783 NULL NULL NULL NULL NULL NULL GO:0033644 (C:host cell membrane); GO:0016020 (C:membrane) UP000008770 Host membrane; Membrane; Reference proteome; Transmembrane; Transmembrane helix ECO:0000255 Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. 
Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. PubMed:15165820; Tan W.G.H., Barkman T.J., Chinchar V.G., Essani K.. No Title. Submitted (FEB-2004) to the EMBL/GenBank/DDBJ databases.. +Q6GZW3 012L_FRG3G Reviewed RecName: Full=Uncharacterized protein 012L; {'ORFNames': ['FV3-012L']} Frog virus 3 (isolate Goorha) (FV-3). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 297 MCAKLVEMAFGPVNADSPPLTAEEKESAVEKLVGSKPFPALKKKYHDKVPAQDPKYCLFSFVEVLPSCDIKAAGAEEMCSCCIKRRRGQVFGVACVRGTAHTLAKAKQKADKLVGDYDSVHVVQTCHVGRPFPLVSSGMAQETVAPSAMEAAEAAMDAKSAEKRKERMRQKLEMRKREQEIKARNRKLLEDPSCDPDAEEETDLERYATLRVKTTCLLENAKNASAQIKEYLASMRKSAEAVVAMEAADPTLVENYPGLIRDSRAKMGVSKQDTEAFLKMSSFDCLTAASELETMGF 4 AY548484 YP_031590.1 2947784 NULL vg:2947784 NULL NULL NULL NULL PF19150 IPR043872 NULL UP000008770 Reference proteome NULL Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. PubMed:15165820 +Q197E7 013L_IIV3 Reviewed RecName: Full=Uncharacterized protein IIV3-013L; {'ORFNames': ['IIV3-013L']} Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus 90 MYYRDQYGNVKYAPEGMGPHHAASSSHHSAQHHHMTKENFSMDDVHSWFEKYKMWFLYALILALIFGVFMWWSKYNHDKKRSLNTASIFY 4 DQ643392 YP_654585.1 4156262 NULL vg:4156262 NULL NULL NULL NULL NULL NULL GO:0033644 (C:host cell membrane); GO:0016020 (C:membrane) UP000001358 Host membrane; Membrane; Reference proteome; Transmembrane; Transmembrane helix ECO:0000255; ECO:0000256|SAM:MobiDB-lite Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., Kutish G.F., Rock D.L.. 
Genome of invertebrate iridescent virus type 3 (mosquito iridescent virus).. J. Virol. 80:8439-8449(2006).. PubMed:16912294 +Q6GZW2 013R_FRG3G Reviewed RecName: Full=Uncharacterized protein 013R; {'ORFNames': ['FV3-013R']} Frog virus 3 (isolate Goorha) (FV-3). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 68 MANSVAFSSMTWYSPLASDNLYDICVDKVHNRVLCLCHSFGCCTNAVVIWILPSFDEFTPQTLSCKGP 4 AY548484 YP_031591.1 2947785 NULL vg:2947785 NULL NULL NULL NULL NULL NULL NULL UP000008770 Reference proteome NULL Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. PubMed:15165820 +Q6GZW1 014R_FRG3G Reviewed RecName: Full=Uncharacterized protein 014R; {'ORFNames': ['FV3-014R']} Frog virus 3 (isolate Goorha) (FV-3). Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3 119 METLVQAYLDIQGKIAEFRREIKALRVEEKAITANLFEAMGEAGVESIRISEDRYLVAEEKPKRTRSKQQFYQAAEGEGFTQEDVDRLMSLSRGAVTGSSSNVKIRKSAPARNEEDDDG 4 AY548484 YP_031592.1 2947786 NULL vg:2947786 NULL NULL NULL NULL NULL NULL NULL UP000008770 Coiled coil; Reference proteome ECO:0000256|SAM:MobiDB-lite; ECO:0000255 Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.. Comparative genomic analyses of frog virus 3, type species of the genus Ranavirus (family Iridoviridae).. Virology 323:70-84(2004).. PubMed:15165820 diff --git a/uniprot_utils/uniprotTest/uniprotTest.dat b/uniprot_utils/uniprotTest/uniprotTest.dat new file mode 100644 index 0000000..276c997 --- /dev/null +++ b/uniprot_utils/uniprotTest/uniprotTest.dat @@ -0,0 +1,1160 @@ +ID 001R_FRG3G Reviewed; 256 AA. +AC Q6GZX4; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 08-NOV-2023, entry version 44. 
+DE RecName: Full=Putative transcription factor 001R; +GN ORFNames=FV3-001R; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). +OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). +CC -!- FUNCTION: Transcription activation. {ECO:0000305}. +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09660.1; -; Genomic_DNA. +DR RefSeq; YP_031579.1; NC_005946.1. +DR SwissPalm; Q6GZX4; -. +DR GeneID; 2947773; -. +DR KEGG; vg:2947773; -. +DR Proteomes; UP000008770; Segment. +DR GO; GO:0046782; P:regulation of viral transcription; IEA:InterPro. +DR InterPro; IPR007031; Poxvirus_VLTF3. +DR Pfam; PF04947; Pox_VLTF3; 1. +PE 4: Predicted; +KW Activator; Reference proteome; Transcription; Transcription regulation. 
+FT CHAIN 1..256 +FT /note="Putative transcription factor 001R" +FT /id="PRO_0000410512" +SQ SEQUENCE 256 AA; 29735 MW; B4840739BF7D4121 CRC64; + MAFSAEDVLK EYDRRRRMEA LLLSLYYPND RKLLDYKEWS PPRVQVECPK APVEWNNPPS + EKGLIVGHFS GIKYKGEKAQ ASEVDVNKMC CWVSKFKDAM RRYQGIQTCK IPGKVLSDLD + AKIKAYNLTV EGVEGFVRYS RVTKQHVAAF LKELRHSKQY ENVNLIHYIL TDKRVDIQHL + EKDLVKDFKA LVESAHRMRQ GHMINVKYIL YQLLKKHGHG PDGPDILTVK TGSKGVLYDD + SFRKIYTDLG WKFTPL +// +ID 002L_FRG3G Reviewed; 320 AA. +AC Q6GZX3; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 08-NOV-2023, entry version 46. +DE RecName: Full=Uncharacterized protein 002L; +GN ORFNames=FV3-002L; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). +OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). +CC -!- SUBCELLULAR LOCATION: Host membrane {ECO:0000305}; Single-pass membrane +CC protein {ECO:0000305}. +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09661.1; -; Genomic_DNA. 
+DR RefSeq; YP_031580.1; NC_005946.1. +DR GeneID; 2947774; -. +DR KEGG; vg:2947774; -. +DR Proteomes; UP000008770; Segment. +DR GO; GO:0033644; C:host cell membrane; IEA:UniProtKB-SubCell. +DR GO; GO:0016020; C:membrane; IEA:UniProtKB-KW. +DR InterPro; IPR004251; Pox_virus_G9/A16. +DR Pfam; PF03003; Pox_G9-A16; 1. +PE 4: Predicted; +KW Host membrane; Membrane; Reference proteome; Transmembrane; +KW Transmembrane helix. +FT CHAIN 1..320 +FT /note="Uncharacterized protein 002L" +FT /id="PRO_0000410509" +FT TRANSMEM 301..318 +FT /note="Helical" +FT /evidence="ECO:0000255" +FT REGION 261..294 +FT /note="Disordered" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +SQ SEQUENCE 320 AA; 34642 MW; 9E110808B6E328E0 CRC64; + MSIIGATRLQ NDKSDTYSAG PCYAGGCSAF TPRGTCGKDW DLGEQTCASG FCTSQPLCAR + IKKTQVCGLR YSSKGKDPLV SAEWDSRGAP YVRCTYDADL IDTQAQVDQF VSMFGESPSL + AERYCMRGVK NTAGELVSRV SSDADPAGGW CRKWYSAHRG PDQDAALGSF CIKNPGAADC + KCINRASDPV YQKVKTLHAY PDQCWYVPCA ADVGELKMGT QRDTPTNCPT QVCQIVFNML + DDGSVTMDDV KNTINCDFSK YVPPPPPPKP TPPTPPTPPT PPTPPTPPTP PTPRPVHNRK + VMFFVAGAVL VAILISTVRW +// +ID 002R_IIV3 Reviewed; 458 AA. +AC Q197F8; +DT 16-JUN-2009, integrated into UniProtKB/Swiss-Prot. +DT 11-JUL-2006, sequence version 1. +DT 23-FEB-2022, entry version 29. +DE RecName: Full=Uncharacterized protein 002R; +GN ORFNames=IIV3-002R; +OS Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus. +OX NCBI_TaxID=345201; +OH NCBI_TaxID=7163; Aedes vexans (Inland floodwater mosquito) (Culex vexans). +OH NCBI_TaxID=42431; Culex territans. +OH NCBI_TaxID=332058; Culiseta annulata. +OH NCBI_TaxID=310513; Ochlerotatus sollicitans (eastern saltmarsh mosquito). +OH NCBI_TaxID=329105; Ochlerotatus taeniorhynchus (Black salt marsh mosquito) (Aedes taeniorhynchus). +OH NCBI_TaxID=7183; Psorophora ferox. 
+RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=16912294; DOI=10.1128/jvi.00464-06; +RA Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., +RA Kutish G.F., Rock D.L.; +RT "Genome of invertebrate iridescent virus type 3 (mosquito iridescent +RT virus)."; +RL J. Virol. 80:8439-8449(2006). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; DQ643392; ABF82032.1; -; Genomic_DNA. +DR RefSeq; YP_654574.1; NC_008187.1. +DR GeneID; 4156251; -. +DR KEGG; vg:4156251; -. +DR Proteomes; UP000001358; Genome. +PE 4: Predicted; +KW Reference proteome. +FT CHAIN 1..458 +FT /note="Uncharacterized protein 002R" +FT /id="PRO_0000377938" +FT REGION 339..397 +FT /note="Disordered" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +FT REGION 434..458 +FT /note="Disordered" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +FT COMPBIAS 348..387 +FT /note="Acidic residues" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +FT COMPBIAS 435..458 +FT /note="Acidic residues" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +SQ SEQUENCE 458 AA; 53921 MW; E46E5C85D7ACA139 CRC64; + MASNTVSAQG GSNRPVRDFS NIQDVAQFLL FDPIWNEQPG SIVPWKMNRE QALAERYPEL + QTSEPSEDYS GPVESLELLP LEIKLDIMQY LSWEQISWCK HPWLWTRWYK DNVVRVSAIT + FEDFQREYAF PEKIQEIHFT DTRAEEIKAI LETTPNVTRL VIRRIDDMNY NTHGDLGLDD + LEFLTHLMVE DACGFTDFWA PSLTHLTIKN LDMHPRWFGP VMDGIKSMQS TLKYLYIFET + YGVNKPFVQW CTDNIETFYC TNSYRYENVP RPIYVWVLFQ EDEWHGYRVE DNKFHRRYMY + STILHKRDTD WVENNPLKTP AQVEMYKFLL RISQLNRDGT GYESDSDPEN EHFDDESFSS + GEEDSSDEDD PTWAPDSDDS DWETETEEEP SVAARILEKG KLTITNLMKS LGFKPKPKKI + QSIDRYFCSL DSNYNSEDED FEYDSDSEDD DSDSEDDC +// +ID 003L_IIV3 Reviewed; 156 AA. +AC Q197F7; +DT 16-JUN-2009, integrated into UniProtKB/Swiss-Prot. 
+DT 11-JUL-2006, sequence version 1. +DT 12-AUG-2020, entry version 23. +DE RecName: Full=Uncharacterized protein 003L; +GN ORFNames=IIV3-003L; +OS Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus. +OX NCBI_TaxID=345201; +OH NCBI_TaxID=7163; Aedes vexans (Inland floodwater mosquito) (Culex vexans). +OH NCBI_TaxID=42431; Culex territans. +OH NCBI_TaxID=332058; Culiseta annulata. +OH NCBI_TaxID=310513; Ochlerotatus sollicitans (eastern saltmarsh mosquito). +OH NCBI_TaxID=329105; Ochlerotatus taeniorhynchus (Black salt marsh mosquito) (Aedes taeniorhynchus). +OH NCBI_TaxID=7183; Psorophora ferox. +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=16912294; DOI=10.1128/jvi.00464-06; +RA Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., +RA Kutish G.F., Rock D.L.; +RT "Genome of invertebrate iridescent virus type 3 (mosquito iridescent +RT virus)."; +RL J. Virol. 80:8439-8449(2006). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; DQ643392; ABF82033.1; -; Genomic_DNA. +DR RefSeq; YP_654575.1; NC_008187.1. +DR GeneID; 4156252; -. +DR KEGG; vg:4156252; -. +DR Proteomes; UP000001358; Genome. +PE 4: Predicted; +KW Reference proteome. +FT CHAIN 1..156 +FT /note="Uncharacterized protein 003L" +FT /id="PRO_0000377939" +SQ SEQUENCE 156 AA; 17043 MW; D48A43940FF8C815 CRC64; + MYQAINPCPQ SWYGSPQLER EIVCKMSGAP HYPNYYPVHP NALGGAWFDT SLNARSLTTT + PSLTTCTPPS LAACTPPTSL GMVDSPPHIN PPRRIGTLCF DFGSAKSPQR CECVASDRPS + TTSNTAPDTY RLLITNSKTR KNNYGTCRLE PLTYGI +// +ID 003R_FRG3G Reviewed; 438 AA. 
+AC Q6GZX2; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 08-NOV-2023, entry version 38. +DE RecName: Full=Uncharacterized protein 3R; +DE Flags: Precursor; +GN ORFNames=FV3-003R; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). +OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09662.1; -; Genomic_DNA. +DR RefSeq; YP_031581.1; NC_005946.1. +DR GeneID; 2947775; -. +DR KEGG; vg:2947775; -. +DR Proteomes; UP000008770; Segment. +PE 3: Inferred from homology; +KW Reference proteome; Signal. 
+FT SIGNAL 1..32 +FT /evidence="ECO:0000255" +FT CHAIN 33..438 +FT /note="Uncharacterized protein 3R" +FT /id="PRO_0000410532" +SQ SEQUENCE 438 AA; 48297 MW; 075C8FA17B3C5C56 CRC64; + MARPLLGKTS SVRRRLESLS ACSIFFFLRK FCQKMASLVF LNSPVYQMSN ILLTERRQVD + RAMGGSDDDG VMVVALSPSD FKTVLGSALL AVERDMVHVV PKYLQTPGIL HDMLVLLTPI + FGEALSVDMS GATDVMVQQI ATAGFVDVDP LHSSVSWKDN VSCPVALLAV SNAVRTMMGQ + PCQVTLIIDV GTQNILRDLV NLPVEMSGDL QVMAYTKDPL GKVPAVGVSV FDSGSVQKGD + AHSVGAPDGL VSFHTHPVSS AVELNYHAGW PSNVDMSSLL TMKNLMHVVV AEEGLWTMAR + TLSMQRLTKV LTDAEKDVMR AAAFNLFLPL NELRVMGTKD SNNKSLKTYF EVFETFTIGA + LMKHSGVTPT AFVDRRWLDN TIYHMGFIPW GRDMRFVVEY DLDGTNPFLN TVPTLMSVKR + KAKIQEMFDN MVSRMVTS +// +ID 004R_FRG3G Reviewed; 60 AA. +AC Q6GZX1; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 08-NOV-2023, entry version 39. +DE RecName: Full=Uncharacterized protein 004R; +GN ORFNames=FV3-004R; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). +OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). +CC -!- SUBCELLULAR LOCATION: Host membrane {ECO:0000305}; Single-pass membrane +CC protein {ECO:0000305}. 
+CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09663.1; -; Genomic_DNA. +DR RefSeq; YP_031582.1; NC_005946.1. +DR SMR; Q6GZX1; -. +DR GeneID; 2947776; -. +DR KEGG; vg:2947776; -. +DR Proteomes; UP000008770; Segment. +DR GO; GO:0033644; C:host cell membrane; IEA:UniProtKB-SubCell. +DR GO; GO:0016020; C:membrane; IEA:UniProtKB-KW. +PE 4: Predicted; +KW Host membrane; Membrane; Reference proteome; Transmembrane; +KW Transmembrane helix. +FT CHAIN 1..60 +FT /note="Uncharacterized protein 004R" +FT /id="PRO_0000410528" +FT TRANSMEM 14..34 +FT /note="Helical" +FT /evidence="ECO:0000255" +FT REGION 38..60 +FT /note="Disordered" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +FT COMPBIAS 39..60 +FT /note="Polar residues" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +SQ SEQUENCE 60 AA; 6514 MW; 12F072778EE6DFE4 CRC64; + MNAKYDTDQG VGRMLFLGTI GLAVVVGGLM AYGYYYDGKT PSSGTSFHTA SPSFSSRYRY +// +ID 005L_IIV3 Reviewed; 217 AA. +AC Q197F5; +DT 16-JUN-2009, integrated into UniProtKB/Swiss-Prot. +DT 11-JUL-2006, sequence version 1. +DT 12-OCT-2022, entry version 32. +DE RecName: Full=Uncharacterized protein 005L; +DE Flags: Precursor; +GN ORFNames=IIV3-005L; +OS Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus. +OX NCBI_TaxID=345201; +OH NCBI_TaxID=7163; Aedes vexans (Inland floodwater mosquito) (Culex vexans). +OH NCBI_TaxID=42431; Culex territans. +OH NCBI_TaxID=332058; Culiseta annulata. +OH NCBI_TaxID=310513; Ochlerotatus sollicitans (eastern saltmarsh mosquito). 
+OH NCBI_TaxID=329105; Ochlerotatus taeniorhynchus (Black salt marsh mosquito) (Aedes taeniorhynchus). +OH NCBI_TaxID=7183; Psorophora ferox. +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=16912294; DOI=10.1128/jvi.00464-06; +RA Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., +RA Kutish G.F., Rock D.L.; +RT "Genome of invertebrate iridescent virus type 3 (mosquito iridescent +RT virus)."; +RL J. Virol. 80:8439-8449(2006). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; DQ643392; ABF82035.1; -; Genomic_DNA. +DR RefSeq; YP_654577.1; NC_008187.1. +DR GeneID; 4156254; -. +DR KEGG; vg:4156254; -. +DR Proteomes; UP000001358; Genome. +PE 3: Inferred from homology; +KW Reference proteome; Signal. +FT SIGNAL 1..24 +FT /evidence="ECO:0000255" +FT CHAIN 25..217 +FT /note="Uncharacterized protein 005L" +FT /id="PRO_0000377940" +SQ SEQUENCE 217 AA; 23854 MW; 6DBE65967D7EEBD5 CRC64; + MRYTVLIALQ GALLLLLLID DGQGQSPYPY PGMPCNSSRQ CGLGTCVHSR CAHCSSDGTL + CSPEDPTMVW PCCPESSCQL VVGLPSLVNH YNCLPNQCTD SSQCPGGFGC MTRRSKCELC + KADGEACNSP YLDWRKDKEC CSGYCHTEAR GLEGVCIDPK KIFCTPKNPW QLAPYPPSYH + QPTTLRPPTS LYDSWLMSGF LVKSTTAPST QEEEDDY +// +ID 005R_FRG3G Reviewed; 204 AA. +AC Q6GZX0; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 08-NOV-2023, entry version 48. +DE RecName: Full=Uncharacterized protein 005R; +GN ORFNames=FV3-005R; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). 
+OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09664.1; -; Genomic_DNA. +DR RefSeq; YP_031583.1; NC_005946.1. +DR GeneID; 2947777; -. +DR KEGG; vg:2947777; -. +DR Proteomes; UP000008770; Segment. +DR InterPro; IPR003360; US22-like. +DR Pfam; PF02393; US22; 1. +PE 4: Predicted; +KW Reference proteome. +FT CHAIN 1..204 +FT /note="Uncharacterized protein 005R" +FT /id="PRO_0000410513" +FT REGION 1..20 +FT /note="Disordered" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +SQ SEQUENCE 204 AA; 23418 MW; 13AC9A179015F769 CRC64; + MQNPLPEVMS PEHDKRTTTP MSKEANKFIR ELDKKPGDLA VVSDFVKRNT GKRLPIGKRS + NLYVRICDLS GTIYMGETFI LESWEELYLP EPTKMEVLGT LESCCGIPPF PEWIVMVGED + QCVYAYGDEE ILLFAYSVKQ LVEEGIQETG ISYKYPDDIS DVDEEVLQQD EEIQKIRKKT + REFVDKDAQE FQDFLNSLDA SLLS +// +ID 006L_IIV6 Reviewed; 352 AA. +AC Q91G88; +DT 16-JUN-2009, integrated into UniProtKB/Swiss-Prot. +DT 01-DEC-2001, sequence version 1. +DT 28-JUN-2023, entry version 53. +DE RecName: Full=Putative KilA-N domain-containing protein 006L; +GN ORFNames=IIV6-006L; +OS Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus). 
+OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Betairidovirinae; Iridovirus. +OX NCBI_TaxID=176652; +OH NCBI_TaxID=6997; Acheta domesticus (House cricket). +OH NCBI_TaxID=168631; Chilo suppressalis (Asiatic rice borer moth). +OH NCBI_TaxID=6999; Gryllus bimaculatus (Two-spotted cricket). +OH NCBI_TaxID=58607; Gryllus campestris. +OH NCBI_TaxID=7108; Spodoptera frugiperda (Fall armyworm). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=11448171; DOI=10.1006/viro.2001.0963; +RA Jakob N.J., Mueller K., Bahr U., Darai G.; +RT "Analysis of the first complete DNA sequence of an invertebrate iridovirus: +RT coding strategy of the genome of Chilo iridescent virus."; +RL Virology 286:182-196(2001). +RN [2] +RP GENOME REANNOTATION. +RX PubMed=17239238; DOI=10.1186/1743-422x-4-11; +RA Eaton H.E., Metcalf J., Penny E., Tcherepanov V., Upton C., Brunetti C.R.; +RT "Comparative genomic analysis of the family Iridoviridae: re-annotating and +RT defining the core set of iridovirus genes."; +RL Virol. J. 4:11-11(2007). +CC -!- SIMILARITY: Belongs to the IIV-6 006L/238R/313L/468L family. +CC {ECO:0000305}. +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AF303741; AAK81943.1; -; Genomic_DNA. +DR RefSeq; NP_149469.1; NC_003038.1. +DR SMR; Q91G88; -. +DR GeneID; 1733056; -. +DR KEGG; vg:1733056; -. +DR OrthoDB; 8312at10239; -. +DR Proteomes; UP000001359; Genome. +DR InterPro; IPR022549; DUF3627. +DR InterPro; IPR018004; KilA/APSES_HTH. +DR InterPro; IPR017880; KilA_N. +DR Pfam; PF12299; DUF3627; 1. +DR Pfam; PF04383; KilA-N; 1. +DR PROSITE; PS51301; KILA_N; 1. +PE 3: Inferred from homology; +KW Coiled coil; Reference proteome. 
+FT CHAIN 1..352 +FT /note="Putative KilA-N domain-containing protein 006L" +FT /id="PRO_0000377958" +FT DOMAIN 15..123 +FT /note="KilA-N" +FT /evidence="ECO:0000255|PROSITE-ProRule:PRU00631" +FT COILED 129..236 +FT /evidence="ECO:0000255" +SQ SEQUENCE 352 AA; 41658 MW; 3519C5E9E45A8AB6 CRC64; + MDSLNEVCYE QIKGTFYKGL FGDFPLIVDK KTGCFNATKL CVLGGKRFVD WNKTLRSKKL + IQYYETRCDI KTESLLYEIK GDNNDEITKQ ITGTYLPKEF ILDIASWISV EFYDKCNNII + INYFVNEYKT MDKKTLQSKI NEVEEKMQKL LNEKEEELQE KNDKIDELIL FSKRMEEDRK + KDREMMIKQE KMLRELGIHL EDVSSQNNEL IEKVDEQVEQ NAVLNFKIDN IQNKLEIAVE + DRAPQPKQNL KRERFILLKR NDDYYPYYTI RAQDINARSA LKRQKNLYNE VSVLLDLTCH + PNSKTLYVRV KDELKQKGVV FNLCKVSISN SKINEEELIK AMETINDEKR DV +// +ID 006R_FRG3G Reviewed; 75 AA. +AC Q6GZW9; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 08-NOV-2023, entry version 35. +DE RecName: Full=Uncharacterized protein 006R; +GN ORFNames=FV3-006R; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). +OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). 
+CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09665.1; -; Genomic_DNA. +DR RefSeq; YP_031584.1; NC_005946.1. +DR GeneID; 2947778; -. +DR KEGG; vg:2947778; -. +DR Proteomes; UP000008770; Segment. +PE 4: Predicted; +KW Reference proteome. +FT CHAIN 1..75 +FT /note="Uncharacterized protein 006R" +FT /id="PRO_0000410535" +SQ SEQUENCE 75 AA; 8848 MW; DC926963B93EDF36 CRC64; + MYKMYFLKDQ KFSLSGTIRI NDKTQSEYGS VWCPGLSITG LHHDAIDHNM FEEMETEIIE + YLGPWVQAEY RRIKG +// +ID 007R_FRG3G Reviewed; 128 AA. +AC Q6GZW8; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 08-NOV-2023, entry version 33. +DE RecName: Full=Uncharacterized protein 007R; +GN ORFNames=FV3-007R; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). +OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). 
+CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09666.1; -; Genomic_DNA. +DR RefSeq; YP_031585.1; NC_005946.1. +DR GeneID; 2947779; -. +DR KEGG; vg:2947779; -. +DR Proteomes; UP000008770; Segment. +PE 4: Predicted; +KW Reference proteome. +FT CHAIN 1..128 +FT /note="Uncharacterized protein 007R" +FT /id="PRO_0000410565" +SQ SEQUENCE 128 AA; 13707 MW; 9DD5475975A4405F CRC64; + MRSIKPLRCC NAHGRHVSQE YGRCTLLLFR EKLFLQTGLV CNKQCNAPNN DGAESKHHGI + HHGSRGALAL RGAGVHLLAS AALGPRVLAG LVPTGRSVQG SVGQCGRVAQ IGRARDVAAR + KQESYCEK +// +ID 007R_IIV3 Reviewed; 447 AA. +AC Q197F3; +DT 16-JUN-2009, integrated into UniProtKB/Swiss-Prot. +DT 11-JUL-2006, sequence version 1. +DT 22-FEB-2023, entry version 28. +DE RecName: Full=Uncharacterized protein 007R; +GN ORFNames=IIV3-007R; +OS Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus. +OX NCBI_TaxID=345201; +OH NCBI_TaxID=7163; Aedes vexans (Inland floodwater mosquito) (Culex vexans). +OH NCBI_TaxID=42431; Culex territans. +OH NCBI_TaxID=332058; Culiseta annulata. +OH NCBI_TaxID=310513; Ochlerotatus sollicitans (eastern saltmarsh mosquito). +OH NCBI_TaxID=329105; Ochlerotatus taeniorhynchus (Black salt marsh mosquito) (Aedes taeniorhynchus). +OH NCBI_TaxID=7183; Psorophora ferox. +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=16912294; DOI=10.1128/jvi.00464-06; +RA Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., +RA Kutish G.F., Rock D.L.; +RT "Genome of invertebrate iridescent virus type 3 (mosquito iridescent +RT virus)."; +RL J. Virol. 
80:8439-8449(2006). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; DQ643392; ABF82037.1; -; Genomic_DNA. +DR RefSeq; YP_654579.1; NC_008187.1. +DR GeneID; 4156256; -. +DR KEGG; vg:4156256; -. +DR OrthoDB; 9237at10239; -. +DR Proteomes; UP000001358; Genome. +DR InterPro; IPR024416; DUF2738. +DR Pfam; PF10927; DUF2738; 1. +PE 4: Predicted; +KW Reference proteome. +FT CHAIN 1..447 +FT /note="Uncharacterized protein 007R" +FT /id="PRO_0000377941" +FT REGION 392..435 +FT /note="Disordered" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +FT COMPBIAS 397..417 +FT /note="Polar residues" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +FT COMPBIAS 418..435 +FT /note="Basic and acidic residues" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +SQ SEQUENCE 447 AA; 52027 MW; BE02EE24D34C1B46 CRC64; + MEAKNITIDN TTYNFFKFYN INQPLTNLKY LNSERLCFSN AVMGKIVDDA STITITYHRV + YFGISGPKPR QVADLGEYYD VNELLNYDTY TKTQEFAQKY NSLVKPTIDA KNWSGNELVL + LVGNEWYCKT FGKAGSKNVF LYNMIPTIYR DEPQHQEQIL KKFMFFNATK NVEQNPNFLD + NVPEEYYHLL LPKSWVEKNL SDKYRKIMET EHKPLVFSCE PAFSFGLCRN TQDKNESYQL + SLCLYEREKP RDAEIVWAAK YDELAAMVRD YLKKTPEFKK YRSFISCMKG LSWKNNEIGD + KDGPKLYPKV IFNRKKGEFV TIFTKDDDVE PETIEDPRTI LDRRCVVQAA LRLESVFVHN + KVAIQLRIND VLISEWKEAS SKPQPLILRR HRFTKPSSSV AKSTSPSLRN SGSDESDLNQ + SDSDKEDERV VPVPKTKRIV KTVKLPN +// +ID 008L_IIV3 Reviewed; 347 AA. +AC Q197F2; +DT 16-JUN-2009, integrated into UniProtKB/Swiss-Prot. +DT 11-JUL-2006, sequence version 1. +DT 23-FEB-2022, entry version 22. +DE RecName: Full=Uncharacterized protein 008L; +GN ORFNames=IIV3-008L; +OS Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). 
+OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus. +OX NCBI_TaxID=345201; +OH NCBI_TaxID=7163; Aedes vexans (Inland floodwater mosquito) (Culex vexans). +OH NCBI_TaxID=42431; Culex territans. +OH NCBI_TaxID=332058; Culiseta annulata. +OH NCBI_TaxID=310513; Ochlerotatus sollicitans (eastern saltmarsh mosquito). +OH NCBI_TaxID=329105; Ochlerotatus taeniorhynchus (Black salt marsh mosquito) (Aedes taeniorhynchus). +OH NCBI_TaxID=7183; Psorophora ferox. +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=16912294; DOI=10.1128/jvi.00464-06; +RA Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., +RA Kutish G.F., Rock D.L.; +RT "Genome of invertebrate iridescent virus type 3 (mosquito iridescent +RT virus)."; +RL J. Virol. 80:8439-8449(2006). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; DQ643392; ABF82038.1; -; Genomic_DNA. +DR RefSeq; YP_654580.1; NC_008187.1. +DR GeneID; 4156257; -. +DR KEGG; vg:4156257; -. +DR Proteomes; UP000001358; Genome. +DR InterPro; IPR045571; DUF5907. +DR Pfam; PF19264; DUF5907; 1. +PE 4: Predicted; +KW Reference proteome. 
+FT CHAIN 1..347 +FT /note="Uncharacterized protein 008L" +FT /id="PRO_0000377800" +SQ SEQUENCE 347 AA; 37905 MW; 5668FE61005BBE9D CRC64; + MSFKVYDPIA ELIATQFPTS NPDLQIINND VLVVSPHKIT LPMGPQNAGD VTNKAYVDQA + VMSAAVPVAS STTVGTIQMA GDLEGSSGTN PIIAANKITL NKLQKIGPKM VIGNPNSDWN + NTQEIELDSS FRIVDNRLNA GIVPISSTDP NKSNTVIPAP QQNGLFYLDS SGRVWVWAEH + YYKCITPSRY ISKWMGVGDF QELTVGQSVM WDSGRPSIET VSTQGLEVEW ISSTNFTLSS + LYLIPIVVKV TICIPLLGQP DQMAKFVLYS VSSAQQPRTG IVLTTDSSRS SAPIVSEYIT + VNWFEPKSYS VQLKEVNSDS GTTVTICSDK WLANPFLDCW ITIEEVG +// +ID 009L_FRG3G Reviewed; 948 AA. +AC Q6GZW6; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 02-OCT-2024, entry version 71. +DE RecName: Full=Putative helicase 009L; +DE EC=3.6.4.-; +GN ORFNames=FV3-009L; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). +OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09668.1; -; Genomic_DNA. 
+DR RefSeq; YP_031587.1; NC_005946.1. +DR GeneID; 2947781; -. +DR KEGG; vg:2947781; -. +DR Proteomes; UP000008770; Segment. +DR GO; GO:0005524; F:ATP binding; IEA:UniProtKB-KW. +DR GO; GO:0003677; F:DNA binding; IEA:InterPro. +DR GO; GO:0004386; F:helicase activity; IEA:UniProtKB-KW. +DR GO; GO:0016787; F:hydrolase activity; IEA:UniProtKB-KW. +DR Gene3D; 3.40.50.300; P-loop containing nucleotide triphosphate hydrolases; 1. +DR Gene3D; 3.40.50.10810; Tandem AAA-ATPase domain; 1. +DR InterPro; IPR006935; Helicase/UvrB_N. +DR InterPro; IPR014001; Helicase_ATP-bd. +DR InterPro; IPR001650; Helicase_C-like. +DR InterPro; IPR027417; P-loop_NTPase. +DR InterPro; IPR038718; SNF2-like_sf. +DR InterPro; IPR050496; SNF2_RAD54_helicase_repair. +DR PANTHER; PTHR45629:SF7; DNA EXCISION REPAIR PROTEIN ERCC-6-RELATED; 1. +DR PANTHER; PTHR45629; SNF2/RAD54 FAMILY MEMBER; 1. +DR Pfam; PF00271; Helicase_C; 1. +DR Pfam; PF04851; ResIII; 1. +DR SMART; SM00487; DEXDc; 1. +DR SMART; SM00490; HELICc; 1. +DR SUPFAM; SSF52540; P-loop containing nucleoside triphosphate hydrolases; 2. +DR PROSITE; PS51192; HELICASE_ATP_BIND_1; 1. +DR PROSITE; PS51194; HELICASE_CTER; 1. +PE 4: Predicted; +KW ATP-binding; Helicase; Hydrolase; Nucleotide-binding; Reference proteome. 
+FT CHAIN 1..948 +FT /note="Putative helicase 009L" +FT /id="PRO_0000410580" +FT DOMAIN 64..243 +FT /note="Helicase ATP-binding" +FT /evidence="ECO:0000255|PROSITE-ProRule:PRU00541" +FT DOMAIN 371..554 +FT /note="Helicase C-terminal" +FT /evidence="ECO:0000255|PROSITE-ProRule:PRU00542" +FT MOTIF 187..190 +FT /note="DEAH box" +FT BINDING 77..84 +FT /ligand="ATP" +FT /ligand_id="ChEBI:CHEBI:30616" +FT /evidence="ECO:0000255|PROSITE-ProRule:PRU00541" +SQ SEQUENCE 948 AA; 106444 MW; FD4765111B43B4FF CRC64; + MDTSPYDFLK LYPWLSRGEA DKGTLLDAFP GETFEQSLAS DVAMRRAVQD DPAFGHQKLV + ETFLSEDTPY RELLLFHAPG TGKTCTVVSV AERAKEKGLT RGCIVLARGA ALLRNFLHEL + VFNCGTGGRY IPEGYADMGD QERTRKMRKA VSSYYQFRTY ETFAKSVATM SAEAIRARYD + RFVIVMDEVH HLRSVQAEGV NTYSAISRFL RTVRGCVKML LTGTPMTNEP GELADVLNLI + LPQDKTIRPE DGIFSNSGDL LKPDELAERV RGRVSYLKAA RPDAGLTFAG EVLGGTGMTH + LRLVRLEMSA FQSDAYASAW DQDAGDRNIF SNSRQCSLAV MPDRRWGSAA EARNPSQVRR + MAGQNLAEYS VKYDYLVRVA SSSPKTFAYC EYVNGSGLSL LSDILLANGW RRATGRETTP + GKRFALLTAS QKNIHKIVQR FNHEDNVDGA YISLLLGSRV VAEGLTFKEV RHTVILTPHW + NYTETAQAIA RSWRAGSHDR LKARGEAVAV TVHRLVAVPR GRDTPRSIDS DMYAVSEVKD + KRIKAVERIL MTSAADCSLL RSRNLYPSEF DGSRECEYGR CAYRCSNVSV EPGPLPALLG + ASAAEAVAQV RLDGGGDPAI MKVDMSTLWA EVTAGRRYVN RWGDGAVLRA EGGRLELSAP + YGSSEEGRWG DFYKTRNLCY AKMDQDHLRA DDLRDSLPQE VEELLTVSPV ETIGETASAM + PQEVATAILM ACVQARADGK TLNVVRRDAL LDFYKGFYAM GPSGWTVWLH ARGANAKVYD + GRRWNPADED TLEFLAARSA KFTDTRIGYY GLYNPNLKDF CIRDVTQGKR DKVDLRKLTV + GRRCVDWDQR TLVHIVARLM KIDGRRDFMP HATLREMREL AEQDPLHEPS DLTSKEACRR + FLFWTQKGDN KFRRQDICKA MEKWFIENDL MEDNFDCGHQ HKRRGKFA +// +ID 009R_IIV6 Reviewed; 85 AA. +AC Q91G85; +DT 16-JUN-2009, integrated into UniProtKB/Swiss-Prot. +DT 01-DEC-2001, sequence version 1. +DT 22-FEB-2023, entry version 38. +DE RecName: Full=Uncharacterized protein 009R; +DE Flags: Precursor; +GN ORFNames=IIV6-009R; +OS Invertebrate iridescent virus 6 (IIV-6) (Chilo iridescent virus). 
+OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Betairidovirinae; Iridovirus. +OX NCBI_TaxID=176652; +OH NCBI_TaxID=6997; Acheta domesticus (House cricket). +OH NCBI_TaxID=168631; Chilo suppressalis (Asiatic rice borer moth). +OH NCBI_TaxID=6999; Gryllus bimaculatus (Two-spotted cricket). +OH NCBI_TaxID=58607; Gryllus campestris. +OH NCBI_TaxID=7108; Spodoptera frugiperda (Fall armyworm). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=11448171; DOI=10.1006/viro.2001.0963; +RA Jakob N.J., Mueller K., Bahr U., Darai G.; +RT "Analysis of the first complete DNA sequence of an invertebrate iridovirus: +RT coding strategy of the genome of Chilo iridescent virus."; +RL Virology 286:182-196(2001). +RN [2] +RP GENOME REANNOTATION. +RX PubMed=17239238; DOI=10.1186/1743-422x-4-11; +RA Eaton H.E., Metcalf J., Penny E., Tcherepanov V., Upton C., Brunetti C.R.; +RT "Comparative genomic analysis of the family Iridoviridae: re-annotating and +RT defining the core set of iridovirus genes."; +RL Virol. J. 4:11-11(2007). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AF303741; AAK81947.1; -; Genomic_DNA. +DR RefSeq; NP_149472.1; NC_003038.1. +DR GeneID; 1733381; -. +DR KEGG; vg:1733381; -. +DR OrthoDB; 37229at10239; -. +DR Proteomes; UP000001359; Genome. +PE 3: Inferred from homology; +KW Reference proteome; Signal. +FT SIGNAL 1..20 +FT /evidence="ECO:0000255" +FT CHAIN 21..85 +FT /note="Uncharacterized protein 009R" +FT /id="PRO_0000377959" +SQ SEQUENCE 85 AA; 9522 MW; 10D019471F22B372 CRC64; + MIKLFCVLAA FISINSACQS SHQQREEFTV ATYHSSSICT TYCYSNCVVA SQHKGLNVES + YTCDKPDPYG RETVCKCTLI KCHDI +// +ID 010R_FRG3G Reviewed; 137 AA. 
+AC Q6GZW5; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 08-NOV-2023, entry version 38. +DE RecName: Full=Uncharacterized protein 010R; +GN ORFNames=FV3-010R; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). +OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). +CC -!- SUBCELLULAR LOCATION: Host membrane {ECO:0000305}; Single-pass membrane +CC protein {ECO:0000305}. +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09669.1; -; Genomic_DNA. +DR RefSeq; YP_031588.1; NC_005946.1. +DR GeneID; 2947782; -. +DR KEGG; vg:2947782; -. +DR Proteomes; UP000008770; Segment. +DR GO; GO:0033644; C:host cell membrane; IEA:UniProtKB-SubCell. +DR GO; GO:0016020; C:membrane; IEA:UniProtKB-KW. +PE 4: Predicted; +KW Host membrane; Membrane; Reference proteome; Transmembrane; +KW Transmembrane helix. 
+FT CHAIN 1..137 +FT /note="Uncharacterized protein 010R" +FT /id="PRO_0000410534" +FT TRANSMEM 20..42 +FT /note="Helical" +FT /evidence="ECO:0000255" +SQ SEQUENCE 137 AA; 14878 MW; 69F02C3584628958 CRC64; + MKMDTDCRHW IVLASVPVLT VLAFKGEGAL ALAGLLVMAA VAMYRDRTEK KYSAARAPSP + IAGHKTAYVT DPSAFAAGTV PVYPAPSNMG SDRFEGWVGG VLTGVGSSHL DHRKFAERQL + VDRREKMVGY GWTKSFF +// +ID 011L_IIV3 Reviewed; 751 AA. +AC Q197E9; +DT 16-JUN-2009, integrated into UniProtKB/Swiss-Prot. +DT 11-JUL-2006, sequence version 1. +DT 22-FEB-2023, entry version 28. +DE RecName: Full=Uncharacterized protein 011L; +GN ORFNames=IIV3-011L; +OS Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus. +OX NCBI_TaxID=345201; +OH NCBI_TaxID=7163; Aedes vexans (Inland floodwater mosquito) (Culex vexans). +OH NCBI_TaxID=42431; Culex territans. +OH NCBI_TaxID=332058; Culiseta annulata. +OH NCBI_TaxID=310513; Ochlerotatus sollicitans (eastern saltmarsh mosquito). +OH NCBI_TaxID=329105; Ochlerotatus taeniorhynchus (Black salt marsh mosquito) (Aedes taeniorhynchus). +OH NCBI_TaxID=7183; Psorophora ferox. +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=16912294; DOI=10.1128/jvi.00464-06; +RA Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., +RA Kutish G.F., Rock D.L.; +RT "Genome of invertebrate iridescent virus type 3 (mosquito iridescent +RT virus)."; +RL J. Virol. 80:8439-8449(2006). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; DQ643392; ABF82041.1; -; Genomic_DNA. +DR RefSeq; YP_654583.1; NC_008187.1. +DR GeneID; 4156260; -. 
+DR KEGG; vg:4156260; -. +DR OrthoDB; 3146at10239; -. +DR Proteomes; UP000001358; Genome. +PE 4: Predicted; +KW Reference proteome. +FT CHAIN 1..751 +FT /note="Uncharacterized protein 011L" +FT /id="PRO_0000377801" +FT REGION 73..169 +FT /note="Disordered" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +FT COMPBIAS 82..96 +FT /note="Basic and acidic residues" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +FT COMPBIAS 97..120 +FT /note="Polar residues" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +SQ SEQUENCE 751 AA; 84876 MW; 6C5EA29DA7BDF6F7 CRC64; + MMESPKYKKS TCSVTNLGGT CILPQKGATA PKAKDVSPEL LVNKMDNLCQ DWARTRNEYN + KVHIEQAPTD SYFGVVHSHT PKKKYTSRDS DSEPEATSTR RSATAQRAAN LKSSPVDQWS + TTPPQPQPQP AAPTVKKTCA SSPPAALSVK RTCTSPPPPP VLIDDDTGED AFYDTNDPDI + FYDIENGVSE LETEGPKRPV YYQRNIRYPI DGSVPQESEQ WYDPIDDEFL ASSGDVVSLE + PSPIAAFQPT PPKTVQFVPM PEEIIVPPPP PPKTVVDEGV QAMPYTVDQM IQTDFEESPL + LANVNLRTIP IEEVNPNFSP VLMQDMVRDS FVFGTVAQRV MASQRVKQFF KELIEQDVSL + AGRMCMDSGS PQLNLYNSLM GVKLLYRWRS STTFYRAIVP EIDEPVQVMQ DVLSSSEWAK + FDSQAGIPPK MVYIHYKLLN DLVKTLICPN FQLTHAALVC VDCRPEAVGS DGLQDGRQRR + CSNLVSEYHE MTLEDLFNTI KPADLNAKNI ILSVLFQMLY AVATVQKQFG MGGLFANADS + VHVRRIQPGG FWHYTVNGLR YSVPNYGYLV ILTNFTDVVN YRPDFATTRY FGRRQAKVVP + TRNWYKFVPF TTRYRPFVTV DPITQAKTTA YAPNPPTEGI TINEFYKDSS DLRPSVPVDL + NDMITFPVPE FHLTICRLFS FFSKFYDSNF IGNDPFVRNL VDRYSQPFEF PDVYWPEDGV + SRVLACYTIE EIYPNWVDGD TDYVIESYNL D +// +ID 011R_FRG3G Reviewed; 70 AA. +AC Q6GZW4; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 08-NOV-2023, entry version 38. +DE RecName: Full=Uncharacterized protein 011R; +GN ORFNames=FV3-011R; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). 
+OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). +RN [2] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RA Tan W.G.H., Barkman T.J., Chinchar V.G., Essani K.; +RL Submitted (FEB-2004) to the EMBL/GenBank/DDBJ databases. +CC -!- SUBCELLULAR LOCATION: Host membrane {ECO:0000305}; Single-pass membrane +CC protein {ECO:0000305}. +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09670.1; -; Genomic_DNA. +DR RefSeq; YP_031589.1; NC_005946.1. +DR SMR; Q6GZW4; -. +DR GeneID; 2947783; -. +DR KEGG; vg:2947783; -. +DR Proteomes; UP000008770; Segment. +DR GO; GO:0033644; C:host cell membrane; IEA:UniProtKB-SubCell. +DR GO; GO:0016020; C:membrane; IEA:UniProtKB-KW. +PE 4: Predicted; +KW Host membrane; Membrane; Reference proteome; Transmembrane; +KW Transmembrane helix. +FT CHAIN 1..70 +FT /note="Uncharacterized protein 011R" +FT /id="PRO_0000410547" +FT TRANSMEM 4..24 +FT /note="Helical" +FT /evidence="ECO:0000255" +SQ SEQUENCE 70 AA; 7872 MW; D4A52724E9D798C6 CRC64; + MTSVKTIAML AMLVIVAALI YMGYRTFTSM QSKLNELESR VNAPQLRPPV MSPIVPLNFI + ESEDLDKELD +// +ID 012L_FRG3G Reviewed; 297 AA. +AC Q6GZW3; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. 
+DT 08-NOV-2023, entry version 36. +DE RecName: Full=Uncharacterized protein 012L; +GN ORFNames=FV3-012L; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). +OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09671.1; -; Genomic_DNA. +DR RefSeq; YP_031590.1; NC_005946.1. +DR SMR; Q6GZW3; -. +DR GeneID; 2947784; -. +DR KEGG; vg:2947784; -. +DR Proteomes; UP000008770; Segment. +DR InterPro; IPR043872; DUF5832. +DR Pfam; PF19150; DUF5832; 1. +PE 4: Predicted; +KW Reference proteome. 
+FT CHAIN 1..297 +FT /note="Uncharacterized protein 012L" +FT /id="PRO_0000410530" +SQ SEQUENCE 297 AA; 32669 MW; 9B1D9247FF7E5D25 CRC64; + MCAKLVEMAF GPVNADSPPL TAEEKESAVE KLVGSKPFPA LKKKYHDKVP AQDPKYCLFS + FVEVLPSCDI KAAGAEEMCS CCIKRRRGQV FGVACVRGTA HTLAKAKQKA DKLVGDYDSV + HVVQTCHVGR PFPLVSSGMA QETVAPSAME AAEAAMDAKS AEKRKERMRQ KLEMRKREQE + IKARNRKLLE DPSCDPDAEE ETDLERYATL RVKTTCLLEN AKNASAQIKE YLASMRKSAE + AVVAMEAADP TLVENYPGLI RDSRAKMGVS KQDTEAFLKM SSFDCLTAAS ELETMGF +// +ID 013L_IIV3 Reviewed; 90 AA. +AC Q197E7; +DT 16-JUN-2009, integrated into UniProtKB/Swiss-Prot. +DT 11-JUL-2006, sequence version 1. +DT 22-FEB-2023, entry version 37. +DE RecName: Full=Uncharacterized protein IIV3-013L; +GN ORFNames=IIV3-013L; +OS Invertebrate iridescent virus 3 (IIV-3) (Mosquito iridescent virus). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Betairidovirinae; Chloriridovirus. +OX NCBI_TaxID=345201; +OH NCBI_TaxID=7163; Aedes vexans (Inland floodwater mosquito) (Culex vexans). +OH NCBI_TaxID=42431; Culex territans. +OH NCBI_TaxID=332058; Culiseta annulata. +OH NCBI_TaxID=310513; Ochlerotatus sollicitans (eastern saltmarsh mosquito). +OH NCBI_TaxID=329105; Ochlerotatus taeniorhynchus (Black salt marsh mosquito) (Aedes taeniorhynchus). +OH NCBI_TaxID=7183; Psorophora ferox. +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. +RX PubMed=16912294; DOI=10.1128/jvi.00464-06; +RA Delhon G., Tulman E.R., Afonso C.L., Lu Z., Becnel J.J., Moser B.A., +RA Kutish G.F., Rock D.L.; +RT "Genome of invertebrate iridescent virus type 3 (mosquito iridescent +RT virus)."; +RL J. Virol. 80:8439-8449(2006). +CC -!- SUBCELLULAR LOCATION: Host membrane {ECO:0000305}; Single-pass membrane +CC protein {ECO:0000305}. 
+CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; DQ643392; ABF82043.1; -; Genomic_DNA. +DR RefSeq; YP_654585.1; NC_008187.1. +DR SMR; Q197E7; -. +DR GeneID; 4156262; -. +DR KEGG; vg:4156262; -. +DR OrthoDB; 21455at10239; -. +DR Proteomes; UP000001358; Genome. +DR GO; GO:0033644; C:host cell membrane; IEA:UniProtKB-SubCell. +DR GO; GO:0016020; C:membrane; IEA:UniProtKB-KW. +PE 4: Predicted; +KW Host membrane; Membrane; Reference proteome; Transmembrane; +KW Transmembrane helix. +FT CHAIN 1..90 +FT /note="Uncharacterized protein IIV3-013L" +FT /id="PRO_0000377942" +FT TRANSMEM 52..72 +FT /note="Helical" +FT /evidence="ECO:0000255" +FT REGION 13..34 +FT /note="Disordered" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +SQ SEQUENCE 90 AA; 10851 MW; 077C22D16315DB07 CRC64; + MYYRDQYGNV KYAPEGMGPH HAASSSHHSA QHHHMTKENF SMDDVHSWFE KYKMWFLYAL + ILALIFGVFM WWSKYNHDKK RSLNTASIFY +// +ID 013R_FRG3G Reviewed; 68 AA. +AC Q6GZW2; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 08-NOV-2023, entry version 31. +DE RecName: Full=Uncharacterized protein 013R; +GN ORFNames=FV3-013R; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). +OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. 
+RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09672.1; -; Genomic_DNA. +DR RefSeq; YP_031591.1; NC_005946.1. +DR GeneID; 2947785; -. +DR KEGG; vg:2947785; -. +DR Proteomes; UP000008770; Segment. +PE 4: Predicted; +KW Reference proteome. +FT CHAIN 1..68 +FT /note="Uncharacterized protein 013R" +FT /id="PRO_0000410562" +SQ SEQUENCE 68 AA; 7549 MW; D0C9713859A73203 CRC64; + MANSVAFSSM TWYSPLASDN LYDICVDKVH NRVLCLCHSF GCCTNAVVIW ILPSFDEFTP + QTLSCKGP +// +ID 014R_FRG3G Reviewed; 119 AA. +AC Q6GZW1; +DT 28-JUN-2011, integrated into UniProtKB/Swiss-Prot. +DT 19-JUL-2004, sequence version 1. +DT 08-NOV-2023, entry version 36. +DE RecName: Full=Uncharacterized protein 014R; +GN ORFNames=FV3-014R; +OS Frog virus 3 (isolate Goorha) (FV-3). +OC Viruses; Varidnaviria; Bamfordvirae; Nucleocytoviricota; Megaviricetes; +OC Pimascovirales; Iridoviridae; Alphairidovirinae; Ranavirus; Frog virus 3. +OX NCBI_TaxID=654924; +OH NCBI_TaxID=30343; Dryophytes versicolor (chameleon treefrog). +OH NCBI_TaxID=8404; Lithobates pipiens (Northern leopard frog) (Rana pipiens). +OH NCBI_TaxID=45438; Lithobates sylvaticus (Wood frog) (Rana sylvatica). +OH NCBI_TaxID=8316; Notophthalmus viridescens (Eastern newt) (Triturus viridescens). +RN [1] +RP NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]. 
+RX PubMed=15165820; DOI=10.1016/j.virol.2004.02.019; +RA Tan W.G., Barkman T.J., Gregory Chinchar V., Essani K.; +RT "Comparative genomic analyses of frog virus 3, type species of the genus +RT Ranavirus (family Iridoviridae)."; +RL Virology 323:70-84(2004). +CC --------------------------------------------------------------------------- +CC Copyrighted by the UniProt Consortium, see https://www.uniprot.org/terms +CC Distributed under the Creative Commons Attribution (CC BY 4.0) License +CC --------------------------------------------------------------------------- +DR EMBL; AY548484; AAT09673.1; -; Genomic_DNA. +DR RefSeq; YP_031592.1; NC_005946.1. +DR SMR; Q6GZW1; -. +DR GeneID; 2947786; -. +DR KEGG; vg:2947786; -. +DR Proteomes; UP000008770; Segment. +PE 4: Predicted; +KW Coiled coil; Reference proteome. +FT CHAIN 1..119 +FT /note="Uncharacterized protein 014R" +FT /id="PRO_0000410559" +FT REGION 95..119 +FT /note="Disordered" +FT /evidence="ECO:0000256|SAM:MobiDB-lite" +FT COILED 6..36 +FT /evidence="ECO:0000255" +SQ SEQUENCE 119 AA; 13358 MW; C15D333E848F54B1 CRC64; + METLVQAYLD IQGKIAEFRR EIKALRVEEK AITANLFEAM GEAGVESIRI SEDRYLVAEE + KPKRTRSKQQ FYQAAEGEGF TQEDVDRLMS LSRGAVTGSS SNVKIRKSAP ARNEEDDDG +// diff --git a/uniprot_utils/uniprot_textFile_parser.py b/uniprot_utils/uniprot_textFile_parser.py new file mode 100644 index 0000000..bd5b51a --- /dev/null +++ b/uniprot_utils/uniprot_textFile_parser.py @@ -0,0 +1,130 @@ +import pandas as pd +import hashlib +import ast # Safe evaluation of strings to Python literals +import uuid # For generating UUIDs + +#Janaka E +# Function to calculate SHA256 hash for sequences +def calculate_hash(sequence): + return hashlib.sha256(sequence.encode('utf-8')).hexdigest() + +# Function to generate UUID +def generate_uuid(): + return str(uuid.uuid4()) + +# Load the parsed_swissprot_data.tsv file - This is the parsed tsv file that was genrated based on the Uniprot data +input_file = "Full_parsed_trembl_data.tsv" +data = 
pd.read_csv(input_file, sep='\t') + +# Calculate SHA256 hash for sequences +data['hash'] = data['Sequence'].apply(calculate_hash) + +# Remove duplicate sequences based on the hash +unique_data = data.drop_duplicates(subset=['hash']) + +# Map UUIDs to each protein +hash_to_uuid_map = {hash_val: generate_uuid() for hash_val in unique_data['hash']} + +# 1. Generate the 'protein' table +protein_table = pd.DataFrame({ + 'protein_id': unique_data['hash'].map(hash_to_uuid_map), + 'name': unique_data['Entry'], + 'length': unique_data['Length'], + 'sequence': unique_data['Sequence'], + 'hash': unique_data['hash'], + 'description': unique_data['Protein names'] +}) + +# Save the 'protein' table as Parquet +protein_table.to_parquet("protein_table.parquet", index=False) + +# 2. Generate the 'name' table +def extract_source(gene_names): + if pd.isna(gene_names) or not gene_names.strip(): + return "NULL" + try: + parsed_data = ast.literal_eval(gene_names) # Safe evaluation + if isinstance(parsed_data, dict) and 'ORFNames' in parsed_data: + orf_names = parsed_data.get('ORFNames', []) + if isinstance(orf_names, list) and len(orf_names) > 0: + return orf_names[0] + return "NULL" + except (ValueError, SyntaxError): + return "NULL" + +name_table = pd.DataFrame({ + 'protein_id': unique_data['hash'].map(hash_to_uuid_map), + 'name': unique_data['Entry'], + 'entry': unique_data['Entry Name'], + 'source': unique_data['Gene Names'].apply(extract_source), + 'description': unique_data['Protein names'] +}) + +# Save the 'name' table as Parquet +name_table.to_parquet("name_table.parquet", index=False) + +# 3. 
# Generate the 'identifier' table
identifier_table = pd.DataFrame({
    'protein_id': unique_data['hash'].map(hash_to_uuid_map),
    'identifier': unique_data['Entry Name'],
    'source': unique_data['Entry'].apply(
        lambda accession: f"https://www.uniprot.org/uniprotkb/{accession}/entry"
    ),
    'description': unique_data['Protein names'],
})

# Save the 'identifier' table as Parquet
identifier_table.to_parquet("identifier_table.parquet", index=False)


# 4. Generate the 'association' table
def parse_ontologies(row):
    """Collect the ontology references of one input row.

    Reads the 'KEGG' and 'GO' entries of `row`. A present KEGG value is kept
    whole and prefixed with "KEGG: ". The GO value is split on ';' and the
    first whitespace-delimited token of every non-blank piece is kept.

    Returns a list of strings; the list is empty when both entries are missing.
    """
    ontologies = []
    if not pd.isna(row['KEGG']):
        ontologies.append(f"KEGG: {row['KEGG']}")
    if not pd.isna(row['GO']):
        # Splitting on ';' and skipping blank pieces tolerates separators
        # written without a trailing space as well as trailing separators.
        ontologies.extend(
            piece.split()[0]
            for piece in str(row['GO']).split(';')
            if piece.strip()
        )
    return ontologies


# Explicit column list so the table keeps its schema even with zero rows
association_columns = ['subject', 'ontology_id', 'publications', 'evidence_type']

association_data = []
for _, row in unique_data.iterrows():
    ontologies = parse_ontologies(row)
    if not ontologies:
        continue
    # These values are the same for every ontology of the row: compute once.
    protein_id = hash_to_uuid_map[row['hash']]
    publications = row['Publications'] if not pd.isna(row['Publications']) else "NULL"
    evidence_type = row['Evidence Codes'] if not pd.isna(row['Evidence Codes']) else "NULL"
    for ontology in ontologies:
        association_data.append({
            'subject': protein_id,
            'ontology_id': ontology,
            'publications': publications,
            'evidence_type': evidence_type,
        })

association_table = pd.DataFrame(association_data, columns=association_columns)

# Save the 'association' table as Parquet
association_table.to_parquet("association_table.parquet", index=False)

# 5.
# Generate the 'feature_x_protein' table
def _split_gene_ids(raw_gene_ids):
    """Return the individual GeneIDs stored in one 'GeneID' cell as strings.

    The cell may be missing (returns an empty list), a ';'-separated string,
    or a bare number when pandas inferred a numeric dtype for the column.
    Blank fragments left by stray separators are dropped.
    """
    if pd.isna(raw_gene_ids):
        return []
    if isinstance(raw_gene_ids, float) and raw_gene_ids.is_integer():
        # A numeric column containing NaN is read as float: 2947786.0 -> 2947786
        raw_gene_ids = int(raw_gene_ids)
    return [part.strip() for part in str(raw_gene_ids).split(";") if part.strip()]


# Explicit column list so the table keeps its schema even with zero rows
feature_x_protein_columns = ['protein_id', 'feature_id', 'protocol_id']

feature_x_protein_data = []
# Iterating the two needed columns directly keeps each cell's own type and
# avoids building a Series per row.
for sequence_hash, raw_gene_ids in zip(unique_data['hash'], unique_data['GeneID']):
    protein_id = hash_to_uuid_map[sequence_hash]
    # A protein without any GeneID still gets one row, flagged with "NULL".
    for gene_id in _split_gene_ids(raw_gene_ids) or ["NULL"]:
        feature_x_protein_data.append({
            'protein_id': protein_id,
            'feature_id': gene_id,
            'protocol_id': "SwissProt/NCBI",
        })

feature_x_protein_table = pd.DataFrame(
    feature_x_protein_data, columns=feature_x_protein_columns
)

# Save the 'feature_x_protein' table as Parquet
feature_x_protein_table.to_parquet("feature_x_protein.parquet", index=False)

print("Tables generated and saved as Parquet: protein_table.parquet, name_table.parquet, identifier_table.parquet, association_table.parquet, feature_x_protein.parquet")