From c1a03a1860fdbecab96e912688f1a1f39e2d0c7e Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 20 Mar 2025 15:30:46 +0000 Subject: [PATCH 1/9] added helix regions beta regions activet site --- src/pyeed/adapter/uniprot_mapper.py | 80 +++++++++++++++++++++++++++-- src/pyeed/model.py | 4 ++ 2 files changed, 79 insertions(+), 5 deletions(-) diff --git a/src/pyeed/adapter/uniprot_mapper.py b/src/pyeed/adapter/uniprot_mapper.py index 5a285adb..98604bfe 100644 --- a/src/pyeed/adapter/uniprot_mapper.py +++ b/src/pyeed/adapter/uniprot_mapper.py @@ -12,6 +12,7 @@ GOAnnotation, Organism, Protein, + Region, Site, ) @@ -59,26 +60,95 @@ def add_to_db(self, response: Response) -> None: protein.organism.connect(organism) self.add_sites(record, protein) + self.add_regions(record, protein) self.add_catalytic_activity(record, protein) self.add_go(record, protein) def add_sites(self, record: dict[str, Any], protein: Protein) -> None: - ligand_dict: dict[str, list[int]] = defaultdict(list) + data_dict: dict[str, list[int]] = defaultdict(list) for feature in record.get("features", []): if feature["type"] == "BINDING": for position in range(int(feature["begin"]), int(feature["end"]) + 1): - ligand_dict[feature["ligand"]["name"]].append(position) + data_dict[feature["ligand"]["name"] + "$binding"].append(position) + elif feature["type"] == "ACT_SITE": + for position in range(int(feature["begin"]), int(feature["end"]) + 1): + data_dict[feature["category"] + "$site"].append(position) + + for entry, positions in data_dict.items(): + if entry.split("$")[1] == "binding": + annotation = Annotation.BINDING_SITE.value + elif entry.split("$")[1] == "site": + annotation = Annotation.ACTIVE_SITE.value - for ligand, positions in ligand_dict.items(): site = Site( - name=ligand, - annotation=Annotation.BINDING_SITE.value, + name=entry.split("$")[0], + annotation=annotation, ) site.save() protein.site.connect(site, {"positions": positions}) + def add_regions(self, record: dict[str, Any], 
protein: Protein) -> None: + data_list: list[tuple[str, tuple[int, int]]] = [] + + for feature in record.get("features", []): + if feature["type"] == "HELIX": + data_list.append( + ( + feature["category"] + "$helix", + (int(feature["begin"]), int(feature["end"])), + ) + ) + elif feature["type"] == "STRAND": + data_list.append( + ( + feature["category"] + "$strand", + (int(feature["begin"]), int(feature["end"])), + ) + ) + elif feature["type"] == "TURN": + data_list.append( + ( + feature["category"] + "$turn", + (int(feature["begin"]), int(feature["end"])), + ) + ) + elif feature["type"] == "SIGNAL": + data_list.append( + ( + feature["category"] + "$signal", + (int(feature["begin"]), int(feature["end"])), + ) + ) + elif feature["type"] == "PROPEP": + data_list.append( + ( + feature["category"] + "$propep", + (int(feature["begin"]), int(feature["end"])), + ) + ) + + for name, positions in data_list: + if name.split("$")[1] == "helix": + annotation = Annotation.ALPHAHELIX.value + elif name.split("$")[1] == "strand": + annotation = Annotation.BETASTRAND.value + elif name.split("$")[1] == "turn": + annotation = Annotation.TURN.value + elif name.split("$")[1] == "signal": + annotation = Annotation.SIGNAL.value + elif name.split("$")[1] == "propep": + annotation = Annotation.PROPEP.value + + region = Region( + name=name, + annotation=annotation, + ) + region.save() + + protein.region.connect(region, {"start": positions[0], "end": positions[1]}) + def add_catalytic_activity(self, record: dict[str, Any], protein: Protein) -> None: try: for reference in record["comments"]: diff --git a/src/pyeed/model.py b/src/pyeed/model.py index aa374669..48991048 100644 --- a/src/pyeed/model.py +++ b/src/pyeed/model.py @@ -140,6 +140,9 @@ class Annotation(Enum): FAMILY = "family" MOTIVE = "motive" PROTEIN = "protein" + TURN = "turn" + SIGNAL = "signal" + PROPEP = "propep" class Organism(StrictStructuredNode): @@ -296,6 +299,7 @@ class Site(StrictStructuredNode): class 
Region(StrictStructuredNode): region_id = UniqueIdProperty() + name = StringProperty() annotation = StringProperty( choices=[(e.value, e.name) for e in Annotation], required=True ) From 1386e0ecce47e045892f8a5b8c319fd9173d3b62 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Tue, 25 Mar 2025 08:46:38 +0000 Subject: [PATCH 2/9] update esm3 calbilities for last layer --- docs/usage/embeddings_analysis.ipynb | 969 ++++----------------------- src/pyeed/embedding.py | 103 ++- 2 files changed, 222 insertions(+), 850 deletions(-) diff --git a/docs/usage/embeddings_analysis.ipynb b/docs/usage/embeddings_analysis.ipynb index 65a2398c..e609174f 100644 --- a/docs/usage/embeddings_analysis.ipynb +++ b/docs/usage/embeddings_analysis.ipynb @@ -18,9 +18,18 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/nab/anaconda3/envs/pyeed_niklas/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "import sys\n", "\n", @@ -47,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -68,7 +77,7 @@ }, { "cell_type": "code", - "execution_count": 70, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -85,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -109,836 +118,84 @@ }, { "cell_type": "code", - "execution_count": 72, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2025-02-07 15:21:19.142\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mFound 0 sequences in the database.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:19.143\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mFetching 68 sequences from ncbi_protein.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:19.167\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 7 batches.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:19.168\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m142\u001b[0m - \u001b[34m\u001b[1mPrepared 7 request payloads.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:19.170\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36m_fetch_response\u001b[0m:\u001b[36m121\u001b[0m - \u001b[34m\u001b[1mSending request to https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi with 
parameters: {'retmode': 'text', 'rettype': 'genbank', 'db': 'protein', 'id': 'AAP20891.1,CAJ85677.1,SAQ02853.1,CDR98216.1,WP_109963600.1,CAA41038.1,WP_109874025.1,CAA46344.1,APG33178.1,AKC98298.1'}\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:19.671\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36m_fetch_response\u001b[0m:\u001b[36m121\u001b[0m - \u001b[34m\u001b[1mSending request to https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi with parameters: {'retmode': 'text', 'rettype': 'genbank', 'db': 'protein', 'id': 'KJO56189.1,KLP91446.1,CAA46346.1,CAA74912.2,AFN21551.1,ACB22021.1,CAA76794.1,CAA76795.1,CCG28759.1,KLG19745.1'}\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:20.171\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36m_fetch_response\u001b[0m:\u001b[36m121\u001b[0m - \u001b[34m\u001b[1mSending request to https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi with parameters: {'retmode': 'text', 'rettype': 'genbank', 'db': 'protein', 'id': 'AAC32891.1,CAA76796.1,CAD24670.1,ARF45649.1,CTA52364.1,ADL13944.1,AGQ50511.1,AKA60778.1,APT65830.1,HAH6232254.1'}\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:20.672\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36m_fetch_response\u001b[0m:\u001b[36m121\u001b[0m - \u001b[34m\u001b[1mSending request to https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi with parameters: {'retmode': 'text', 'rettype': 'genbank', 'db': 'protein', 'id': 'QDO66746.1,CBX53726.1,AAC32889.2,CAA64682.1,CAA71322.1,CAA71323.1,CAA71324.1,AEC32455.1,AAD22538.1,AAD22539.1'}\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:20.749\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAP20891.1\u001b[0m\n", - 
"\u001b[32m2025-02-07 15:21:20.750\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAP20891.1: Taxonomy ID: 470\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:20.751\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAP20891.1: Mapped organism 'Acinetobacter baumannii' with taxonomy ID 470.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:20.787\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAP20891.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:20.788\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAP20891.1: Mapped protein with name 'beta-lactamase TEM-1' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:20.900\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAP20891.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:20.901\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAP20891.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:20.902\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAP20891.1: Processing CDS feature with qualifiers.\u001b[0m\n", - 
"\u001b[32m2025-02-07 15:21:20.902\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:20.940\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAP20891.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:20.999\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAP20891.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.000\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAP20891.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.001\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAJ85677.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.001\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAJ85677.1: Taxonomy ID: 35419\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.002\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAJ85677.1: Mapped organism 'Birmingham IncP-alpha plasmid' with taxonomy ID 35419.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.007\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAJ85677.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.008\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAJ85677.1: Mapped protein with name 'beta lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.018\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAJ85677.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.018\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAJ85677.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.019\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAJ85677.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.019\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.024\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAJ85677.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.061\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAJ85677.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.061\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAJ85677.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.062\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record SAQ02853.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.063\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord SAQ02853.1: Taxonomy ID: 571\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.063\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord SAQ02853.1: Mapped organism 'Klebsiella oxytoca' with taxonomy ID 571.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.068\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord SAQ02853.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.069\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord SAQ02853.1: Mapped protein with name 'beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.078\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord SAQ02853.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.079\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein SAQ02853.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.080\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord SAQ02853.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.080\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.113\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord SAQ02853.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.123\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein SAQ02853.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.124\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein SAQ02853.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.124\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI 
protein record CDR98216.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.125\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CDR98216.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.126\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CDR98216.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.131\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CDR98216.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.131\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CDR98216.1: Mapped protein with name 'beta-lactamase TEM-4' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.142\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CDR98216.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.143\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CDR98216.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.144\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CDR98216.1: Processing CDS 
feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.144\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.149\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CDR98216.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.159\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CDR98216.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.160\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CDR98216.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.160\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record WP_109963600.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.161\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord WP_109963600.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.162\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord WP_109963600.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.167\u001b[0m | \u001b[34m\u001b[1mDEBUG 
\u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord WP_109963600.1: Mapped protein with name 'extended-spectrum class A beta-lactamase TEM-5' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.206\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord WP_109963600.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.207\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein WP_109963600.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.207\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m174\u001b[0m - \u001b[34m\u001b[1mRecord WP_109963600.1: No CDS feature found.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.208\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord WP_109963600.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.218\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein WP_109963600.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.219\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109963600.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.220\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAA41038.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.220\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAA41038.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.221\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAA41038.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.265\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAA41038.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.265\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAA41038.1: Mapped protein with name 'beta lactamase TEM6' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.277\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAA41038.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.278\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAA41038.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.278\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAA41038.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.279\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.284\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAA41038.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.295\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAA41038.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.296\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA41038.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.296\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record WP_109874025.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.297\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord WP_109874025.1: Taxonomy ID: 543\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.298\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord 
WP_109874025.1: Mapped organism 'Enterobacteriaceae' with taxonomy ID 543.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.302\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord WP_109874025.1: Mapped protein with name 'extended-spectrum class A beta-lactamase TEM-7' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.314\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord WP_109874025.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.314\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein WP_109874025.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.315\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m174\u001b[0m - \u001b[34m\u001b[1mRecord WP_109874025.1: No CDS feature found.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.316\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord WP_109874025.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.327\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein WP_109874025.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.328\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein 
WP_109874025.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.329\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAA46344.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.329\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAA46344.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.330\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAA46344.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.336\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAA46344.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.337\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAA46344.1: Mapped protein with name 'extended spectrum beta-lactamase CAZ-2' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.349\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAA46344.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.350\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) 
to protein CAA46344.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.350\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAA46344.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.351\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.356\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAA46344.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.367\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAA46344.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.368\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46344.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.369\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record APG33178.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.369\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord APG33178.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.370\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord APG33178.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.376\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord APG33178.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.377\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord APG33178.1: Mapped protein with name 'class A extended-spectrum beta-lactamase TEM-9' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.389\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord APG33178.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.389\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein APG33178.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.390\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord APG33178.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.390\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.395\u001b[0m | \u001b[34m\u001b[1mDEBUG 
\u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord APG33178.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.407\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein APG33178.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.408\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APG33178.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.408\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AKC98298.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.409\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AKC98298.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.410\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AKC98298.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.417\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AKC98298.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.418\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AKC98298.1: Mapped protein with name 'TEM-1' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.429\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AKC98298.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.430\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AKC98298.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.430\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AKC98298.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.431\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.482\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AKC98298.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.494\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AKC98298.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.495\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - 
\u001b[1mAdded/updated NCBI protein AKC98298.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.502\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36m_fetch_response\u001b[0m:\u001b[36m121\u001b[0m - \u001b[34m\u001b[1mSending request to https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi with parameters: {'retmode': 'text', 'rettype': 'genbank', 'db': 'protein', 'id': 'ABB97007.1,ACJ43254.1,AAC05975.1,BCD58813.1,AAK17194.1,AAD33116.2,CAB92324.1,AAL03985.1,AAF19151.1,AAF05613.1'}\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.509\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record KJO56189.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.509\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord KJO56189.1: Taxonomy ID: 299766\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.510\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord KJO56189.1: Mapped organism 'Enterobacter hormaechei subsp. 
steigerwaltii' with taxonomy ID 299766.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.519\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord KJO56189.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.519\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord KJO56189.1: Mapped protein with name 'beta-lactamase TEM' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.541\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord KJO56189.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.542\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein KJO56189.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.542\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord KJO56189.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.543\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.547\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord KJO56189.1: Mapped 1 region(s).\u001b[0m\n", - 
"\u001b[32m2025-02-07 15:21:21.567\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein KJO56189.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.568\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KJO56189.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.568\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record KLP91446.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.569\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord KLP91446.1: Taxonomy ID: 1812934\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.569\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord KLP91446.1: Mapped organism 'Enterobacter hormaechei subsp. 
hoffmannii' with taxonomy ID 1812934.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.574\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord KLP91446.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.575\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord KLP91446.1: Mapped protein with name 'class A beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.586\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord KLP91446.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.587\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein KLP91446.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.587\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord KLP91446.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.588\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.592\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord KLP91446.1: Mapped 1 region(s).\u001b[0m\n", 
- "\u001b[32m2025-02-07 15:21:21.608\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein KLP91446.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.609\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLP91446.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.609\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAA46346.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.610\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAA46346.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.610\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAA46346.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.616\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAA46346.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.617\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAA46346.1: Mapped protein with name 'extended spectrum beta-lactamase CAZ-7' to 'Protein' object.\u001b[0m\n", - 
"\u001b[32m2025-02-07 15:21:21.631\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAA46346.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.632\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAA46346.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.633\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAA46346.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.633\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.638\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAA46346.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.648\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAA46346.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46346.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.649\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAA74912.2\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.650\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAA74912.2: Taxonomy ID: 1018\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.650\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAA74912.2: Mapped organism 'Capnocytophaga ochracea' with taxonomy ID 1018.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.655\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAA74912.2: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.656\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAA74912.2: Mapped protein with name 'beta-lactamase class A' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.666\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAA74912.2: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.667\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAA74912.2.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.668\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAA74912.2: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.668\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.673\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAA74912.2: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.683\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAA74912.2.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.684\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA74912.2 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.686\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AFN21551.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.686\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AFN21551.1: Taxonomy ID: 470\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.687\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord 
AFN21551.1: Mapped organism 'Acinetobacter baumannii' with taxonomy ID 470.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.692\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AFN21551.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.693\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AFN21551.1: Mapped protein with name 'beta-lactamase TEM-19' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.703\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AFN21551.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.704\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AFN21551.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.705\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AFN21551.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.705\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.710\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord 
AFN21551.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.720\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AFN21551.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AFN21551.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.721\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record ACB22021.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.722\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord ACB22021.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.722\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord ACB22021.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.728\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord ACB22021.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.729\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord ACB22021.1: Mapped protein with name 'extended spectrum beta-lactamase 
blaTEM-20' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.741\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord ACB22021.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.742\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein ACB22021.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.742\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord ACB22021.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.743\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.747\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord ACB22021.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.757\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein ACB22021.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.758\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACB22021.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.758\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAA76794.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.759\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAA76794.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.759\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAA76794.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.766\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAA76794.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.766\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAA76794.1: Mapped protein with name 'beta-lactamase class A' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.777\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAA76794.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.778\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAA76794.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.778\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAA76794.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.790\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAA76794.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.804\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAA76794.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.805\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76794.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.805\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAA76795.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.806\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAA76795.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.807\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord 
CAA76795.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.813\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAA76795.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.814\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAA76795.1: Mapped protein with name 'beta-lactamase class A' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.825\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAA76795.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.826\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAA76795.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.826\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAA76795.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.827\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.832\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord 
CAA76795.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.843\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAA76795.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.843\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76795.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.844\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CCG28759.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.844\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CCG28759.1: Taxonomy ID: 935296\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.845\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CCG28759.1: Mapped organism 'Klebsiella aerogenes EA1509E' with taxonomy ID 935296.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.849\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CCG28759.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.850\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CCG28759.1: Mapped protein with name 'Beta-lactamase (EC 
3.5.2.6)' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.862\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CCG28759.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.862\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CCG28759.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.863\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CCG28759.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.864\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.914\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CCG28759.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.925\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CCG28759.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.926\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CCG28759.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.926\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record KLG19745.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.927\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord KLG19745.1: Taxonomy ID: 208224\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.927\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord KLG19745.1: Mapped organism 'Enterobacter kobei' with taxonomy ID 208224.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.932\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord KLG19745.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.933\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord KLG19745.1: Mapped protein with name 'beta-lactamase TEM' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.944\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord KLG19745.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.945\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein KLG19745.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.945\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord KLG19745.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.946\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.951\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord KLG19745.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.962\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein KLG19745.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.963\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLG19745.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.970\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAC32891.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.971\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAC32891.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.971\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord 
AAC32891.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.977\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAC32891.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.978\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAC32891.1: Mapped protein with name 'beta-lactamase TEM-28' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.990\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAC32891.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAC32891.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.991\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAC32891.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.992\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:21.997\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAC32891.1: 
Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.007\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAC32891.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.008\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32891.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.008\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAA76796.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.009\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAA76796.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.010\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAA76796.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.021\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAA76796.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.022\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAA76796.1: Mapped protein with name 'beta-lactamase class A' to 'Protein' 
object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.033\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAA76796.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.034\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAA76796.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.035\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAA76796.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.035\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.041\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAA76796.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.051\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAA76796.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.052\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76796.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.053\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAD24670.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.053\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAD24670.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.054\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAD24670.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.060\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAD24670.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.061\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAD24670.1: Mapped protein with name 'beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.073\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAD24670.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.074\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAD24670.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.074\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAD24670.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.075\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.080\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAD24670.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.091\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAD24670.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.091\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAD24670.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.092\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record ARF45649.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.092\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord ARF45649.1: Taxonomy ID: 32630\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.093\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord 
ARF45649.1: Mapped organism 'synthetic construct' with taxonomy ID 32630.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.098\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord ARF45649.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.098\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord ARF45649.1: Mapped protein with name 'beta-lactamase TEM-1 variant' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.109\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord ARF45649.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.110\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein ARF45649.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.110\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord ARF45649.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.111\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.116\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord 
ARF45649.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.127\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein ARF45649.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.128\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ARF45649.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.129\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CTA52364.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.129\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CTA52364.1: Taxonomy ID: 624\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.130\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CTA52364.1: Mapped organism 'Shigella sonnei' with taxonomy ID 624.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.135\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CTA52364.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.136\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CTA52364.1: Mapped protein with name 'Beta-lactamase TEM precursor' to 'Protein' 
object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.147\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CTA52364.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.148\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CTA52364.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.148\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CTA52364.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.149\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.154\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CTA52364.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.165\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CTA52364.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.166\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CTA52364.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.166\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record ADL13944.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.167\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord ADL13944.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.168\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord ADL13944.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.174\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord ADL13944.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.175\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord ADL13944.1: Mapped protein with name 'TEM-33' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.187\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord ADL13944.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.188\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein ADL13944.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.188\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord ADL13944.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.189\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.193\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord ADL13944.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.205\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein ADL13944.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.206\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ADL13944.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.206\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AGQ50511.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.207\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AGQ50511.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.207\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord 
AGQ50511.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.213\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AGQ50511.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.214\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AGQ50511.1: Mapped protein with name 'beta lactamase blaTEM-34' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.225\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AGQ50511.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.225\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AGQ50511.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.226\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AGQ50511.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.226\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.231\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AGQ50511.1: 
Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.242\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AGQ50511.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.243\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AGQ50511.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.244\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AKA60778.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.244\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AKA60778.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.245\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AKA60778.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.252\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AKA60778.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.253\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AKA60778.1: Mapped protein with name 'beta-lactamase' to 'Protein' object.\u001b[0m\n", - 
"\u001b[32m2025-02-07 15:21:22.263\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AKA60778.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.264\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AKA60778.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.265\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AKA60778.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.266\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.270\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AKA60778.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.281\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AKA60778.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.282\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKA60778.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.282\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record APT65830.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.283\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord APT65830.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.284\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord APT65830.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.290\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord APT65830.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.291\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord APT65830.1: Mapped protein with name 'inhibitor-resistant class A broad-spectrum beta-lactamase TEM-36' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.303\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord APT65830.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.303\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein APT65830.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.304\u001b[0m | 
\u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord APT65830.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.304\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.309\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord APT65830.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.320\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein APT65830.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.321\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APT65830.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.321\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record HAH6232254.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.322\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord HAH6232254.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.322\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord HAH6232254.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.328\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord HAH6232254.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.329\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord HAH6232254.1: Mapped protein with name 'TEM family class A beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.340\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord HAH6232254.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.341\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein HAH6232254.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.341\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord HAH6232254.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.342\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.391\u001b[0m | \u001b[34m\u001b[1mDEBUG 
\u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord HAH6232254.1: Mapped 0 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.392\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 0 region(s) to protein HAH6232254.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.392\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein HAH6232254.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.398\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36m_fetch_response\u001b[0m:\u001b[36m121\u001b[0m - \u001b[34m\u001b[1mSending request to https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi with parameters: {'retmode': 'text', 'rettype': 'genbank', 'db': 'protein', 'id': 'AAF05614.1,AAF05612.1,AAF05611.1,AAM15527.1,AAL29433.1,AAL29434.1,AAL29435.1,AAL29436.1,CAC43229.1,CAC43230.1'}\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.495\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record ABB97007.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.495\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord ABB97007.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.496\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord ABB97007.1: Mapped organism 
'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.503\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord ABB97007.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.504\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord ABB97007.1: Mapped protein with name 'beta-lactamase TEM-55' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.516\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord ABB97007.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.517\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein ABB97007.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.517\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord ABB97007.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.518\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.522\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord ABB97007.1: Mapped 1 
region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.532\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein ABB97007.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.533\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ABB97007.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.534\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record ACJ43254.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.534\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord ACJ43254.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.535\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord ACJ43254.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.542\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord ACJ43254.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.542\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord ACJ43254.1: Mapped protein with name 'TEM-57 beta-lactamase' to 'Protein' object.\u001b[0m\n", - 
"\u001b[32m2025-02-07 15:21:22.555\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord ACJ43254.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.555\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein ACJ43254.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.556\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord ACJ43254.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.557\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.561\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord ACJ43254.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.572\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein ACJ43254.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.573\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACJ43254.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.573\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAC05975.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.574\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAC05975.1: Taxonomy ID: 588\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.574\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAC05975.1: Mapped organism 'Providencia stuartii' with taxonomy ID 588.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.579\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAC05975.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.580\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAC05975.1: Mapped protein with name 'beta-lactamase TEM-60' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.591\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAC05975.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.592\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAC05975.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.593\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAC05975.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.593\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.598\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAC05975.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.609\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAC05975.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.609\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC05975.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.610\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record BCD58813.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.610\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord BCD58813.1: Taxonomy ID: 615\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.611\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord 
BCD58813.1: Mapped organism 'Serratia marcescens' with taxonomy ID 615.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.616\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord BCD58813.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.616\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord BCD58813.1: Mapped protein with name 'class A extended-spectrum beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.627\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord BCD58813.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.628\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein BCD58813.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.629\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord BCD58813.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.629\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.634\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - 
\u001b[34m\u001b[1mRecord BCD58813.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.645\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein BCD58813.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.646\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BCD58813.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.646\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAK17194.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.647\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAK17194.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.647\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAK17194.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.654\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAK17194.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.654\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAK17194.1: Mapped protein with name 'extended 
spectrum beta-lactamase TEM-63' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.666\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAK17194.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.666\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAK17194.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.667\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAK17194.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.667\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.672\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAK17194.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.683\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAK17194.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.684\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK17194.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.684\u001b[0m | \u001b[34m\u001b[1mDEBUG 
\u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAD33116.2\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.685\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAD33116.2: Taxonomy ID: 584\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.685\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAD33116.2: Mapped organism 'Proteus mirabilis' with taxonomy ID 584.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.690\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAD33116.2: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.691\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAD33116.2: Mapped protein with name 'beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.702\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAD33116.2: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.702\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAD33116.2.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.703\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAD33116.2: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.703\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.709\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAD33116.2: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.719\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAD33116.2.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.720\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD33116.2 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.720\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAB92324.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.721\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAB92324.1: Taxonomy ID: 72407\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.721\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord 
CAB92324.1: Mapped organism 'Klebsiella pneumoniae subsp. pneumoniae' with taxonomy ID 72407.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.727\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAB92324.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.727\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAB92324.1: Mapped protein with name 'TEM-68 ES-beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.739\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAB92324.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.740\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAB92324.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.740\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAB92324.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.741\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.746\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - 
\u001b[34m\u001b[1mRecord CAB92324.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.756\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAB92324.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.757\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAB92324.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.758\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAL03985.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.758\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAL03985.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.759\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAL03985.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.765\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAL03985.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.766\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAL03985.1: Mapped protein with name 
'extended-spectrum beta-lactamase TEM-71' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.779\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAL03985.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.780\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAL03985.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.781\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAL03985.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.782\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.786\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAL03985.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.798\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAL03985.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.798\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL03985.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.799\u001b[0m | 
\u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAF19151.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.800\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAF19151.1: Taxonomy ID: 582\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.800\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAF19151.1: Mapped organism 'Morganella morganii' with taxonomy ID 582.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.805\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAF19151.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.806\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAF19151.1: Mapped protein with name 'beta-lactamase TEM-72' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.817\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAF19151.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.819\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAF19151.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.819\u001b[0m | 
\u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAF19151.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.820\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.825\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAF19151.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.837\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAF19151.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.838\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF19151.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.838\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAF05613.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.838\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAF05613.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.839\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m 
- \u001b[34m\u001b[1mRecord AAF05613.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.846\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAF05613.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.847\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAF05613.1: Mapped protein with name 'inhibitor-resistant beta-lactamase TEM-76' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.858\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAF05613.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.859\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAF05613.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.859\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAF05613.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.860\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.865\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAF05613.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.878\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAF05613.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.879\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05613.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.891\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record QDO66746.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.892\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord QDO66746.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.893\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord QDO66746.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.899\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord QDO66746.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.900\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord QDO66746.1: Mapped protein with name 'beta-lactamase TEM-39' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.913\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord QDO66746.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.914\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein QDO66746.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.915\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord QDO66746.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.915\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.921\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord QDO66746.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.932\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein QDO66746.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.933\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein QDO66746.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.933\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CBX53726.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.934\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CBX53726.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.934\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CBX53726.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.941\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CBX53726.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.942\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CBX53726.1: Mapped protein with name 'beta-lactamase TEM-40' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.954\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CBX53726.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.955\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CBX53726.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.955\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CBX53726.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.956\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.961\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CBX53726.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.972\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CBX53726.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.973\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CBX53726.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.974\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAC32889.2\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.974\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord 
AAC32889.2: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.974\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAC32889.2: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.981\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAC32889.2: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.982\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAC32889.2: Mapped protein with name 'beta-lactamase TEM-43' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.995\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAC32889.2: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.995\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAC32889.2.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.997\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAC32889.2: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:22.997\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - 
\u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.002\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAC32889.2: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.013\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAC32889.2.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.014\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32889.2 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.015\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAA64682.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.015\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAA64682.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.016\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAA64682.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.022\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAA64682.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - 
"\u001b[32m2025-02-07 15:21:23.023\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAA64682.1: Mapped protein with name 'beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.035\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAA64682.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.036\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAA64682.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.037\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAA64682.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.037\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.042\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAA64682.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.054\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAA64682.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.055\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA64682.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.055\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAA71322.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.056\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAA71322.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.056\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAA71322.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.063\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAA71322.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.063\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAA71322.1: Mapped protein with name 'ES-beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.076\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAA71322.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.076\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAA71322.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.077\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAA71322.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.077\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.082\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAA71322.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.095\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAA71322.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.095\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71322.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.096\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAA71323.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.096\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord 
CAA71323.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.097\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAA71323.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.103\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAA71323.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.104\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAA71323.1: Mapped protein with name 'ES-beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.116\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAA71323.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.116\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAA71323.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.117\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAA71323.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.117\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 
CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.122\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAA71323.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.135\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAA71323.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.136\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71323.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.137\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAA71324.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.138\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAA71324.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.139\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAA71324.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.146\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAA71324.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.147\u001b[0m | 
\u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAA71324.1: Mapped protein with name 'ES-beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.159\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAA71324.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.160\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAA71324.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.161\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAA71324.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.161\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.167\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAA71324.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.179\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAA71324.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.180\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71324.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.180\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AEC32455.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.181\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AEC32455.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.182\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AEC32455.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.189\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AEC32455.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.190\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AEC32455.1: Mapped protein with name 'beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.202\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AEC32455.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.203\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AEC32455.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.204\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AEC32455.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.205\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.211\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AEC32455.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.223\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AEC32455.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.224\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AEC32455.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.224\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAD22538.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.225\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord 
AAD22538.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.226\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAD22538.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.233\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAD22538.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.234\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAD22538.1: Mapped protein with name 'extended spectrum beta-lactamase TEM-53' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.246\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAD22538.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.247\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAD22538.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.247\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAD22538.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.248\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - 
\u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.253\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAD22538.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.263\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAD22538.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.264\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22538.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.265\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAD22539.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.266\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAD22539.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.266\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAD22539.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.273\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAD22539.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - 
"\u001b[32m2025-02-07 15:21:23.273\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAD22539.1: Mapped protein with name 'inhibitor resistant beta lactamase TEM-54' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.286\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAD22539.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.287\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAD22539.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.288\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAD22539.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.288\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.293\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAD22539.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.304\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAD22539.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.305\u001b[0m | \u001b[1mINFO 
\u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22539.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.309\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAF05614.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.310\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAF05614.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.310\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAF05614.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.317\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAF05614.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.318\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAF05614.1: Mapped protein with name 'inhibitor-resistant beta-lactamase TEM-77' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.329\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAF05614.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.330\u001b[0m | \u001b[34m\u001b[1mDEBUG 
\u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAF05614.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.330\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAF05614.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.331\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.335\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAF05614.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.346\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAF05614.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.347\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05614.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.348\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAF05612.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.348\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - 
\u001b[34m\u001b[1mRecord AAF05612.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.349\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAF05612.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.356\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAF05612.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.356\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAF05612.1: Mapped protein with name 'inhibitor-resistant beta-lactamase TEM-78' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.368\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAF05612.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.369\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAF05612.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.370\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAF05612.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.370\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.375\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAF05612.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.388\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAF05612.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.388\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05612.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.389\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAF05611.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.389\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAF05611.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.390\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAF05611.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.396\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - 
\u001b[33m\u001b[1mRecord AAF05611.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.397\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAF05611.1: Mapped protein with name 'inhibitor-resistant beta-lactamase TEM-79' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.408\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAF05611.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.409\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAF05611.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.410\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAF05611.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.410\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.415\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAF05611.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.426\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 
region(s) to protein AAF05611.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.427\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05611.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.428\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAM15527.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.428\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAM15527.1: Taxonomy ID: 550\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.429\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAM15527.1: Mapped organism 'Enterobacter cloacae' with taxonomy ID 550.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.434\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAM15527.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.435\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAM15527.1: Mapped protein with name 'inhibitor-resistant beta-lactamase TEM-80' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.449\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAM15527.1: 
Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.449\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAM15527.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.450\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAM15527.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.451\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.456\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAM15527.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.467\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAM15527.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.468\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAM15527.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.469\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAL29433.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.470\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAL29433.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.471\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAL29433.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.477\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAL29433.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.478\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAL29433.1: Mapped protein with name 'inhibitor-resistant beta-lactamase TEM-81' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.490\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAL29433.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.491\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAL29433.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.491\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAL29433.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.492\u001b[0m | \u001b[34m\u001b[1mDEBUG 
\u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.496\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAL29433.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.507\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAL29433.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.508\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29433.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.509\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAL29434.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.509\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAL29434.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.510\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAL29434.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.516\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - 
\u001b[33m\u001b[1mRecord AAL29434.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.517\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAL29434.1: Mapped protein with name 'inhibitor-resistant beta-lactamase TEM-82' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.529\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAL29434.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.530\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAL29434.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.531\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAL29434.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.531\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.536\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAL29434.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.547\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 
region(s) to protein AAL29434.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.547\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29434.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.548\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAL29435.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.549\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAL29435.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.549\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAL29435.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.556\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAL29435.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.556\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAL29435.1: Mapped protein with name 'inhibitor-resistant beta-lactamase TEM-83' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.567\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAL29435.1: 
Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.568\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAL29435.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.569\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAL29435.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.569\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.574\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAL29435.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.586\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAL29435.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.586\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29435.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.587\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAL29436.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.588\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAL29436.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.588\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAL29436.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.594\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAL29436.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.595\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAL29436.1: Mapped protein with name 'inhibitor-resistant beta-lactamase TEM-84' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.606\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAL29436.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.607\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAL29436.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.607\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAL29436.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.608\u001b[0m | \u001b[34m\u001b[1mDEBUG 
\u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.613\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAL29436.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.623\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAL29436.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.624\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29436.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.625\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAC43229.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.625\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAC43229.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.626\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAC43229.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.632\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAC43229.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.633\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAC43229.1: Mapped protein with name 'TEM-85 ES-beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.644\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAC43229.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.645\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAC43229.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.645\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAC43229.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.646\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.651\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAC43229.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.663\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAC43229.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.663\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43229.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.664\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAC43230.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.664\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAC43230.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.665\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAC43230.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.671\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAC43230.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.672\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAC43230.1: Mapped protein with name 'TEM-86 ES-beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.683\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAC43230.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.684\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAC43230.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.684\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAC43230.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.685\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.689\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAC43230.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.699\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAC43230.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.700\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43230.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:23.701\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36m_fetch_response\u001b[0m:\u001b[36m121\u001b[0m - \u001b[34m\u001b[1mSending 
request to https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi with parameters: {'retmode': 'text', 'rettype': 'genbank', 'db': 'protein', 'id': 'AAG44570.1,AAK14792.1,AAK30619.1,BAB16308.1,AAF66653.1,CAC85660.1,CAC85661.1,CAC67290.1'}\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.377\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAG44570.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.377\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAG44570.1: Taxonomy ID: 584\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.378\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAG44570.1: Mapped organism 'Proteus mirabilis' with taxonomy ID 584.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.385\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAG44570.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.386\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAG44570.1: Mapped protein with name 'class A beta-lactamase TEM-87' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.398\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAG44570.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 
15:21:24.399\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAG44570.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.399\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAG44570.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.400\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.405\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAG44570.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.416\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAG44570.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.417\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAG44570.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.417\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAK14792.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.418\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAK14792.1: Taxonomy ID: 573\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.418\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAK14792.1: Mapped organism 'Klebsiella pneumoniae' with taxonomy ID 573.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.425\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAK14792.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.425\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAK14792.1: Mapped protein with name 'extended-spectrum beta-lactamase TEM-88' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.437\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAK14792.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.437\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAK14792.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.438\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAK14792.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.439\u001b[0m | \u001b[34m\u001b[1mDEBUG 
\u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.443\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAK14792.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.455\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAK14792.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.456\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK14792.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.456\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAK30619.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.457\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAK30619.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.457\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAK30619.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.464\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - 
\u001b[33m\u001b[1mRecord AAK30619.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.464\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAK30619.1: Mapped protein with name 'TEM-90 beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.477\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAK30619.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.479\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAK30619.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.479\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAK30619.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.480\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.486\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAK30619.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.497\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein 
AAK30619.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.498\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK30619.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.499\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record BAB16308.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.499\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord BAB16308.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.500\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord BAB16308.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.507\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord BAB16308.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.507\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord BAB16308.1: Mapped protein with name 'TEM-derived extended-spectrum beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.519\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord BAB16308.1: Mapped 0 
site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.520\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein BAB16308.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.521\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord BAB16308.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.522\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.527\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord BAB16308.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.540\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein BAB16308.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.541\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BAB16308.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.542\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record AAF66653.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.543\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord AAF66653.1: Taxonomy ID: 584\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.543\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord AAF66653.1: Mapped organism 'Proteus mirabilis' with taxonomy ID 584.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.550\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord AAF66653.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.551\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord AAF66653.1: Mapped protein with name 'class A beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.564\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord AAF66653.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.565\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein AAF66653.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.566\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord AAF66653.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.566\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.571\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord AAF66653.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.583\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein AAF66653.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.584\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF66653.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.584\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAC85660.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.585\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAC85660.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.585\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAC85660.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.592\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - 
\u001b[33m\u001b[1mRecord CAC85660.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.593\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAC85660.1: Mapped protein with name 'TEM-93 ES-beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.606\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAC85660.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.606\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAC85660.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.607\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAC85660.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.608\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.613\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAC85660.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.625\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein 
CAC85660.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.626\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85660.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.626\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAC85661.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.627\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAC85661.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.627\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAC85661.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.634\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAC85661.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.634\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAC85661.1: Mapped protein with name 'TEM-94 ES-beta-lactamase' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.646\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAC85661.1: Mapped 0 site(s).\u001b[0m\n", - 
"\u001b[32m2025-02-07 15:21:24.647\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAC85661.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.647\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAC85661.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.648\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.653\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAC85661.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.663\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAC85661.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.664\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85661.1 in database\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.664\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m259\u001b[0m - \u001b[34m\u001b[1mProcessing NCBI protein record CAC67290.1\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.665\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m49\u001b[0m - \u001b[34m\u001b[1mRecord CAC67290.1: Taxonomy ID: 562\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.665\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_organism\u001b[0m:\u001b[36m77\u001b[0m - \u001b[34m\u001b[1mRecord CAC67290.1: Mapped organism 'Escherichia coli' with taxonomy ID 562.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.671\u001b[0m | \u001b[33m\u001b[1mWARNING \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m115\u001b[0m - \u001b[33m\u001b[1mRecord CAC67290.1: Molecular weight missing or invalid; setting to None.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.672\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_protein\u001b[0m:\u001b[36m128\u001b[0m - \u001b[34m\u001b[1mRecord CAC67290.1: Mapped protein with name 'beta lactamase TEM-95' to 'Protein' object.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.684\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_sites\u001b[0m:\u001b[36m149\u001b[0m - \u001b[34m\u001b[1mRecord CAC67290.1: Mapped 0 site(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.684\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_sites\u001b[0m:\u001b[36m160\u001b[0m - \u001b[34m\u001b[1mConnected 0 site(s) to protein CAC67290.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.685\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_cds\u001b[0m:\u001b[36m183\u001b[0m - \u001b[34m\u001b[1mRecord CAC67290.1: Processing CDS feature with qualifiers.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.685\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mget_cds_regions\u001b[0m:\u001b[36m211\u001b[0m - \u001b[34m\u001b[1mExtracted 1 CDS region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.690\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36mmap_regions\u001b[0m:\u001b[36m234\u001b[0m - \u001b[34m\u001b[1mRecord CAC67290.1: Mapped 1 region(s).\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.701\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_regions\u001b[0m:\u001b[36m247\u001b[0m - \u001b[34m\u001b[1mConnected 1 region(s) to protein CAC67290.1.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:24.702\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC67290.1 in database\u001b[0m\n" + "\u001b[32m2025-03-21 10:35:45.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mFound 2 sequences in the database.\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:45.248\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mFetching 68 sequences from ncbi_protein.\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:45.263\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 7 batches.\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:46.789\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KJO56189.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:46.832\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLP91446.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:46.878\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46346.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:46.903\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA74912.2 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:46.928\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AFN21551.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:46.980\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACB22021.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.006\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76794.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76795.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.113\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CCG28759.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.138\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLG19745.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.174\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAP20891.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.199\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAJ85677.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.224\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein SAQ02853.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.249\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CDR98216.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.300\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109963600.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.325\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA41038.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.346\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109874025.1 in database\u001b[0m\n", + 
"\u001b[32m2025-03-21 10:35:47.370\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46344.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.396\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APG33178.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.470\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKC98298.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.501\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32891.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.525\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76796.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.549\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAD24670.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.573\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ARF45649.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.598\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CTA52364.1 in 
database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.623\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ADL13944.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.649\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AGQ50511.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.675\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKA60778.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.699\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APT65830.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:47.766\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein HAH6232254.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.031\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein QDO66746.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.056\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CBX53726.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.094\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated 
NCBI protein AAC32889.2 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.120\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA64682.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.146\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71322.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.173\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71323.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.198\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71324.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.224\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AEC32455.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.250\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22538.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.275\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22539.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.340\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m 
- \u001b[1mAdded/updated NCBI protein ABB97007.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.365\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACJ43254.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.390\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC05975.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.415\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BCD58813.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.440\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK17194.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.464\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD33116.2 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.488\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAB92324.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.514\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL03985.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.540\u001b[0m | \u001b[1mINFO \u001b[0m | 
\u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF19151.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.569\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05613.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.782\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05614.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.809\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05612.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.836\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05611.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.861\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAM15527.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.887\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29433.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.913\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29434.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.939\u001b[0m | 
\u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29435.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.965\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29436.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:48.992\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43229.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:49.018\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43230.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:49.585\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAG44570.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:49.613\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK14792.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:49.639\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK30619.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:49.667\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BAB16308.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 
10:35:49.694\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF66653.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:49.721\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85660.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:49.748\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85661.1 in database\u001b[0m\n", + "\u001b[32m2025-03-21 10:35:49.776\u001b[0m | \u001b[1mINFO \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC67290.1 in database\u001b[0m\n" ] } ], @@ -957,19 +214,14 @@ }, { "cell_type": "code", - "execution_count": 73, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "\u001b[32m2025-02-07 15:21:25.789\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mcalculate_sequence_embeddings\u001b[0m:\u001b[36m222\u001b[0m - \u001b[34m\u001b[1mCalculating embeddings for 68 sequences.\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:25.789\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mcalculate_sequence_embeddings\u001b[0m:\u001b[36m229\u001b[0m - \u001b[34m\u001b[1mProcessing batch 1/6\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:26.421\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mcalculate_sequence_embeddings\u001b[0m:\u001b[36m229\u001b[0m - \u001b[34m\u001b[1mProcessing batch 2/6\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:27.004\u001b[0m | \u001b[34m\u001b[1mDEBUG 
\u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mcalculate_sequence_embeddings\u001b[0m:\u001b[36m229\u001b[0m - \u001b[34m\u001b[1mProcessing batch 3/6\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:27.575\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mcalculate_sequence_embeddings\u001b[0m:\u001b[36m229\u001b[0m - \u001b[34m\u001b[1mProcessing batch 4/6\u001b[0m\n", - "\u001b[32m2025-02-07 15:21:28.145\u001b[0m | \u001b[34m\u001b[1mDEBUG \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mcalculate_sequence_embeddings\u001b[0m:\u001b[36m229\u001b[0m - \u001b[34m\u001b[1mProcessing batch 5/6\u001b[0m\n" + "Fetching 4 files: 100%|██████████| 4/4 [00:00<00:00, 284.23it/s]\n" ] } ], @@ -988,7 +240,7 @@ }, { "cell_type": "code", - "execution_count": 78, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -1000,7 +252,7 @@ }, { "data": { - "image/png": "", + "image/png": "", "text/plain": [ "
" ] @@ -1070,7 +322,7 @@ }, { "cell_type": "code", - "execution_count": 75, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -1078,7 +330,7 @@ "output_type": "stream", "text": [ "Resulst for index AAP20891.1 are:\n", - "[('AAP20891.1', 0.0), ('AGQ50511.1', 0.00016200621801287785), ('ABB97007.1', 0.0001810048295400879), ('AFN21551.1', 0.00018909362988450695), ('CAC67290.1', 0.00021654775310264718), ('ADL13944.1', 0.0002567003210336427), ('AAK30619.1', 0.0002616398020808264), ('AAL29433.1', 0.0002646931927183793), ('ACJ43254.1', 0.0002669990760338914), ('ACB22021.1', 0.0002755243601859636)]\n" + "[('AAP20891.1', 0.0), ('P62593', 0.0), ('AGQ50511.1', 0.00016200621801287785), ('ABB97007.1', 0.0001810048295400879), ('AFN21551.1', 0.00018909362988450695), ('CAC67290.1', 0.00021654775310264718), ('ADL13944.1', 0.0002567003210336427), ('AAK30619.1', 0.0002616398020808264), ('AAL29433.1', 0.0002646931927183793), ('ACJ43254.1', 0.0002669990760338914)]\n" ] } ], @@ -1122,7 +374,7 @@ }, { "cell_type": "code", - "execution_count": 76, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -1171,14 +423,41 @@ }, { "cell_type": "code", - "execution_count": 77, + "execution_count": 10, "metadata": {}, "outputs": [ + { + "data": { + "text/html": [ + "
/home/nab/anaconda3/envs/pyeed_niklas/lib/python3.10/site-packages/rich/live.py:231: UserWarning: install \n",
+       "\"ipywidgets\" for Jupyter support\n",
+       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
+       "
\n" + ], + "text/plain": [ + "/home/nab/anaconda3/envs/pyeed_niklas/lib/python3.10/site-packages/rich/live.py:231: UserWarning: install \n", + "\"ipywidgets\" for Jupyter support\n", + " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "[('AAP20891.1', 1.0), ('AGQ50511.1', 0.9999189376831055), ('ABB97007.1', 0.999909520149231), ('AFN21551.1', 0.9999054670333862), ('CAC67290.1', 0.9998918771743774), ('ADL13944.1', 0.9998717904090881), ('AAK30619.1', 0.9998692274093628), ('AAL29433.1', 0.9998676776885986), ('ACJ43254.1', 0.9998666048049927), ('CBX53726.1', 0.9998624920845032)]\n"
+      "[('AAP20891.1', 1.0), ('P62593', 1.0), ('AGQ50511.1', 0.9999189376831055), ('ABB97007.1', 0.999909520149231), ('AFN21551.1', 0.9999054670333862), ('CAC67290.1', 0.9998918771743774), ('ADL13944.1', 0.9998717904090881), ('AAK30619.1', 0.9998692274093628), ('AAL29433.1', 0.9998676776885986), ('ACJ43254.1', 0.9998666048049927)]\n"
      ]
     }
    ],
@@ -1223,7 +502,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "pyeed_niklas",
+   "display_name": "Python 3",
    "language": "python",
    "name": "python3"
   },
@@ -1237,7 +516,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.12.8"
+   "version": "3.10.16"
   }
  },
  "nbformat": 4,
diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py
index 82b5c6d7..0cb6770e 100644
--- a/src/pyeed/embedding.py
+++ b/src/pyeed/embedding.py
@@ -4,8 +4,9 @@
 
 import numpy as np
 import torch
+from esm.models.esm3 import ESM3
 from esm.models.esmc import ESMC
-from esm.sdk.api import ESMProtein, LogitsConfig
+from esm.sdk.api import ESM3InferenceClient, ESMProtein, LogitsConfig
 from huggingface_hub import HfFolder, login
 from numpy.typing import NDArray
 from transformers import EsmModel, EsmTokenizer
@@ -33,7 +34,7 @@ def get_hf_token() -> str:
 def load_model_and_tokenizer(
     model_name: str,
 ) -> Tuple[
-    Union[EsmModel, ESMC],  # Changed from ESM3InferenceClient to ESMC
+    Union[EsmModel, ESMC, ESM3InferenceClient],  # ESM-2 (Hugging Face), ESMC, or ESM3 model
     Union[EsmTokenizer, None],
     torch.device,
 ]:
@@ -54,8 +55,12 @@ def load_model_and_tokenizer(
     # Check if this is an ESM-3 variant
     if "esmc" in model_name.lower():
         # Using ESMC from_pretrained
-        model = ESMC.from_pretrained(model_name)
+        model: Any = ESMC.from_pretrained(model_name)
         model = model.to(device)
+        return model, None, device
+    elif "esm3-sm-open-v1" in model_name.lower():
+        model: Any = ESM3.from_pretrained("esm3_sm_open_v1").to(device)
+
         return model, None, device
     else:
         # Otherwise, assume it's an ESM-2 model on Hugging Face
@@ -64,7 +69,7 @@ def load_model_and_tokenizer(
             if model_name.startswith("facebook/")
             else f"facebook/{model_name}"
         )
-        model = EsmModel.from_pretrained(full_model_name, use_auth_token=token)
+        model: Any = EsmModel.from_pretrained(full_model_name, use_auth_token=token)
         tokenizer = EsmTokenizer.from_pretrained(full_model_name, use_auth_token=token)
         model = model.to(device)
         return model, tokenizer, device
@@ -160,6 +165,74 @@ def calculate_single_sequence_embedding_all_layers(
     return get_single_embedding_all_layers(sequence, model, tokenizer, device)
 
 
+def calculate_single_sequence_embedding_first_layer(
+    sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D"
+) -> NDArray[np.float64]:
+    """
+    Calculates an embedding for a single sequence using the first layer.
+    """
+    model, tokenizer, device = load_model_and_tokenizer(model_name)
+    return get_single_embedding_first_layer(sequence, model, tokenizer, device)
+
+
+def get_single_embedding_first_layer(
+    sequence: str, model: Any, tokenizer: Any, device: torch.device
+) -> NDArray[np.float64]:
+    """
+    Generates L2-normalized per-token embeddings (first hidden state for ESMC/ESM-2).
+    """
+    embeddings_list = []
+
+    with torch.no_grad():
+        if isinstance(model, ESMC):
+            # ESMC logic
+            from esm.sdk.api import ESMProtein, LogitsConfig
+
+            protein = ESMProtein(sequence=sequence)
+            protein_tensor = model.encode(protein)
+            logits_output = model.logits(
+                protein_tensor,
+                LogitsConfig(
+                    sequence=True,
+                    return_embeddings=True,
+                    return_hidden_states=True,
+                ),
+            )
+            if logits_output.hidden_states is None:
+                raise ValueError(
+                    "Model did not return hidden states. Check LogitsConfig settings."
+                )
+            embedding = (
+                logits_output.hidden_states[0][0].to(torch.float32).cpu().numpy()
+            )
+
+        elif isinstance(model, ESM3):
+            # ESM-3 logic
+            from esm.sdk.api import ESMProtein, SamplingConfig
+
+            protein = ESMProtein(sequence=sequence)
+            protein_tensor = model.encode(protein)
+            embedding = model.forward_and_sample(
+                protein_tensor,
+                SamplingConfig(return_per_residue_embeddings=True),
+            )
+            if embedding is None or embedding.per_residue_embedding is None:
+                raise ValueError("Model did not return embeddings")
+            embedding = embedding.per_residue_embedding.to(torch.float32).cpu().numpy()
+
+        else:
+            # ESM-2 logic
+            inputs = tokenizer(sequence, return_tensors="pt").to(device)
+            outputs = model(**inputs, output_hidden_states=True)
+            # Get the first layer's hidden states for all residues (excluding special tokens)
+            embedding = outputs.hidden_states[0][0, 1:-1, :].detach().cpu().numpy()
+
+    # Ensure embedding is a numpy array and normalize it
+    embedding = np.asarray(embedding, dtype=np.float64)
+    embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True)
+    return embedding
+
+
 def get_single_embedding_last_hidden_state(
     sequence: str, model: Any, tokenizer: Any, device: torch.device
 ) -> NDArray[np.float64]:
@@ -200,10 +273,27 @@ def get_single_embedding_last_hidden_state(
             embedding = (
                 logits_output.hidden_states[-1][0].to(torch.float32).cpu().numpy()
             )
+        elif isinstance(model, ESM3):
+            # ESM-3 logic
+            from esm.sdk.api import ESMProtein, SamplingConfig
+
+            protein = ESMProtein(sequence=sequence)
+            sequence_encoding = model.encode(protein)
+
+            embedding = model.forward_and_sample(
+                sequence_encoding, SamplingConfig(return_per_residue_embeddings=True)
+            )
+
+            if embedding is None or embedding.per_residue_embedding is None:
+                raise ValueError("Model did not return embeddings")
+            embedding = embedding.per_residue_embedding.to(torch.float32).cpu().numpy()
+
         else:
             # ESM-2 logic
             inputs = tokenizer(sequence, return_tensors="pt").to(device)
-            outputs = model(**inputs)
+            outputs = model(**inputs, output_hidden_states=True, return_dict=True)
+            # Extract per-residue embeddings (excluding special tokens)
+            # [0] to get first batch, [1:-1] to remove start/end tokens
             embedding = outputs.last_hidden_state[0, 1:-1, :].detach().cpu().numpy()
 
     # normalize the embedding
@@ -260,6 +350,9 @@ def get_single_embedding_all_layers(
                 emb = emb / np.linalg.norm(emb, axis=1, keepdims=True)
                 embeddings_list.append(emb)
 
+        elif isinstance(model, ESM3):
+            raise NotImplementedError("ESM3 is not supported for all layers")
+
         else:
             # For ESM-2: Get hidden states with output_hidden_states=True
             inputs = tokenizer(sequence, return_tensors="pt").to(device)

From f157a46f0c0d297ed4f083171f0861ca235a6ee3 Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Wed, 26 Mar 2025 11:39:13 +0000
Subject: [PATCH 3/9] working on three GPUS

---
 docs/usage/embeddings_analysis.ipynb | 296 +++++++++++++++------------
 src/pyeed/embedding.py               | 114 +++++++----
 src/pyeed/main.py                    |  51 ++++-
 3 files changed, 284 insertions(+), 177 deletions(-)

diff --git a/docs/usage/embeddings_analysis.ipynb b/docs/usage/embeddings_analysis.ipynb
index e609174f..d831243c 100644
--- a/docs/usage/embeddings_analysis.ipynb
+++ b/docs/usage/embeddings_analysis.ipynb
@@ -18,21 +18,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 7,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/nab/anaconda3/envs/pyeed_niklas/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
-      "  from .autonotebook import tqdm as notebook_tqdm\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "import sys\n",
-    "\n",
+    "import numpy as np\n",
     "from loguru import logger\n",
     "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
@@ -56,14 +47,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "📡 Connected to database.\n"
+      "📡 Connected to database.\n",
+      "All data has been wiped from the database.\n"
      ]
     }
    ],
@@ -72,12 +64,13 @@
     "user = \"neo4j\"\n",
     "password = \"12345678\"\n",
     "\n",
-    "eedb = Pyeed(uri, user=user, password=password)"
+    "eedb = Pyeed(uri, user=user, password=password)\n",
+    "eedb.db.wipe_database(date='2025-03-26')"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -94,7 +87,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -118,84 +111,84 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[32m2025-03-21 10:35:45.248\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mFound 2 sequences in the database.\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:45.248\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mFetching 68 sequences from ncbi_protein.\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:45.263\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 7 batches.\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:46.789\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KJO56189.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:46.832\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLP91446.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:46.878\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46346.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:46.903\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA74912.2 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:46.928\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AFN21551.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:46.980\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACB22021.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.006\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76794.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.031\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76795.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.113\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CCG28759.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.138\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLG19745.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.174\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAP20891.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.199\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAJ85677.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.224\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein SAQ02853.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.249\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CDR98216.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.300\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109963600.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.325\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA41038.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.346\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109874025.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.370\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46344.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.396\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APG33178.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.470\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKC98298.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.501\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32891.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.525\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76796.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.549\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAD24670.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.573\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ARF45649.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.598\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CTA52364.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.623\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ADL13944.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.649\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AGQ50511.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.675\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKA60778.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.699\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APT65830.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:47.766\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein HAH6232254.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.031\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein QDO66746.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.056\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CBX53726.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.094\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32889.2 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.120\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA64682.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.146\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71322.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.173\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71323.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.198\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71324.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.224\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AEC32455.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.250\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22538.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.275\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22539.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.340\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ABB97007.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.365\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACJ43254.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.390\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC05975.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.415\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BCD58813.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.440\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK17194.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.464\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD33116.2 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.488\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAB92324.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.514\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL03985.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.540\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF19151.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.569\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05613.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.782\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05614.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.809\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05612.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.836\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05611.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.861\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAM15527.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.887\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29433.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.913\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29434.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.939\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29435.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.965\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29436.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:48.992\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43229.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:49.018\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43230.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:49.585\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAG44570.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:49.613\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK14792.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:49.639\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK30619.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:49.667\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BAB16308.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:49.694\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF66653.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:49.721\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85660.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:49.748\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85661.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-21 10:35:49.776\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC67290.1 in database\u001b[0m\n"
+      "\u001b[32m2025-03-26 11:33:06.267\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mFound 0 sequences in the database.\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:06.268\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mFetching 68 sequences from ncbi_protein.\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:06.282\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 7 batches.\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.458\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KJO56189.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.484\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLP91446.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.507\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46346.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.530\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA74912.2 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.552\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AFN21551.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.576\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACB22021.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.600\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76794.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.624\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76795.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.647\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CCG28759.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.669\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLG19745.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.797\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAP20891.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.820\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAJ85677.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.842\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein SAQ02853.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.866\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CDR98216.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.913\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109963600.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.937\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA41038.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.956\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109874025.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:07.981\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46344.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.005\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APG33178.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.029\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKC98298.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.475\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32891.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.499\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76796.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.523\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAD24670.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.546\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ARF45649.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.570\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CTA52364.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.595\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ADL13944.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.619\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AGQ50511.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.643\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKA60778.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.668\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APT65830.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.684\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein HAH6232254.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.718\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein QDO66746.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.742\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CBX53726.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.767\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32889.2 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.791\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA64682.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.816\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71322.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.840\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71323.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.864\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71324.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.890\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AEC32455.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.914\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22538.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:08.937\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22539.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:09.646\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ABB97007.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:09.669\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACJ43254.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:09.695\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC05975.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:09.718\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BCD58813.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:09.743\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK17194.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:09.767\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD33116.2 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:09.790\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAB92324.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:09.815\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL03985.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:09.838\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF19151.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:09.863\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05613.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.058\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05614.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.083\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05612.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.107\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05611.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.129\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAM15527.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.152\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29433.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.176\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29434.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.200\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29435.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.224\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29436.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.248\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43229.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.273\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43230.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.366\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAG44570.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.390\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK14792.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.414\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK30619.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.438\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BAB16308.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.462\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF66653.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.486\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85660.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.509\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85661.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:33:10.534\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC67290.1 in database\u001b[0m\n"
      ]
     }
    ],
@@ -214,19 +207,54 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "Fetching 4 files: 100%|██████████| 4/4 [00:00<00:00, 284.23it/s]\n"
+      "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/modeling_utils.py:3437: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "ename": "OSError",
+     "evalue": "facebook/esm2_t48_3B_UR50D is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=`",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mHTTPError\u001b[0m                                 Traceback (most recent call last)",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/utils/_http.py:409\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m    408\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 409\u001b[0m     \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    410\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/requests/models.py:1024\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1023\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[0;32m-> 1024\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
+      "\u001b[0;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: https://huggingface.co/facebook/esm2_t48_3B_UR50D/resolve/main/config.json",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[0;31mRepositoryNotFoundError\u001b[0m                   Traceback (most recent call last)",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/utils/hub.py:403\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m    401\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m    402\u001b[0m     \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[0;32m--> 403\u001b[0m     resolved_file \u001b[38;5;241m=\u001b[39m \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    404\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpath_or_repo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    405\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    406\u001b[0m \u001b[43m        \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    407\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    408\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    409\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    410\u001b[0m \u001b[43m        \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    411\u001b[0m \u001b[43m        \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    412\u001b[0m \u001b[43m        \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    413\u001b[0m \u001b[43m        \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    414\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    415\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    416\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    417\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GatedRepoError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:862\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[1;32m    861\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 862\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_hf_hub_download_to_cache_dir\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    863\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Destination\u001b[39;49;00m\n\u001b[1;32m    864\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    865\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# File info\u001b[39;49;00m\n\u001b[1;32m    866\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    867\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    868\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    869\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    870\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# HTTP info\u001b[39;49;00m\n\u001b[1;32m    871\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    872\u001b[0m \u001b[43m        \u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    873\u001b[0m \u001b[43m        \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    874\u001b[0m \u001b[43m        \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    875\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    876\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Additional options\u001b[39;49;00m\n\u001b[1;32m    877\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    878\u001b[0m \u001b[43m        \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    879\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:969\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[0;34m(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)\u001b[0m\n\u001b[1;32m    968\u001b[0m     \u001b[38;5;66;03m# Otherwise, raise appropriate error\u001b[39;00m\n\u001b[0;32m--> 969\u001b[0m     \u001b[43m_raise_on_head_call_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhead_call_error\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    971\u001b[0m \u001b[38;5;66;03m# From now on, etag, commit_hash, url and size are not None.\u001b[39;00m\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:1486\u001b[0m, in \u001b[0;36m_raise_on_head_call_error\u001b[0;34m(head_call_error, force_download, local_files_only)\u001b[0m\n\u001b[1;32m   1481\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(head_call_error, (RepositoryNotFoundError, GatedRepoError)) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m   1482\u001b[0m     \u001b[38;5;28misinstance\u001b[39m(head_call_error, HfHubHTTPError) \u001b[38;5;129;01mand\u001b[39;00m head_call_error\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m401\u001b[39m\n\u001b[1;32m   1483\u001b[0m ):\n\u001b[1;32m   1484\u001b[0m     \u001b[38;5;66;03m# Repo not found or gated => let's raise the actual error\u001b[39;00m\n\u001b[1;32m   1485\u001b[0m     \u001b[38;5;66;03m# Unauthorized => likely a token issue => let's raise the actual error\u001b[39;00m\n\u001b[0;32m-> 1486\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m head_call_error\n\u001b[1;32m   1487\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1488\u001b[0m     \u001b[38;5;66;03m# Otherwise: most likely a connection issue or Hub downtime => let's warn the user\u001b[39;00m\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:1376\u001b[0m, in \u001b[0;36m_get_metadata_or_catch_error\u001b[0;34m(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, token, local_files_only, relative_filename, storage_folder)\u001b[0m\n\u001b[1;32m   1375\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1376\u001b[0m     metadata \u001b[38;5;241m=\u001b[39m \u001b[43mget_hf_file_metadata\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1377\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\n\u001b[1;32m   1378\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1379\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:1296\u001b[0m, in \u001b[0;36mget_hf_file_metadata\u001b[0;34m(url, token, proxies, timeout, library_name, library_version, user_agent, headers)\u001b[0m\n\u001b[1;32m   1295\u001b[0m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[0;32m-> 1296\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1297\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mHEAD\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1298\u001b[0m \u001b[43m    \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1299\u001b[0m \u001b[43m    \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1300\u001b[0m \u001b[43m    \u001b[49m\u001b[43mallow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   1301\u001b[0m \u001b[43m    \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   1302\u001b[0m \u001b[43m    \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1303\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1304\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1305\u001b[0m hf_raise_for_status(r)\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:280\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m    279\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[0;32m--> 280\u001b[0m     response \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    281\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    282\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    283\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    284\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    285\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    287\u001b[0m     \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[1;32m    288\u001b[0m     \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:304\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m    303\u001b[0m response \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams)\n\u001b[0;32m--> 304\u001b[0m \u001b[43mhf_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    305\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/utils/_http.py:458\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m    450\u001b[0m     message \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m    451\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    452\u001b[0m         \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    456\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m make sure you are authenticated.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    457\u001b[0m     )\n\u001b[0;32m--> 458\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m _format(RepositoryNotFoundError, message, response) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m    460\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m400\u001b[39m:\n",
+      "\u001b[0;31mRepositoryNotFoundError\u001b[0m: 404 Client Error. (Request ID: Root=1-67e3e5f6-44cf0d8d57a4fe053b72a484;c071d07d-bc72-4882-a792-4d6486057291)\n\nRepository Not Found for url: https://huggingface.co/facebook/esm2_t48_3B_UR50D/resolve/main/config.json.\nPlease make sure you specified the correct `repo_id` and `repo_type`.\nIf you are trying to access a private or gated repo, make sure you are authenticated.",
+      "\nThe above exception was the direct cause of the following exception:\n",
+      "\u001b[0;31mOSError\u001b[0m                                   Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43meedb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate_sequence_embeddings\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfacebook/esm2_t48_3B_UR50D\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/main.py:208\u001b[0m, in \u001b[0;36mPyeed.calculate_sequence_embeddings\u001b[0;34m(self, batch_size, model_name)\u001b[0m\n\u001b[1;32m    197\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    198\u001b[0m \u001b[38;5;124;03mCalculates embeddings for all sequences in the database that do not have embeddings, processing in batches.\u001b[39;00m\n\u001b[1;32m    199\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    204\u001b[0m \u001b[38;5;124;03m        Available models can be found at https://huggingface.co/facebook/esm2_t6_8M_UR50D.\u001b[39;00m\n\u001b[1;32m    205\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    207\u001b[0m \u001b[38;5;66;03m# Load the model, tokenizer, and device\u001b[39;00m\n\u001b[0;32m--> 208\u001b[0m model, tokenizer, device \u001b[38;5;241m=\u001b[39m \u001b[43mload_model_and_tokenizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    210\u001b[0m \u001b[38;5;66;03m# Cypher query to retrieve proteins without embeddings and with valid sequences\u001b[39;00m\n\u001b[1;32m    211\u001b[0m query \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m    212\u001b[0m \u001b[38;5;124mMATCH (p:Protein)\u001b[39m\n\u001b[1;32m    213\u001b[0m \u001b[38;5;124mWHERE p.embedding IS NULL AND p.sequence IS NOT NULL\u001b[39m\n\u001b[1;32m    214\u001b[0m \u001b[38;5;124mRETURN p.accession_id AS accession, p.sequence AS sequence\u001b[39m\n\u001b[1;32m    215\u001b[0m \u001b[38;5;124m\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n",
+      "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/embedding.py:72\u001b[0m, in \u001b[0;36mload_model_and_tokenizer\u001b[0;34m(model_name)\u001b[0m\n\u001b[1;32m     65\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     66\u001b[0m     \u001b[38;5;66;03m# Otherwise, assume it's an ESM-2 model on Hugging Face\u001b[39;00m\n\u001b[1;32m     67\u001b[0m     full_model_name \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m     68\u001b[0m         model_name\n\u001b[1;32m     69\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m model_name\u001b[38;5;241m.\u001b[39mstartswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacebook/\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     70\u001b[0m         \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacebook/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodel_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     71\u001b[0m     )\n\u001b[0;32m---> 72\u001b[0m     model: Any \u001b[38;5;241m=\u001b[39m \u001b[43mEsmModel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfull_model_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_auth_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     73\u001b[0m     tokenizer \u001b[38;5;241m=\u001b[39m EsmTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(full_model_name, use_auth_token\u001b[38;5;241m=\u001b[39mtoken)\n\u001b[1;32m     74\u001b[0m     model \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mto(device)\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/modeling_utils.py:3464\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m   3461\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m commit_hash \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m   3462\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(config, PretrainedConfig):\n\u001b[1;32m   3463\u001b[0m         \u001b[38;5;66;03m# We make a call to the config file first (which may be absent) to get the commit hash as soon as possible\u001b[39;00m\n\u001b[0;32m-> 3464\u001b[0m         resolved_config_file \u001b[38;5;241m=\u001b[39m \u001b[43mcached_file\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   3465\u001b[0m \u001b[43m            \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3466\u001b[0m \u001b[43m            \u001b[49m\u001b[43mCONFIG_NAME\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3467\u001b[0m \u001b[43m            \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3468\u001b[0m \u001b[43m            \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3469\u001b[0m \u001b[43m            \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3470\u001b[0m \u001b[43m            \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   
3471\u001b[0m \u001b[43m            \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3472\u001b[0m \u001b[43m            \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3473\u001b[0m \u001b[43m            \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3474\u001b[0m \u001b[43m            \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3475\u001b[0m \u001b[43m            \u001b[49m\u001b[43m_raise_exceptions_for_gated_repo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   3476\u001b[0m \u001b[43m            \u001b[49m\u001b[43m_raise_exceptions_for_missing_entries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   3477\u001b[0m \u001b[43m            \u001b[49m\u001b[43m_raise_exceptions_for_connection_errors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   3478\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3479\u001b[0m         commit_hash \u001b[38;5;241m=\u001b[39m extract_commit_hash(resolved_config_file, commit_hash)\n\u001b[1;32m   3480\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/utils/hub.py:426\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m    421\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m    422\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are trying to access a gated repo.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mMake sure to have access to it at \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    423\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(e)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    424\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m    425\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RepositoryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 426\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m    427\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not a local folder and is not a valid model identifier \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    428\u001b[0m         
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlisted on \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/models\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mIf this is a private repository, make sure to pass a token \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    429\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhaving permission to this repo either by logging in with `huggingface-cli login` or by passing \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    430\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`token=`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    431\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m    432\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RevisionNotFoundError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    433\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m    434\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrevision\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not a valid git identifier (branch name, tag name or commit id) that exists \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    435\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfor this model name. 
Check the model page at \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    436\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m for available revisions.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    437\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n",
+      "\u001b[0;31mOSError\u001b[0m: facebook/esm2_t48_3B_UR50D is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=`"
      ]
     }
    ],
    "source": [
-    "eedb.calculate_sequence_embeddings(model_name=\"esmc_300m\")"
+    "eedb.calculate_sequence_embeddings(model_name=\"facebook/esm2_t36_3B_UR50D\")"
    ]
   },
   {
@@ -242,6 +270,28 @@
    "cell_type": "code",
    "execution_count": 7,
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "(1152,)\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Fetch the embedding of a single protein to determine the embedding dimensionality\n",
+    "# any random protein will do\n",
+    "query = \"MATCH (p:Protein) RETURN p.embedding LIMIT 1\"\n",
+    "result = eedb.db.execute_read(query)\n",
+    "dimensions = np.array(result[0]['p.embedding']).shape\n",
+    "print(dimensions)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
    "outputs": [
     {
      "name": "stdout",
@@ -252,7 +302,7 @@
     },
     {
      "data": {
-      "image/png": "",
+      "image/png": "",
       "text/plain": [
        "
" ] @@ -322,7 +372,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -330,7 +380,7 @@ "output_type": "stream", "text": [ "Resulst for index AAP20891.1 are:\n", - "[('AAP20891.1', 0.0), ('P62593', 0.0), ('AGQ50511.1', 0.00016200621801287785), ('ABB97007.1', 0.0001810048295400879), ('AFN21551.1', 0.00018909362988450695), ('CAC67290.1', 0.00021654775310264718), ('ADL13944.1', 0.0002567003210336427), ('AAK30619.1', 0.0002616398020808264), ('AAL29433.1', 0.0002646931927183793), ('ACJ43254.1', 0.0002669990760338914)]\n" + "[('AAP20891.1', 0.0), ('ADL13944.1', 5.6168107638088216e-05), ('AFN21551.1', 6.611455559601964e-05), ('AAF05613.1', 0.00010320505315297712), ('CAC67290.1', 0.00012422009260193434), ('AAL29433.1', 0.00012499919288500028), ('ABB97007.1', 0.00012965265937237014), ('CAA74912.2', 0.00013823680560853813), ('CBX53726.1', 0.00015387097994867815), ('AGQ50511.1', 0.00015501224370340072)]\n" ] } ], @@ -374,7 +424,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -423,41 +473,29 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 11, "metadata": {}, "outputs": [ { - "data": { - "text/html": [ - "
/home/nab/anaconda3/envs/pyeed_niklas/lib/python3.10/site-packages/rich/live.py:231: UserWarning: install \n",
-       "\"ipywidgets\" for Jupyter support\n",
-       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
-       "
\n" - ], - "text/plain": [ - "/home/nab/anaconda3/envs/pyeed_niklas/lib/python3.10/site-packages/rich/live.py:231: UserWarning: install \n", - "\"ipywidgets\" for Jupyter support\n", - " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "data": { - "text/html": [ - "
\n"
-      ],
-      "text/plain": []
-     },
-     "metadata": {},
-     "output_type": "display_data"
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "[('AAP20891.1', 1.0), ('P62593', 1.0), ('AGQ50511.1', 0.9999189376831055), ('ABB97007.1', 0.999909520149231), ('AFN21551.1', 0.9999054670333862), ('CAC67290.1', 0.9998918771743774), ('ADL13944.1', 0.9998717904090881), ('AAK30619.1', 0.9998692274093628), ('AAL29433.1', 0.9998676776885986), ('ACJ43254.1', 0.9998666048049927)]\n"
+     "ename": "ClientError",
+     "evalue": "{code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `db.index.vector.queryNodes`: Caused by: java.lang.IllegalArgumentException: Index query vector has 1152 dimensions, but indexed vectors have 960.}",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mClientError\u001b[0m                               Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[11], line 2\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[38;5;66;03m# here we use the vector index to find the closest matches\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43met\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfind_nearest_neighbors_based_on_vector_index\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m      3\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdb\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43meedb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdb\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      4\u001b[0m \u001b[43m    \u001b[49m\u001b[43mquery_protein_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdf\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mprotein_id_database\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mtolist\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m      5\u001b[0m \u001b[43m    \u001b[49m\u001b[43mindex_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mvector_index_Protein_embedding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m      6\u001b[0m \u001b[43m    \u001b[49m\u001b[43mnumber_of_neighbors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m10\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m      7\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m      9\u001b[0m \u001b[38;5;28mprint\u001b[39m(results)\n",
+      "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/analysis/embedding_analysis.py:415\u001b[0m, in \u001b[0;36mEmbeddingTool.find_nearest_neighbors_based_on_vector_index\u001b[0;34m(self, db, query_protein_id, index_name, number_of_neighbors)\u001b[0m\n\u001b[1;32m    406\u001b[0m     logger\u001b[38;5;241m.\u001b[39minfo(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIndex \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mindex_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is populated, finding nearest neighbors\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    408\u001b[0m query_find_nearest_neighbors \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m    409\u001b[0m \u001b[38;5;124mMATCH (source:Protein \u001b[39m\u001b[38;5;130;01m{{\u001b[39;00m\u001b[38;5;124maccession_id: \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquery_protein_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m}}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\n\u001b[1;32m    410\u001b[0m \u001b[38;5;124mWITH source.embedding AS embedding\u001b[39m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    413\u001b[0m \u001b[38;5;124mRETURN fprotein.accession_id, score\u001b[39m\n\u001b[1;32m    414\u001b[0m \u001b[38;5;124m\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[0;32m--> 415\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[43mdb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mquery_find_nearest_neighbors\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    416\u001b[0m neighbors: \u001b[38;5;28mlist\u001b[39m[\u001b[38;5;28mtuple\u001b[39m[\u001b[38;5;28mstr\u001b[39m, \u001b[38;5;28mfloat\u001b[39m]] \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m    417\u001b[0m     (\u001b[38;5;28mstr\u001b[39m(record[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfprotein.accession_id\u001b[39m\u001b[38;5;124m\"\u001b[39m]), 
\u001b[38;5;28mfloat\u001b[39m(record[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mscore\u001b[39m\u001b[38;5;124m\"\u001b[39m]))\n\u001b[1;32m    418\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m record \u001b[38;5;129;01min\u001b[39;00m results\n\u001b[1;32m    419\u001b[0m ]\n\u001b[1;32m    420\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m neighbors\n",
+      "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/dbconnect.py:45\u001b[0m, in \u001b[0;36mDatabaseConnector.execute_read\u001b[0;34m(self, query, parameters)\u001b[0m\n\u001b[1;32m     34\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m     35\u001b[0m \u001b[38;5;124;03mExecutes a read (MATCH) query using the Neo4j driver.\u001b[39;00m\n\u001b[1;32m     36\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m     42\u001b[0m \u001b[38;5;124;03m    list[dict]: The result of the query as a list of dictionaries.\u001b[39;00m\n\u001b[1;32m     43\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m     44\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdriver\u001b[38;5;241m.\u001b[39msession() \u001b[38;5;28;01mas\u001b[39;00m session:\n\u001b[0;32m---> 45\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43msession\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mexecute_read\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mquery\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/work/session.py:661\u001b[0m, in \u001b[0;36mSession.execute_read\u001b[0;34m(self, transaction_function, *args, **kwargs)\u001b[0m\n\u001b[1;32m    592\u001b[0m \u001b[38;5;129m@NonConcurrentMethodChecker\u001b[39m\u001b[38;5;241m.\u001b[39mnon_concurrent_method\n\u001b[1;32m    593\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21mexecute_read\u001b[39m(\n\u001b[1;32m    594\u001b[0m     \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    598\u001b[0m     \u001b[38;5;241m*\u001b[39margs: _P\u001b[38;5;241m.\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs: _P\u001b[38;5;241m.\u001b[39mkwargs\n\u001b[1;32m    599\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m _R:\n\u001b[1;32m    600\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Execute a unit of work in a managed read transaction.\u001b[39;00m\n\u001b[1;32m    601\u001b[0m \n\u001b[1;32m    602\u001b[0m \u001b[38;5;124;03m    .. note::\u001b[39;00m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    659\u001b[0m \u001b[38;5;124;03m    .. 
versionadded:: 5.0\u001b[39;00m\n\u001b[1;32m    660\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 661\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_transaction\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    662\u001b[0m \u001b[43m        \u001b[49m\u001b[43mREAD_ACCESS\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mTelemetryAPI\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mTX_FUNC\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    663\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtransaction_function\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\n\u001b[1;32m    664\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/work/session.py:552\u001b[0m, in \u001b[0;36mSession._run_transaction\u001b[0;34m(self, access_mode, api, transaction_function, args, kwargs)\u001b[0m\n\u001b[1;32m    550\u001b[0m tx \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_transaction\n\u001b[1;32m    551\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 552\u001b[0m     result \u001b[38;5;241m=\u001b[39m \u001b[43mtransaction_function\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    553\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39mCancelledError:\n\u001b[1;32m    554\u001b[0m     \u001b[38;5;66;03m# if cancellation callback has not been called yet:\u001b[39;00m\n\u001b[1;32m    555\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_transaction \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
+      "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/dbconnect.py:222\u001b[0m, in \u001b[0;36mDatabaseConnector._run_query\u001b[0;34m(tx, query, parameters)\u001b[0m\n\u001b[1;32m    220\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Executes a Cypher query in the provided transaction.\"\"\"\u001b[39;00m\n\u001b[1;32m    221\u001b[0m result \u001b[38;5;241m=\u001b[39m tx\u001b[38;5;241m.\u001b[39mrun(query, parameters)\n\u001b[0;32m--> 222\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [record\u001b[38;5;241m.\u001b[39mdata() \u001b[38;5;28;01mfor\u001b[39;00m record \u001b[38;5;129;01min\u001b[39;00m result]\n",
+      "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/dbconnect.py:222\u001b[0m, in \u001b[0;36m\u001b[0;34m(.0)\u001b[0m\n\u001b[1;32m    220\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Executes a Cypher query in the provided transaction.\"\"\"\u001b[39;00m\n\u001b[1;32m    221\u001b[0m result \u001b[38;5;241m=\u001b[39m tx\u001b[38;5;241m.\u001b[39mrun(query, parameters)\n\u001b[0;32m--> 222\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m [record\u001b[38;5;241m.\u001b[39mdata() \u001b[38;5;28;01mfor\u001b[39;00m record \u001b[38;5;129;01min\u001b[39;00m result]\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/work/result.py:270\u001b[0m, in \u001b[0;36mResult.__iter__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    268\u001b[0m     \u001b[38;5;28;01myield\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_record_buffer\u001b[38;5;241m.\u001b[39mpopleft()\n\u001b[1;32m    269\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_streaming:\n\u001b[0;32m--> 270\u001b[0m     \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_connection\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfetch_message\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    271\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_discarding:\n\u001b[1;32m    272\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_discard()\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/io/_common.py:178\u001b[0m, in \u001b[0;36mConnectionErrorHandler.__getattr__..outer..inner\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    176\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21minner\u001b[39m(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m    177\u001b[0m     \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 178\u001b[0m         \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    179\u001b[0m     \u001b[38;5;28;01mexcept\u001b[39;00m (Neo4jError, ServiceUnavailable, SessionExpired) \u001b[38;5;28;01mas\u001b[39;00m exc:\n\u001b[1;32m    180\u001b[0m         \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m asyncio\u001b[38;5;241m.\u001b[39miscoroutinefunction(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m__on_error)\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/io/_bolt.py:850\u001b[0m, in \u001b[0;36mBolt.fetch_message\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m    846\u001b[0m \u001b[38;5;66;03m# Receive exactly one message\u001b[39;00m\n\u001b[1;32m    847\u001b[0m tag, fields \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minbox\u001b[38;5;241m.\u001b[39mpop(\n\u001b[1;32m    848\u001b[0m     hydration_hooks\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresponses[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mhydration_hooks\n\u001b[1;32m    849\u001b[0m )\n\u001b[0;32m--> 850\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_process_message\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtag\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfields\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    851\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39midle_since \u001b[38;5;241m=\u001b[39m monotonic()\n\u001b[1;32m    852\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/io/_bolt5.py:369\u001b[0m, in \u001b[0;36mBolt5x0._process_message\u001b[0;34m(self, tag, fields)\u001b[0m\n\u001b[1;32m    367\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_server_state_manager\u001b[38;5;241m.\u001b[39mstate \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbolt_states\u001b[38;5;241m.\u001b[39mFAILED\n\u001b[1;32m    368\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 369\u001b[0m     \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mon_failure\u001b[49m\u001b[43m(\u001b[49m\u001b[43msummary_metadata\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    370\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ServiceUnavailable, DatabaseUnavailable):\n\u001b[1;32m    371\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool:\n",
+      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/io/_common.py:245\u001b[0m, in \u001b[0;36mResponse.on_failure\u001b[0;34m(self, metadata)\u001b[0m\n\u001b[1;32m    243\u001b[0m handler \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandlers\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mon_summary\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m    244\u001b[0m Util\u001b[38;5;241m.\u001b[39mcallback(handler)\n\u001b[0;32m--> 245\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Neo4jError\u001b[38;5;241m.\u001b[39mhydrate(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmetadata)\n",
+      "\u001b[0;31mClientError\u001b[0m: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `db.index.vector.queryNodes`: Caused by: java.lang.IllegalArgumentException: Index query vector has 1152 dimensions, but indexed vectors have 960.}"
      ]
     }
    ],
@@ -502,7 +540,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "pyeed_niklas_env",
    "language": "python",
    "name": "python3"
   },
diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py
index 0cb6770e..fc1cb79a 100644
--- a/src/pyeed/embedding.py
+++ b/src/pyeed/embedding.py
@@ -1,4 +1,5 @@
 import gc
+import logging
 import os
 from typing import Any, Tuple, Union
 
@@ -6,13 +7,15 @@
 import torch
 from esm.models.esm3 import ESM3
 from esm.models.esmc import ESMC
-from esm.sdk.api import ESM3InferenceClient, ESMProtein, LogitsConfig
+from esm.sdk.api import ESM3InferenceClient, ESMProtein, LogitsConfig, SamplingConfig
 from huggingface_hub import HfFolder, login
 from numpy.typing import NDArray
 from transformers import EsmModel, EsmTokenizer
 
 from pyeed.dbconnect import DatabaseConnector
 
+logger = logging.getLogger(__name__)
+
 
 def get_hf_token() -> str:
     """Get or request Hugging Face token."""
@@ -34,36 +37,37 @@ def get_hf_token() -> str:
 def load_model_and_tokenizer(
     model_name: str,
 ) -> Tuple[
-    Union[EsmModel, ESMC, ESM3InferenceClient],  # Added ESMC to the Union type
+    Union[
+        EsmModel,
+        ESMC,
+        ESM3InferenceClient,
+        torch.nn.DataParallel,
+        ESM3,
+    ],  # Updated return type
     Union[EsmTokenizer, None],
     torch.device,
 ]:
     """
-    Loads either an ESM-3 (using ESMC) or an ESM-2 (using Transformers) model,
-    depending on the `model_name` provided.
+    Loads either an ESM++, ESM-3 (using ESMC or ESM3) or an ESM-2 (using Transformers) model,
+    depending on the `model_name` provided. Uses multiple GPUs in parallel if available.
 
     Args:
-        model_name (str): The model name or identifier (e.g., 'esmc' or 'esm2_t12_35M_UR50D').
+        model_name (str): The model name or identifier.
 
     Returns:
         Tuple of (model, tokenizer, device)
     """
-    # Get token only when loading model
     token = get_hf_token()
+    # Default device is the first CUDA device if available, else CPU.
     device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+    tokenizer = None
 
-    # Check if this is an ESM-3 variant
     if "esmc" in model_name.lower():
-        # Using ESMC from_pretrained
         model: Any = ESMC.from_pretrained(model_name)
         model = model.to(device)
-        return model, None, device
     elif "esm3-sm-open-v1" in model_name.lower():
         model: Any = ESM3.from_pretrained("esm3_sm_open_v1").to(device)
-
-        return model, None, device
     else:
-        # Otherwise, assume it's an ESM-2 model on Hugging Face
         full_model_name = (
             model_name
             if model_name.startswith("facebook/")
@@ -72,40 +76,56 @@ def load_model_and_tokenizer(
         model: Any = EsmModel.from_pretrained(full_model_name, use_auth_token=token)
         tokenizer = EsmTokenizer.from_pretrained(full_model_name, use_auth_token=token)
         model = model.to(device)
-        return model, tokenizer, device
+
+    # Check if multiple GPUs are available and wrap the model accordingly
+    if torch.cuda.device_count() > 1 and device.type == "cuda":
+        logger.info(f"Using {torch.cuda.device_count()} GPUs for parallel inference.")
+        model = torch.nn.DataParallel(model)
+
+    return model, tokenizer, device
 
 
 def get_batch_embeddings(
     batch_sequences: list[str],
-    model: Union[EsmModel, ESMC],
+    model: Union[
+        EsmModel,
+        ESMC,
+        torch.nn.DataParallel,
+        ESM3InferenceClient,
+        ESM3,
+    ],
     tokenizer_or_alphabet: Union[EsmTokenizer, None],
     device: torch.device,
     pool_embeddings: bool = True,
 ) -> list[NDArray[np.float64]]:
     """
     Generates mean-pooled embeddings for a batch of sequences.
+    Supports ESM++, ESM-2 and ESM-3 models.
 
     Args:
-        batch_sequences (list[str]): List of sequence strings to be embedded.
-        model (Union[EsmModel, ESMC]): Loaded model (ESM-2 or ESM-3).
-        tokenizer_or_alphabet (Union[EsmTokenizer, None]): Tokenizer if ESM-2, None if ESM-3.
-        device (torch.device): Device on which to run inference (CPU or GPU).
-        pool_embeddings (bool): Whether to pool embeddings across sequence length.
+        batch_sequences (list[str]): List of sequence strings.
+        model: Loaded model (could be wrapped in DataParallel).
+        tokenizer_or_alphabet: Tokenizer if needed.
+        device: Inference device (CPU/GPU).
+        pool_embeddings (bool): Whether to average embeddings across the sequence length.
 
     Returns:
-        list[NDArray[np.float64]]: A list of embeddings as NumPy arrays.
+        List of embeddings as NumPy arrays.
     """
-    if isinstance(model, ESMC):
+    # First, determine the base model type
+    base_model = model.module if isinstance(model, torch.nn.DataParallel) else model
+
+    if isinstance(base_model, ESMC):
+        # For ESMC models
+        embedding_list = []
         with torch.no_grad():
-            embedding_list = []
             for sequence in batch_sequences:
-                # Process each sequence individually
                 protein = ESMProtein(sequence=sequence)
-                protein_tensor = model.encode(protein)
-                logits_output = model.logits(
+                # encode()/logits() run on the unwrapped base_model, so DataParallel is bypassed here
+                protein_tensor = base_model.encode(protein)
+                logits_output = base_model.logits(
                     protein_tensor, LogitsConfig(sequence=True, return_embeddings=True)
                 )
-                # Convert embeddings to numpy array - ensure embeddings is not None
                 if logits_output.embeddings is None:
                     raise ValueError(
                         "Model did not return embeddings. Check LogitsConfig settings."
@@ -114,9 +134,27 @@ def get_batch_embeddings(
                 if pool_embeddings:
                     embeddings = embeddings.mean(axis=1)
                 embedding_list.append(embeddings[0])
-
         return embedding_list
-
+    elif isinstance(base_model, ESM3):
+        # For ESM3 models
+        embedding_list = []
+        with torch.no_grad():
+            for sequence in batch_sequences:
+                protein = ESMProtein(sequence=sequence)
+                sequence_encoding = base_model.encode(protein)
+                result = base_model.forward_and_sample(
+                    sequence_encoding,
+                    SamplingConfig(return_per_residue_embeddings=True),
+                )
+                if result is None or result.per_residue_embedding is None:
+                    raise ValueError("Model did not return embeddings")
+                embeddings = (
+                    result.per_residue_embedding.to(torch.float32).cpu().numpy()
+                )
+                if pool_embeddings:
+                    embeddings = embeddings.mean(axis=0)
+                embedding_list.append(embeddings)
+        return embedding_list
     else:
         # ESM-2 logic
         assert tokenizer_or_alphabet is not None, "Tokenizer required for ESM-2 models"
@@ -124,11 +162,15 @@ def get_batch_embeddings(
             batch_sequences, padding=True, truncation=True, return_tensors="pt"
         ).to(device)
         with torch.no_grad():
-            outputs = model(**inputs)
-        embeddings = outputs.last_hidden_state.cpu().numpy()
+            outputs = model(**inputs, output_hidden_states=True)
+
+        # Get last hidden state for each sequence
+        hidden_states = outputs.last_hidden_state.cpu().numpy()
+
         if pool_embeddings:
-            return [embedding.mean(axis=0) for embedding in embeddings]
-        return list(embeddings)
+            # Mean pooling across sequence length
+            return [embedding.mean(axis=0) for embedding in hidden_states]
+        return list(hidden_states)
 
 
 def calculate_single_sequence_embedding_last_hidden_state(
@@ -259,12 +301,9 @@ def get_single_embedding_last_hidden_state(
             logits_output = model.logits(
                 protein_tensor,
                 LogitsConfig(
-                    sequence=True,
-                    return_embeddings=True,
-                    return_hidden_states=True,
+                    sequence=True, return_embeddings=True, return_hidden_states=True
                 ),
             )
-            # Ensure hidden_states is not None before accessing it
             if logits_output.hidden_states is None:
                 raise ValueError(
                     "Model did not return hidden states. Check LogitsConfig settings."
@@ -287,13 +326,10 @@ def get_single_embedding_last_hidden_state(
             if embedding is None or embedding.per_residue_embedding is None:
                 raise ValueError("Model did not return embeddings")
             embedding = embedding.per_residue_embedding.to(torch.float32).cpu().numpy()
-
         else:
             # ESM-2 logic
             inputs = tokenizer(sequence, return_tensors="pt").to(device)
             outputs = model(**inputs, output_hidden_states=True, return_dict=True)
-            # Extract per-residue embeddings (excluding special tokens)
-            # [0] to get first batch, [1:-1] to remove start/end tokens
             embedding = outputs.last_hidden_state[0, 1:-1, :].detach().cpu().numpy()
 
     # normalize the embedding
diff --git a/src/pyeed/main.py b/src/pyeed/main.py
index d4a520b9..18c83fc6 100644
--- a/src/pyeed/main.py
+++ b/src/pyeed/main.py
@@ -347,9 +347,9 @@ def fetch_dna_entries_for_proteins(self, ids: list[str] | None = None) -> None:
             try:
                 batch_ids = nucleotide_ids[i : i + BATCH_SIZE]
                 self.fetch_ncbi_nucleotide(batch_ids)
-                logger.info(f"Successfully fetched batch {i//BATCH_SIZE + 1}")
+                logger.info(f"Successfully fetched batch {i // BATCH_SIZE + 1}")
             except Exception as e:
-                logger.error(f"Error fetching batch {i//BATCH_SIZE + 1}: {str(e)}")
+                logger.error(f"Error fetching batch {i // BATCH_SIZE + 1}: {str(e)}")
                 continue
 
         # Process protein-DNA relationships in batches
@@ -419,11 +419,11 @@ def fetch_dna_entries_for_proteins(self, ids: list[str] | None = None) -> None:
                         batch_create_query, {"relationships": new_relationships}
                     )
                     logger.info(
-                        f"Successfully processed relationship batch {i//BATCH_SIZE + 1}"
+                        f"Successfully processed relationship batch {i // BATCH_SIZE + 1}"
                     )
             except Exception as e:
                 logger.error(
-                    f"Error processing relationship batch {i//BATCH_SIZE + 1}: {str(e)}"
+                    f"Error processing relationship batch {i // BATCH_SIZE + 1}: {str(e)}"
                 )
                 continue
 
@@ -434,12 +434,45 @@ def create_coding_sequences_regions(self) -> None:
         It finds the nucleotide start and end positions and create a Region object for the corresponding DNA sequence.
         Create the region object with the right annotation. And then connect it to the DNA sequence.
         """
+
+        # in case of multiple DNA entires per Protein we need to create a Region for each DNA entry
+        # some of the DNA entries might even not have start and end vlaues on the ENCODES endge, in this case please take the entire sequence length
+        """
+        This Cypher query creates coding sequence regions for DNA sequences that don't already have them.
+        
+        The query:
+        1. Finds all Protein-DNA pairs connected by an ENCODES relationship
+        2. Filters for cases where the DNA doesn't already have a coding sequence Region for that protein
+        3. Creates a new Region node with 'coding sequence' annotation and the protein's ID
+        4. Creates a HAS_REGION relationship from the DNA to the new Region
+        5. Sets the start position to either:
+           - The start value from the ENCODES relationship if it exists
+           - 0 (beginning of sequence) if no start value is specified
+        6. Sets the end position to either:
+           - The end value from the ENCODES relationship if it exists
+           - The full DNA sequence length minus 1 if no end value is specified
+        """
         query = """
-        MATCH (p:Protein)
-        WHERE p.nucleotide_id IS NOT NULL
+        MATCH (d:DNA)-[rel_encode:ENCODES]->(p:Protein)
+        WHERE NOT EXISTS((d)-[:HAS_REGION]->(:Region {annotation: 'coding sequence', sequence_id: p.accession_id}))
         CREATE (r:Region {annotation: 'coding sequence', sequence_id: p.accession_id})
-        WITH p, r
-        MATCH (d:DNA {accession_id: p.nucleotide_id})
-        CREATE (d)-[:HAS_REGION {start: p.nucleotide_start, end: p.nucleotide_end}]->(r)
+        CREATE (d)-[rel:HAS_REGION {
+            start: CASE 
+                WHEN rel_encode.start IS NOT NULL THEN rel_encode.start 
+                ELSE 0 
+            END, 
+            end: CASE 
+                WHEN rel_encode.end IS NOT NULL THEN rel_encode.end 
+                ELSE size(d.sequence) - 1 
+            END
+        }]->(r)
         """
         self.db.execute_write(query)
+
+        # Log the number of regions created
+        count_query = """
+        MATCH (d:DNA)-[:HAS_REGION]->(r:Region {annotation: 'coding sequence'})
+        RETURN count(r) as region_count
+        """
+        result = self.db.execute_read(count_query)
+        logger.info(f"Created {result[0]['region_count']} coding sequence regions")

From 5c87e130478db5c77fb9aa68aadd1b7ea270947e Mon Sep 17 00:00:00 2001
From: Niklas Abraham GPU 
Date: Wed, 26 Mar 2025 16:58:54 +0000
Subject: [PATCH 4/9] fixed standard error

---
 docs/usage/embeddings_analysis.ipynb     | 217 +++++++++++------------
 docs/usage/mutation_analysis.ipynb       |  32 +++-
 src/pyeed/analysis/sequence_alignment.py |  20 ++-
 src/pyeed/analysis/standard_numbering.py |   5 +-
 4 files changed, 139 insertions(+), 135 deletions(-)

diff --git a/docs/usage/embeddings_analysis.ipynb b/docs/usage/embeddings_analysis.ipynb
index d831243c..b7fa1f61 100644
--- a/docs/usage/embeddings_analysis.ipynb
+++ b/docs/usage/embeddings_analysis.ipynb
@@ -18,9 +18,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 1,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "  from .autonotebook import tqdm as notebook_tqdm\n"
+     ]
+    }
+   ],
    "source": [
     "import sys\n",
     "import numpy as np\n",
@@ -47,7 +56,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -70,7 +79,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -87,7 +96,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [
     {
@@ -111,84 +120,84 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [
     {
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "\u001b[32m2025-03-26 11:33:06.267\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mFound 0 sequences in the database.\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:06.268\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mFetching 68 sequences from ncbi_protein.\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:06.282\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 7 batches.\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.458\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KJO56189.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.484\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLP91446.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.507\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46346.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.530\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA74912.2 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.552\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AFN21551.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.576\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACB22021.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.600\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76794.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.624\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76795.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.647\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CCG28759.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.669\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLG19745.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.797\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAP20891.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.820\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAJ85677.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.842\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein SAQ02853.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.866\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CDR98216.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.913\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109963600.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.937\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA41038.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.956\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109874025.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:07.981\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46344.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.005\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APG33178.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.029\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKC98298.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.475\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32891.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.499\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76796.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.523\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAD24670.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.546\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ARF45649.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.570\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CTA52364.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.595\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ADL13944.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.619\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AGQ50511.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.643\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKA60778.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.668\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APT65830.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.684\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein HAH6232254.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.718\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein QDO66746.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.742\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CBX53726.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.767\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32889.2 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.791\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA64682.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.816\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71322.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.840\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71323.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.864\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71324.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.890\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AEC32455.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.914\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22538.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:08.937\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22539.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:09.646\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ABB97007.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:09.669\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACJ43254.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:09.695\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC05975.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:09.718\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BCD58813.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:09.743\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK17194.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:09.767\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD33116.2 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:09.790\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAB92324.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:09.815\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL03985.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:09.838\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF19151.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:09.863\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05613.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.058\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05614.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.083\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05612.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.107\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05611.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.129\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAM15527.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.152\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29433.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.176\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29434.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.200\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29435.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.224\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29436.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.248\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43229.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.273\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43230.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.366\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAG44570.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.390\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK14792.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.414\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK30619.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.438\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BAB16308.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.462\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF66653.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.486\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85660.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.509\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85661.1 in database\u001b[0m\n",
-      "\u001b[32m2025-03-26 11:33:10.534\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC67290.1 in database\u001b[0m\n"
+      "\u001b[32m2025-03-26 11:37:31.838\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m87\u001b[0m - \u001b[1mFound 0 sequences in the database.\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:31.839\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.main\u001b[0m:\u001b[36mfetch_from_primary_db\u001b[0m:\u001b[36m89\u001b[0m - \u001b[1mFetching 68 sequences from ncbi_protein.\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:31.880\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.primary_db_adapter\u001b[0m:\u001b[36mexecute_requests\u001b[0m:\u001b[36m140\u001b[0m - \u001b[1mStarting requests for 7 batches.\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:32.848\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAP20891.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:32.891\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAJ85677.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:32.937\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein SAQ02853.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:32.957\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CDR98216.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.001\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109963600.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.050\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA41038.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.068\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein WP_109874025.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.087\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46344.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.107\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APG33178.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.159\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKC98298.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.212\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KJO56189.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.238\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLP91446.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.263\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA46346.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.287\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA74912.2 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.311\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AFN21551.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.334\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACB22021.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.362\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76794.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.385\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76795.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.440\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CCG28759.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.464\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein KLG19745.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:33.980\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32891.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.008\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA76796.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.032\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAD24670.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.055\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ARF45649.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.079\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CTA52364.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.102\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ADL13944.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.127\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AGQ50511.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.152\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AKA60778.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.177\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein APT65830.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.229\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein HAH6232254.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.263\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein QDO66746.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.288\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CBX53726.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.312\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC32889.2 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.337\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA64682.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.361\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71322.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.386\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71323.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.409\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAA71324.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.433\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AEC32455.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.456\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22538.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.479\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD22539.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:34.997\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ABB97007.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.021\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein ACJ43254.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.046\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAC05975.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.069\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BCD58813.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.093\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK17194.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.126\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAD33116.2 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.150\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAB92324.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.175\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL03985.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.200\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF19151.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.224\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05613.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.257\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05614.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.282\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05612.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.307\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF05611.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.330\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAM15527.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.354\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29433.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.378\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29434.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.403\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29435.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.427\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAL29436.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.451\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43229.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.475\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC43230.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.893\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAG44570.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.911\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK14792.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.928\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAK30619.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.946\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein BAB16308.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.964\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein AAF66653.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:35.983\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85660.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:36.004\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC85661.1 in database\u001b[0m\n",
+      "\u001b[32m2025-03-26 11:37:36.025\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mpyeed.adapter.ncbi_protein_mapper\u001b[0m:\u001b[36madd_to_db\u001b[0m:\u001b[36m301\u001b[0m - \u001b[1mAdded/updated NCBI protein CAC67290.1 in database\u001b[0m\n"
      ]
     }
    ],
@@ -207,7 +216,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 6,
    "metadata": {},
    "outputs": [
     {
@@ -215,41 +224,13 @@
      "output_type": "stream",
      "text": [
       "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/modeling_utils.py:3437: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n",
-      "  warnings.warn(\n"
-     ]
-    },
-    {
-     "ename": "OSError",
-     "evalue": "facebook/esm2_t48_3B_UR50D is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=`",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mHTTPError\u001b[0m                                 Traceback (most recent call last)",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/utils/_http.py:409\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m    408\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 409\u001b[0m     \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mraise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    410\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m HTTPError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/requests/models.py:1024\u001b[0m, in \u001b[0;36mResponse.raise_for_status\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m   1023\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m http_error_msg:\n\u001b[0;32m-> 1024\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m HTTPError(http_error_msg, response\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m)\n",
-      "\u001b[0;31mHTTPError\u001b[0m: 404 Client Error: Not Found for url: https://huggingface.co/facebook/esm2_t48_3B_UR50D/resolve/main/config.json",
-      "\nThe above exception was the direct cause of the following exception:\n",
-      "\u001b[0;31mRepositoryNotFoundError\u001b[0m                   Traceback (most recent call last)",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/utils/hub.py:403\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m    401\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m    402\u001b[0m     \u001b[38;5;66;03m# Load from URL or cache if already cached\u001b[39;00m\n\u001b[0;32m--> 403\u001b[0m     resolved_file \u001b[38;5;241m=\u001b[39m \u001b[43mhf_hub_download\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    404\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpath_or_repo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    405\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    406\u001b[0m \u001b[43m        \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mif\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m==\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01melse\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    407\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    408\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    409\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    410\u001b[0m \u001b[43m        \u001b[49m\u001b[43muser_agent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43muser_agent\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    411\u001b[0m \u001b[43m        \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    412\u001b[0m \u001b[43m        \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    413\u001b[0m \u001b[43m        \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    414\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    415\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    416\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    417\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m GatedRepoError \u001b[38;5;28;01mas\u001b[39;00m e:\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:862\u001b[0m, in \u001b[0;36mhf_hub_download\u001b[0;34m(repo_id, filename, subfolder, repo_type, revision, library_name, library_version, cache_dir, local_dir, user_agent, force_download, proxies, etag_timeout, token, local_files_only, headers, endpoint, resume_download, force_filename, local_dir_use_symlinks)\u001b[0m\n\u001b[1;32m    861\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 862\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_hf_hub_download_to_cache_dir\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    863\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Destination\u001b[39;49;00m\n\u001b[1;32m    864\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    865\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# File info\u001b[39;49;00m\n\u001b[1;32m    866\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrepo_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    867\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfilename\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    868\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrepo_type\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrepo_type\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    869\u001b[0m \u001b[43m        \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    870\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# HTTP info\u001b[39;49;00m\n\u001b[1;32m    871\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mendpoint\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mendpoint\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    872\u001b[0m \u001b[43m        \u001b[49m\u001b[43metag_timeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    873\u001b[0m \u001b[43m        \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    874\u001b[0m \u001b[43m        \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    875\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    876\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;66;43;03m# Additional options\u001b[39;49;00m\n\u001b[1;32m    877\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    878\u001b[0m \u001b[43m        \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    879\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:969\u001b[0m, in \u001b[0;36m_hf_hub_download_to_cache_dir\u001b[0;34m(cache_dir, repo_id, filename, repo_type, revision, endpoint, etag_timeout, headers, proxies, token, local_files_only, force_download)\u001b[0m\n\u001b[1;32m    968\u001b[0m     \u001b[38;5;66;03m# Otherwise, raise appropriate error\u001b[39;00m\n\u001b[0;32m--> 969\u001b[0m     \u001b[43m_raise_on_head_call_error\u001b[49m\u001b[43m(\u001b[49m\u001b[43mhead_call_error\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    971\u001b[0m \u001b[38;5;66;03m# From now on, etag, commit_hash, url and size are not None.\u001b[39;00m\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:1486\u001b[0m, in \u001b[0;36m_raise_on_head_call_error\u001b[0;34m(head_call_error, force_download, local_files_only)\u001b[0m\n\u001b[1;32m   1481\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(head_call_error, (RepositoryNotFoundError, GatedRepoError)) \u001b[38;5;129;01mor\u001b[39;00m (\n\u001b[1;32m   1482\u001b[0m     \u001b[38;5;28misinstance\u001b[39m(head_call_error, HfHubHTTPError) \u001b[38;5;129;01mand\u001b[39;00m head_call_error\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m401\u001b[39m\n\u001b[1;32m   1483\u001b[0m ):\n\u001b[1;32m   1484\u001b[0m     \u001b[38;5;66;03m# Repo not found or gated => let's raise the actual error\u001b[39;00m\n\u001b[1;32m   1485\u001b[0m     \u001b[38;5;66;03m# Unauthorized => likely a token issue => let's raise the actual error\u001b[39;00m\n\u001b[0;32m-> 1486\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m head_call_error\n\u001b[1;32m   1487\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   1488\u001b[0m     \u001b[38;5;66;03m# Otherwise: most likely a connection issue or Hub downtime => let's warn the user\u001b[39;00m\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:1376\u001b[0m, in \u001b[0;36m_get_metadata_or_catch_error\u001b[0;34m(repo_id, filename, repo_type, revision, endpoint, proxies, etag_timeout, headers, token, local_files_only, relative_filename, storage_folder)\u001b[0m\n\u001b[1;32m   1375\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m-> 1376\u001b[0m     metadata \u001b[38;5;241m=\u001b[39m \u001b[43mget_hf_file_metadata\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1377\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43metag_timeout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\n\u001b[1;32m   1378\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1379\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m EntryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m http_error:\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/utils/_validators.py:114\u001b[0m, in \u001b[0;36mvalidate_hf_hub_args.._inner_fn\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m     kwargs \u001b[38;5;241m=\u001b[39m smoothly_deprecate_use_auth_token(fn_name\u001b[38;5;241m=\u001b[39mfn\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m, has_token\u001b[38;5;241m=\u001b[39mhas_token, kwargs\u001b[38;5;241m=\u001b[39mkwargs)\n\u001b[0;32m--> 114\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfn\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:1296\u001b[0m, in \u001b[0;36mget_hf_file_metadata\u001b[0;34m(url, token, proxies, timeout, library_name, library_version, user_agent, headers)\u001b[0m\n\u001b[1;32m   1295\u001b[0m \u001b[38;5;66;03m# Retrieve metadata\u001b[39;00m\n\u001b[0;32m-> 1296\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   1297\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mHEAD\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1298\u001b[0m \u001b[43m    \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1299\u001b[0m \u001b[43m    \u001b[49m\u001b[43mheaders\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mhf_headers\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1300\u001b[0m \u001b[43m    \u001b[49m\u001b[43mallow_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   1301\u001b[0m \u001b[43m    \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   1302\u001b[0m \u001b[43m    \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1303\u001b[0m \u001b[43m    \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   1304\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   1305\u001b[0m hf_raise_for_status(r)\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:280\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m    279\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m follow_relative_redirects:\n\u001b[0;32m--> 280\u001b[0m     response \u001b[38;5;241m=\u001b[39m \u001b[43m_request_wrapper\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m    281\u001b[0m \u001b[43m        \u001b[49m\u001b[43mmethod\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmethod\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    282\u001b[0m \u001b[43m        \u001b[49m\u001b[43murl\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43murl\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    283\u001b[0m \u001b[43m        \u001b[49m\u001b[43mfollow_relative_redirects\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m    284\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m    285\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    287\u001b[0m     \u001b[38;5;66;03m# If redirection, we redirect only relative paths.\u001b[39;00m\n\u001b[1;32m    288\u001b[0m     \u001b[38;5;66;03m# This is useful in case of a renamed repository.\u001b[39;00m\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/file_download.py:304\u001b[0m, in \u001b[0;36m_request_wrapper\u001b[0;34m(method, url, follow_relative_redirects, **params)\u001b[0m\n\u001b[1;32m    303\u001b[0m response \u001b[38;5;241m=\u001b[39m get_session()\u001b[38;5;241m.\u001b[39mrequest(method\u001b[38;5;241m=\u001b[39mmethod, url\u001b[38;5;241m=\u001b[39murl, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams)\n\u001b[0;32m--> 304\u001b[0m \u001b[43mhf_raise_for_status\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresponse\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    305\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/huggingface_hub/utils/_http.py:458\u001b[0m, in \u001b[0;36mhf_raise_for_status\u001b[0;34m(response, endpoint_name)\u001b[0m\n\u001b[1;32m    450\u001b[0m     message \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m    451\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mstatus_code\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m Client Error.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    452\u001b[0m         \u001b[38;5;241m+\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    456\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m make sure you are authenticated.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    457\u001b[0m     )\n\u001b[0;32m--> 458\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m _format(RepositoryNotFoundError, message, response) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m    460\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m response\u001b[38;5;241m.\u001b[39mstatus_code \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m400\u001b[39m:\n",
-      "\u001b[0;31mRepositoryNotFoundError\u001b[0m: 404 Client Error. (Request ID: Root=1-67e3e5f6-44cf0d8d57a4fe053b72a484;c071d07d-bc72-4882-a792-4d6486057291)\n\nRepository Not Found for url: https://huggingface.co/facebook/esm2_t48_3B_UR50D/resolve/main/config.json.\nPlease make sure you specified the correct `repo_id` and `repo_type`.\nIf you are trying to access a private or gated repo, make sure you are authenticated.",
-      "\nThe above exception was the direct cause of the following exception:\n",
-      "\u001b[0;31mOSError\u001b[0m                                   Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43meedb\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcalculate_sequence_embeddings\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfacebook/esm2_t48_3B_UR50D\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/main.py:208\u001b[0m, in \u001b[0;36mPyeed.calculate_sequence_embeddings\u001b[0;34m(self, batch_size, model_name)\u001b[0m\n\u001b[1;32m    197\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    198\u001b[0m \u001b[38;5;124;03mCalculates embeddings for all sequences in the database that do not have embeddings, processing in batches.\u001b[39;00m\n\u001b[1;32m    199\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    204\u001b[0m \u001b[38;5;124;03m        Available models can be found at https://huggingface.co/facebook/esm2_t6_8M_UR50D.\u001b[39;00m\n\u001b[1;32m    205\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m    207\u001b[0m \u001b[38;5;66;03m# Load the model, tokenizer, and device\u001b[39;00m\n\u001b[0;32m--> 208\u001b[0m model, tokenizer, device \u001b[38;5;241m=\u001b[39m \u001b[43mload_model_and_tokenizer\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel_name\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    210\u001b[0m \u001b[38;5;66;03m# Cypher query to retrieve proteins without embeddings and with valid sequences\u001b[39;00m\n\u001b[1;32m    211\u001b[0m query \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\"\"\u001b[39m\n\u001b[1;32m    212\u001b[0m \u001b[38;5;124mMATCH (p:Protein)\u001b[39m\n\u001b[1;32m    213\u001b[0m \u001b[38;5;124mWHERE p.embedding IS NULL AND p.sequence IS NOT NULL\u001b[39m\n\u001b[1;32m    214\u001b[0m \u001b[38;5;124mRETURN p.accession_id AS accession, p.sequence AS sequence\u001b[39m\n\u001b[1;32m    215\u001b[0m \u001b[38;5;124m\u001b[39m\u001b[38;5;124m\"\"\"\u001b[39m\n",
-      "File \u001b[0;32m~/Niklas/pyeed/src/pyeed/embedding.py:72\u001b[0m, in \u001b[0;36mload_model_and_tokenizer\u001b[0;34m(model_name)\u001b[0m\n\u001b[1;32m     65\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m     66\u001b[0m     \u001b[38;5;66;03m# Otherwise, assume it's an ESM-2 model on Hugging Face\u001b[39;00m\n\u001b[1;32m     67\u001b[0m     full_model_name \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m     68\u001b[0m         model_name\n\u001b[1;32m     69\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m model_name\u001b[38;5;241m.\u001b[39mstartswith(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacebook/\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m     70\u001b[0m         \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfacebook/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodel_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m     71\u001b[0m     )\n\u001b[0;32m---> 72\u001b[0m     model: Any \u001b[38;5;241m=\u001b[39m \u001b[43mEsmModel\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfrom_pretrained\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfull_model_name\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43muse_auth_token\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     73\u001b[0m     tokenizer \u001b[38;5;241m=\u001b[39m EsmTokenizer\u001b[38;5;241m.\u001b[39mfrom_pretrained(full_model_name, use_auth_token\u001b[38;5;241m=\u001b[39mtoken)\n\u001b[1;32m     74\u001b[0m     model \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mto(device)\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/modeling_utils.py:3464\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, weights_only, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m   3461\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m commit_hash \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m   3462\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(config, PretrainedConfig):\n\u001b[1;32m   3463\u001b[0m         \u001b[38;5;66;03m# We make a call to the config file first (which may be absent) to get the commit hash as soon as possible\u001b[39;00m\n\u001b[0;32m-> 3464\u001b[0m         resolved_config_file \u001b[38;5;241m=\u001b[39m \u001b[43mcached_file\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m   3465\u001b[0m \u001b[43m            \u001b[49m\u001b[43mpretrained_model_name_or_path\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3466\u001b[0m \u001b[43m            \u001b[49m\u001b[43mCONFIG_NAME\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3467\u001b[0m \u001b[43m            \u001b[49m\u001b[43mcache_dir\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcache_dir\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3468\u001b[0m \u001b[43m            \u001b[49m\u001b[43mforce_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mforce_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3469\u001b[0m \u001b[43m            \u001b[49m\u001b[43mresume_download\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mresume_download\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3470\u001b[0m \u001b[43m            \u001b[49m\u001b[43mproxies\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mproxies\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   
3471\u001b[0m \u001b[43m            \u001b[49m\u001b[43mlocal_files_only\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlocal_files_only\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3472\u001b[0m \u001b[43m            \u001b[49m\u001b[43mtoken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtoken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3473\u001b[0m \u001b[43m            \u001b[49m\u001b[43mrevision\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrevision\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3474\u001b[0m \u001b[43m            \u001b[49m\u001b[43msubfolder\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msubfolder\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m   3475\u001b[0m \u001b[43m            \u001b[49m\u001b[43m_raise_exceptions_for_gated_repo\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   3476\u001b[0m \u001b[43m            \u001b[49m\u001b[43m_raise_exceptions_for_missing_entries\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   3477\u001b[0m \u001b[43m            \u001b[49m\u001b[43m_raise_exceptions_for_connection_errors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m   3478\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m   3479\u001b[0m         commit_hash \u001b[38;5;241m=\u001b[39m extract_commit_hash(resolved_config_file, commit_hash)\n\u001b[1;32m   3480\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n",
-      "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/utils/hub.py:426\u001b[0m, in \u001b[0;36mcached_file\u001b[0;34m(path_or_repo_id, filename, cache_dir, force_download, resume_download, proxies, token, revision, local_files_only, subfolder, repo_type, user_agent, _raise_exceptions_for_gated_repo, _raise_exceptions_for_missing_entries, _raise_exceptions_for_connection_errors, _commit_hash, **deprecated_kwargs)\u001b[0m\n\u001b[1;32m    421\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m    422\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou are trying to access a gated repo.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mMake sure to have access to it at \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    423\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mstr\u001b[39m(e)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    424\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m    425\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RepositoryNotFoundError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m--> 426\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m    427\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not a local folder and is not a valid model identifier \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    428\u001b[0m         
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlisted on \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/models\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mIf this is a private repository, make sure to pass a token \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    429\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhaving permission to this repo either by logging in with `huggingface-cli login` or by passing \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    430\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`token=`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    431\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n\u001b[1;32m    432\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m RevisionNotFoundError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[1;32m    433\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mEnvironmentError\u001b[39;00m(\n\u001b[1;32m    434\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mrevision\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not a valid git identifier (branch name, tag name or commit id) that exists \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    435\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfor this model name. 
Check the model page at \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    436\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mhttps://huggingface.co/\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpath_or_repo_id\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m for available revisions.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    437\u001b[0m     ) \u001b[38;5;28;01mfrom\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21;01me\u001b[39;00m\n",
-      "\u001b[0;31mOSError\u001b[0m: facebook/esm2_t48_3B_UR50D is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'\nIf this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=`"
+      "  warnings.warn(\n",
+      "Loading checkpoint shards: 100%|██████████| 2/2 [00:00<00:00,  3.69it/s]\n",
+      "Some weights of EsmModel were not initialized from the model checkpoint at facebook/esm2_t36_3B_UR50D and are newly initialized: ['esm.pooler.dense.bias', 'esm.pooler.dense.weight']\n",
+      "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
+      "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/transformers/tokenization_utils_base.py:1899: FutureWarning: The `use_auth_token` argument is deprecated and will be removed in v5 of Transformers. Please use `token` instead.\n",
+      "  warnings.warn(\n",
+      "Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.\n"
      ]
     }
    ],
@@ -275,7 +256,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "(1152,)\n"
+      "(2560,)\n"
      ]
     }
    ],
@@ -302,7 +283,7 @@
     },
     {
      "data": {
-      "image/png": "",
+      "image/png": "",
       "text/plain": [
        "
" ] @@ -380,7 +361,7 @@ "output_type": "stream", "text": [ "Resulst for index AAP20891.1 are:\n", - "[('AAP20891.1', 0.0), ('ADL13944.1', 5.6168107638088216e-05), ('AFN21551.1', 6.611455559601964e-05), ('AAF05613.1', 0.00010320505315297712), ('CAC67290.1', 0.00012422009260193434), ('AAL29433.1', 0.00012499919288500028), ('ABB97007.1', 0.00012965265937237014), ('CAA74912.2', 0.00013823680560853813), ('CBX53726.1', 0.00015387097994867815), ('AGQ50511.1', 0.00015501224370340072)]\n" + "[('AAP20891.1', 0.0), ('ADL13944.1', 1.2696941380951898e-05), ('AGQ50511.1', 2.3084859425925863e-05), ('CBX53726.1', 2.3443578533011156e-05), ('AAL29433.1', 3.0809776502382924e-05), ('CAA76796.1', 3.2400445545976986e-05), ('CAC67290.1', 4.856582147116928e-05), ('AFN21551.1', 4.953471590429803e-05), ('CAA74912.2', 5.021707417551813e-05), ('CTA52364.1', 6.113568903631794e-05)]\n" ] } ], @@ -478,7 +459,7 @@ "outputs": [ { "ename": "ClientError", - "evalue": "{code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `db.index.vector.queryNodes`: Caused by: java.lang.IllegalArgumentException: Index query vector has 1152 dimensions, but indexed vectors have 960.}", + "evalue": "{code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `db.index.vector.queryNodes`: Caused by: java.lang.IllegalArgumentException: Index query vector has 2560 dimensions, but indexed vectors have 960.}", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", @@ -495,7 +476,7 @@ "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/io/_bolt.py:850\u001b[0m, in \u001b[0;36mBolt.fetch_message\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 846\u001b[0m \u001b[38;5;66;03m# Receive exactly one message\u001b[39;00m\n\u001b[1;32m 847\u001b[0m tag, fields \u001b[38;5;241m=\u001b[39m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39minbox\u001b[38;5;241m.\u001b[39mpop(\n\u001b[1;32m 848\u001b[0m hydration_hooks\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mresponses[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mhydration_hooks\n\u001b[1;32m 849\u001b[0m )\n\u001b[0;32m--> 850\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_process_message\u001b[49m\u001b[43m(\u001b[49m\u001b[43mtag\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfields\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 851\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39midle_since \u001b[38;5;241m=\u001b[39m monotonic()\n\u001b[1;32m 852\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m res\n", "File \u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/io/_bolt5.py:369\u001b[0m, in \u001b[0;36mBolt5x0._process_message\u001b[0;34m(self, tag, fields)\u001b[0m\n\u001b[1;32m 367\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_server_state_manager\u001b[38;5;241m.\u001b[39mstate \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mbolt_states\u001b[38;5;241m.\u001b[39mFAILED\n\u001b[1;32m 368\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 369\u001b[0m \u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mon_failure\u001b[49m\u001b[43m(\u001b[49m\u001b[43msummary_metadata\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 370\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m (ServiceUnavailable, DatabaseUnavailable):\n\u001b[1;32m 371\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpool:\n", "File 
\u001b[0;32m~/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/neo4j/_sync/io/_common.py:245\u001b[0m, in \u001b[0;36mResponse.on_failure\u001b[0;34m(self, metadata)\u001b[0m\n\u001b[1;32m 243\u001b[0m handler \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandlers\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mon_summary\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 244\u001b[0m Util\u001b[38;5;241m.\u001b[39mcallback(handler)\n\u001b[0;32m--> 245\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m Neo4jError\u001b[38;5;241m.\u001b[39mhydrate(\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmetadata)\n", - "\u001b[0;31mClientError\u001b[0m: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `db.index.vector.queryNodes`: Caused by: java.lang.IllegalArgumentException: Index query vector has 1152 dimensions, but indexed vectors have 960.}" + "\u001b[0;31mClientError\u001b[0m: {code: Neo.ClientError.Procedure.ProcedureCallFailed} {message: Failed to invoke procedure `db.index.vector.queryNodes`: Caused by: java.lang.IllegalArgumentException: Index query vector has 2560 dimensions, but indexed vectors have 960.}" ] } ], diff --git a/docs/usage/mutation_analysis.ipynb b/docs/usage/mutation_analysis.ipynb index 9b31c996..e2c3d066 100644 --- a/docs/usage/mutation_analysis.ipynb +++ b/docs/usage/mutation_analysis.ipynb @@ -229,19 +229,35 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [ { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'from_positions': [241, 125, 272], 'to_positions': [241, 125, 272], 'from_monomers': ['R', 'V', 'D'], 'to_monomers': ['S', 'I', 'N']}\n" + "ename": "NameError", + "evalue": "name 'mutations_protein' is not defined", + "output_type": "error", + "traceback": [ + 
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mmutations_protein\u001b[49m)\n", + "\u001b[0;31mNameError\u001b[0m: name 'mutations_protein' is not defined" ] } ], "source": [ - "print(mutations_protein)" + "print(mutations_protein)\n", + "\n", + "\n", + "# remove double realtionship, there are many doubles between the same DNA and the same Organismen\n", + "# just keep the first one and remove the rest\n", + "query_remove_double_relationship = \"\"\"\n", + "MATCH (d:DNA {accession_id: 'KT405476.1'})-[r:ORIGINATES_FROM]-(e)\n", + "WITH d, r, e\n", + "ORDER BY id(r)\n", + "LIMIT 1\n", + "DELETE r\n", + "\"\"\"\n", + "\n" ] }, { @@ -295,7 +311,7 @@ ], "metadata": { "kernelspec": { - "display_name": "pyeed_niklas", + "display_name": "pyeed_niklas_env", "language": "python", "name": "python3" }, @@ -309,7 +325,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.10.16" } }, "nbformat": 4, diff --git a/src/pyeed/analysis/sequence_alignment.py b/src/pyeed/analysis/sequence_alignment.py index 3bfd019c..09e8aab1 100644 --- a/src/pyeed/analysis/sequence_alignment.py +++ b/src/pyeed/analysis/sequence_alignment.py @@ -5,9 +5,11 @@ from Bio.Align import PairwiseAligner as BioPairwiseAligner from Bio.Align.substitution_matrices import Array as BioSubstitutionMatrix from joblib import Parallel, cpu_count, delayed +from loguru import logger +from rich.progress import Progress + from pyeed.dbconnect import DatabaseConnector from pyeed.tools.utility import chunks -from rich.progress import Progress class PairwiseAligner: @@ -126,6 +128,14 @@ def align_multipairwise( if ids is not None and db is not None: sequences = self._get_id_sequence_dict(db, ids, node_type, region_ids_neo4j) + 
logger.info( + f"Length of sequences: {len(sequences)} and length of pairs: {len(pairs)} and length of ids: {len(ids)} and the length of the region_ids_neo4j: {len(region_ids_neo4j)}" + ) + logger.info(f"IDS: {ids}") + logger.info(f"Region IDs: {region_ids_neo4j}") + logger.info(f"Pairs: {pairs}") + logger.info(f"Sequences: {sequences.keys()}") + if not sequences: raise ValueError( "Either sequences or ids (with a database connection) must be provided." @@ -290,13 +300,11 @@ def _get_id_sequence_dict( if region_ids_neo4j is not None: query = f""" MATCH (p:{node_type})-[e:HAS_REGION]->(r:Region) - WHERE id(r) IN $region_ids_neo4j AND p.accession_id IN $ids + WHERE id(r) IN {region_ids_neo4j} AND p.accession_id IN {ids} RETURN p.accession_id AS accession_id, e.start AS start, e.end AS end, p.sequence AS sequence """ - nodes = db.execute_read( - query, - parameters={"region_ids_neo4j": region_ids_neo4j, "ids": ids}, - ) + nodes = db.execute_read(query) + else: query = f""" MATCH (p:{node_type}) diff --git a/src/pyeed/analysis/standard_numbering.py b/src/pyeed/analysis/standard_numbering.py index 6f81869f..88450be6 100644 --- a/src/pyeed/analysis/standard_numbering.py +++ b/src/pyeed/analysis/standard_numbering.py @@ -13,6 +13,7 @@ from typing import Any, Dict, List, Optional, Tuple from loguru import logger + from pyeed.analysis.sequence_alignment import PairwiseAligner from pyeed.dbconnect import DatabaseConnector from pyeed.model import StandardNumbering @@ -458,7 +459,7 @@ def apply_standard_numbering_pairwise( region_ids_neo4j=region_ids_neo4j, ) - logger.info(f"Pairwise alignment results: {results_pairwise}") + # logger.info(f"Pairwise alignment results: {results_pairwise}") if results_pairwise is None: raise ValueError("Pairwise alignment failed - no results returned") @@ -484,8 +485,6 @@ def apply_standard_numbering_pairwise( base_sequence_id, converted_alignment ) - logger.info(f"Positions: {positions}") - # Ensure the standard numbering node exists in the 
database. StandardNumbering.get_or_save( name=self.name, From ee613422ff001e07bc1a3a0516d1d690f231ddfe Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Wed, 23 Apr 2025 16:02:24 +0000 Subject: [PATCH 5/9] bug fixing --- docs/usage/mutation_analysis.ipynb | 68 +- src/pyeed/analysis/embedding_analysis.py | 5 +- src/pyeed/analysis/mutation_detection.py | 1745 ++++++++++++++++++++++ src/pyeed/analysis/sequence_alignment.py | 68 +- src/pyeed/analysis/standard_numbering.py | 10 +- src/pyeed/embedding.py | 20 +- src/pyeed/main.py | 14 +- 7 files changed, 1857 insertions(+), 73 deletions(-) diff --git a/docs/usage/mutation_analysis.ipynb b/docs/usage/mutation_analysis.ipynb index e2c3d066..a086d7d6 100644 --- a/docs/usage/mutation_analysis.ipynb +++ b/docs/usage/mutation_analysis.ipynb @@ -11,9 +11,18 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ "import sys\n", "from loguru import logger\n", @@ -37,7 +46,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -45,7 +54,7 @@ "output_type": "stream", "text": [ "📡 Connected to database.\n", - "All data has been wiped from the database.\n" + "The provided date does not match the current date. 
Date is you gave is 2025-03-19 actual date is 2025-04-09\n" ] } ], @@ -75,7 +84,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -101,18 +110,21 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 4, "metadata": {}, "outputs": [ { "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "6ed852d438ab480fa4d1c6129eacfd26", - "version_major": 2, - "version_minor": 0 - }, + "text/html": [ + "
/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/rich/live.py:231: UserWarning: install \n",
+       "\"ipywidgets\" for Jupyter support\n",
+       "  warnings.warn('install \"ipywidgets\" for Jupyter support')\n",
+       "
\n" + ], "text/plain": [ - "Output()" + "/home/nab/anaconda3/envs/pyeed_niklas_env/lib/python3.10/site-packages/rich/live.py:231: UserWarning: install \n", + "\"ipywidgets\" for Jupyter support\n", + " warnings.warn('install \"ipywidgets\" for Jupyter support')\n" ] }, "metadata": {}, @@ -122,7 +134,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Region ids: [143, 129, 128, 69, 9]\n", + "Region ids: [5206, 5205, 5203, 5201, 5207]\n", "len of ids: 5\n" ] }, @@ -181,7 +193,7 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -198,7 +210,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -229,18 +241,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": {}, "outputs": [ { - "ename": "NameError", - "evalue": "name 'mutations_protein' is not defined", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[1], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[43mmutations_protein\u001b[49m)\n", - "\u001b[0;31mNameError\u001b[0m: name 'mutations_protein' is not defined" + "name": "stdout", + "output_type": "stream", + "text": [ + "{'from_positions': [272, 241, 125], 'to_positions': [272, 241, 125], 'from_monomers': ['D', 'R', 'V'], 'to_monomers': ['N', 'S', 'I']}\n" ] } ], @@ -278,21 +286,21 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Mutation on position 705 -> 705 with a nucleotide change of G -> A\n", - "Mutation on position 395 -> 395 with a nucleotide change of T -> G\n", - "Mutation on position 137 -> 137 with a nucleotide change of A 
-> G\n", "Mutation on position 17 -> 17 with a nucleotide change of T -> C\n", - "Mutation on position 473 -> 473 with a nucleotide change of T -> C\n", + "Mutation on position 395 -> 395 with a nucleotide change of T -> G\n", + "Mutation on position 198 -> 198 with a nucleotide change of C -> A\n", "Mutation on position 716 -> 716 with a nucleotide change of G -> A\n", + "Mutation on position 705 -> 705 with a nucleotide change of G -> A\n", + "Mutation on position 473 -> 473 with a nucleotide change of T -> C\n", "Mutation on position 720 -> 720 with a nucleotide change of A -> C\n", - "Mutation on position 198 -> 198 with a nucleotide change of C -> A\n" + "Mutation on position 137 -> 137 with a nucleotide change of A -> G\n" ] } ], diff --git a/src/pyeed/analysis/embedding_analysis.py b/src/pyeed/analysis/embedding_analysis.py index fa9d6c0e..ebcac96a 100644 --- a/src/pyeed/analysis/embedding_analysis.py +++ b/src/pyeed/analysis/embedding_analysis.py @@ -6,9 +6,10 @@ import scipy.spatial as sp from matplotlib.figure import Figure from numpy.typing import NDArray -from pyeed.dbconnect import DatabaseConnector from scipy.spatial.distance import cosine +from pyeed.dbconnect import DatabaseConnector + logger = logging.getLogger(__name__) @@ -352,6 +353,7 @@ def find_nearest_neighbors_based_on_vector_index( query_protein_id: str, index_name: str = "embedding_index", number_of_neighbors: int = 50, + skip: int = 0, ) -> list[tuple[str, float]]: """ This function finds the nearest neighbors of a query protein based on the vector index. 
@@ -411,6 +413,7 @@ def find_nearest_neighbors_based_on_vector_index( CALL db.index.vector.queryNodes('{index_name}', {number_of_neighbors}, embedding) YIELD node AS fprotein, score RETURN fprotein.accession_id, score + SKIP {skip} """ results = db.execute_read(query_find_nearest_neighbors) neighbors: list[tuple[str, float]] = [ diff --git a/src/pyeed/analysis/mutation_detection.py b/src/pyeed/analysis/mutation_detection.py index c2562ae1..74c5e0fb 100644 --- a/src/pyeed/analysis/mutation_detection.py +++ b/src/pyeed/analysis/mutation_detection.py @@ -1,6 +1,7 @@ from typing import Any, Optional from loguru import logger + from pyeed.dbconnect import DatabaseConnector @@ -292,3 +293,1747 @@ def get_mutations_between_sequences( ) return mutations + + +if __name__ == "__main__": + # debugging + + seq_a = "TGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAA" + seq_b = 
"CACAGATAAAACACTCTCCAGGAAACCCGGGGCGGTTCAATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTGCAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCAGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAACACGGGATAATACCGCACCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGAAAATGTTGAATACTCAT" + pos_a = [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 47, + 48, + 49, + 50, + 51, + 52, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 143, + 144, + 145, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 
176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 273, + 274, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 293, + 294, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 322, + 323, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 348, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 372, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, 
+ 462, + 463, + 464, + 465, + 466, + 467, + 468, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 659, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675, + 676, + 677, + 678, + 679, + 680, + 681, + 682, + 683, + 684, + 685, + 686, + 687, + 688, + 689, + 690, + 691, + 692, + 693, + 694, + 695, + 696, + 697, + 698, + 699, + 700, + 701, + 702, + 703, + 704, + 705, + 706, + 707, + 708, + 709, + 710, + 711, + 712, + 713, + 714, + 715, + 716, + 717, + 718, + 719, + 720, + 721, + 722, + 723, + 724, + 725, + 726, + 727, + 728, + 729, + 730, + 731, + 732, + 733, + 734, + 735, + 736, + 737, + 738, + 739, + 740, + 741, + 742, + 743, + 744, + 745, + 746, + 
747, + 748, + 749, + 750, + 751, + 752, + 753, + 754, + 755, + 756, + 757, + 758, + 759, + 760, + 761, + 762, + 763, + 764, + 765, + 766, + 767, + 768, + 769, + 770, + 771, + 772, + 773, + 774, + 775, + 776, + 777, + 778, + 779, + 780, + 781, + 782, + 783, + 784, + 785, + 786, + 787, + 788, + 789, + 790, + 791, + 792, + 793, + 794, + 795, + 796, + 797, + 798, + 799, + 800, + 801, + 802, + 803, + 804, + 805, + 806, + 807, + 808, + 809, + 810, + 811, + 812, + 813, + 814, + 815, + 816, + 817, + 818, + 819, + 820, + 821, + 822, + 823, + 824, + 825, + 826, + 827, + 828, + 829, + 830, + 831, + 832, + 833, + 834, + 835, + 836, + 837, + 838, + 839, + 840, + 841, + 842, + 843, + 844, + 845, + 846, + 847, + 848, + 849, + 850, + 851, + 852, + 853, + 854, + 855, + 856, + 857, + 858, + 859, + 860, + ] + pos_b = [ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 11, + 12, + 13, + 14, + 15, + 16, + 17, + 18, + 19, + 20, + 21, + 22, + 23, + 24, + 25, + 26, + 27, + 28, + 29, + 30, + 31, + 32, + 33, + 34, + 35, + 36, + 37, + 38, + 39, + 40, + 41, + 42, + 43, + 44, + 45, + 46, + 46.1, + 47, + 48, + 49, + 50, + 51, + 52, + 52.1, + 53, + 54, + 55, + 56, + 57, + 58, + 59, + 60, + 61, + 62, + 63, + 64, + 65, + 66, + 67, + 68, + 69, + 70, + 71, + 72, + 73, + 74, + 75, + 76, + 77, + 78, + 79, + 80, + 81, + 82, + 83, + 84, + 85, + 86, + 87, + 88, + 89, + 90, + 91, + 92, + 93, + 94, + 95, + 96, + 97, + 98, + 99, + 100, + 101, + 102, + 103, + 104, + 105, + 106, + 107, + 108, + 109, + 110, + 111, + 112, + 113, + 114, + 115, + 116, + 117, + 118, + 119, + 120, + 121, + 122, + 123, + 124, + 125, + 126, + 127, + 128, + 129, + 130, + 131, + 132, + 133, + 134, + 135, + 136, + 137, + 138, + 139, + 140, + 141, + 142, + 146, + 147, + 148, + 149, + 150, + 151, + 152, + 153, + 154, + 155, + 156, + 157, + 158, + 159, + 160, + 161, + 162, + 163, + 164, + 165, + 166, + 167, + 168, + 169, + 170, + 171, + 172, + 173, + 174, + 175, + 176, + 177, + 178, + 179, + 180, + 181, + 182, + 183, + 184, + 185, + 
186, + 187, + 188, + 189, + 190, + 191, + 192, + 193, + 193.1, + 193.2, + 193.3, + 194, + 195, + 196, + 197, + 198, + 199, + 200, + 201, + 202, + 203, + 204, + 205, + 206, + 207, + 208, + 209, + 210, + 211, + 212, + 213, + 214, + 215, + 216, + 217, + 218, + 219, + 220, + 221, + 222, + 223, + 224, + 225, + 226, + 227, + 228, + 229, + 230, + 231, + 232, + 233, + 234, + 235, + 236, + 237, + 238, + 239, + 240, + 241, + 242, + 242.1, + 242.2, + 242.3, + 242.4, + 242.5, + 242.6, + 243, + 244, + 245, + 246, + 247, + 248, + 249, + 250, + 251, + 252, + 253, + 253.1, + 253.2, + 253.3, + 254, + 255, + 256, + 257, + 258, + 259, + 260, + 261, + 262, + 263, + 264, + 265, + 266, + 267, + 268, + 269, + 270, + 271, + 272, + 275, + 276, + 277, + 278, + 279, + 280, + 281, + 282, + 283, + 284, + 285, + 286, + 287, + 288, + 289, + 290, + 291, + 292, + 295, + 296, + 297, + 298, + 299, + 300, + 301, + 302, + 303, + 304, + 305, + 306, + 307, + 308, + 309, + 310, + 311, + 312, + 313, + 314, + 315, + 316, + 317, + 318, + 319, + 320, + 321, + 324, + 325, + 326, + 327, + 328, + 329, + 330, + 331, + 332, + 333, + 334, + 335, + 336, + 337, + 338, + 339, + 340, + 341, + 342, + 343, + 344, + 345, + 346, + 347, + 349, + 350, + 351, + 352, + 353, + 354, + 355, + 356, + 357, + 358, + 359, + 360, + 361, + 362, + 363, + 364, + 365, + 366, + 367, + 368, + 369, + 370, + 371, + 373, + 374, + 375, + 376, + 377, + 378, + 379, + 380, + 381, + 382, + 383, + 384, + 385, + 386, + 387, + 388, + 389, + 390, + 391, + 392, + 393, + 394, + 395, + 396, + 397, + 398, + 399, + 400, + 401, + 402, + 403, + 404, + 405, + 406, + 407, + 408, + 409, + 410, + 411, + 412, + 413, + 414, + 415, + 416, + 417, + 418, + 419, + 420, + 421, + 421.1, + 421.2, + 422, + 423, + 424, + 425, + 426, + 427, + 428, + 429, + 430, + 431, + 432, + 433, + 434, + 435, + 436, + 437, + 438, + 439, + 440, + 441, + 442, + 443, + 444, + 445, + 446, + 447, + 448, + 449, + 450, + 451, + 452, + 453, + 454, + 455, + 456, + 457, + 458, + 459, + 460, + 461, 
+ 462, + 463, + 464, + 465, + 466, + 466.1, + 467, + 468, + 468.1, + 469, + 470, + 471, + 472, + 473, + 474, + 475, + 476, + 477, + 478, + 479, + 480, + 481, + 482, + 483, + 484, + 485, + 486, + 487, + 488, + 489, + 490, + 491, + 492, + 493, + 494, + 495, + 496, + 497, + 498, + 499, + 500, + 501, + 502, + 503, + 504, + 505, + 506, + 507, + 508, + 509, + 510, + 511, + 512, + 513, + 514, + 515, + 516, + 517, + 518, + 519, + 520, + 521, + 522, + 523, + 524, + 525, + 526, + 527, + 528, + 529, + 530, + 531, + 532, + 533, + 534, + 535, + 536, + 537, + 538, + 539, + 540, + 541, + 542, + 543, + 544, + 545, + 546, + 547, + 548, + 549, + 550, + 551, + 552, + 553, + 554, + 555, + 556, + 557, + 558, + 559, + 560, + 561, + 561.1, + 561.2, + 562, + 563, + 564, + 565, + 566, + 567, + 568, + 569, + 570, + 571, + 572, + 572.1, + 572.2, + 573, + 574, + 575, + 576, + 577, + 578, + 579, + 580, + 581, + 582, + 583, + 584, + 585, + 586, + 587, + 588, + 589, + 590, + 591, + 592, + 593, + 594, + 595, + 596, + 597, + 598, + 599, + 600, + 601, + 602, + 603, + 604, + 605, + 606, + 607, + 608, + 609, + 610, + 611, + 612, + 613, + 614, + 615, + 616, + 616.1, + 617, + 618, + 619, + 620, + 621, + 622, + 623, + 624, + 625, + 626, + 627, + 628, + 629, + 630, + 631, + 632, + 633, + 634, + 635, + 636, + 637, + 638, + 639, + 640, + 641, + 642, + 643, + 644, + 645, + 646, + 647, + 648, + 649, + 650, + 651, + 652, + 653, + 654, + 655, + 656, + 657, + 658, + 660, + 661, + 662, + 663, + 664, + 665, + 666, + 667, + 668, + 669, + 670, + 671, + 672, + 673, + 674, + 675, + 676, + 677, + 680, + 681, + 682, + 683, + 684, + 685, + 686, + 687, + 688, + 689, + 690, + 691, + 692, + 693, + 694, + 695, + 696, + 697, + 698, + 699, + 700, + 701, + 702, + 703, + 704, + 705, + 706, + 707, + 708, + 709, + 710, + 711, + 712, + 713, + 714, + 715, + 716, + 717, + 719, + 720, + 721, + 722, + 723, + 724, + 725, + 726, + 727, + 728, + 732, + 733, + 734, + 735, + 736, + 737, + 738, + 739, + 740, + 741, + 742, + 743, + 744, + 
745, + 746, + 747, + 748, + 749, + 750, + 751, + 752, + 753, + 754, + 755, + 756, + 757, + 758, + 759, + 760, + 761, + 762, + 763, + 764, + 765, + 766, + 767, + 768, + 769, + 771, + 772, + 773, + 774, + 775, + 776, + 777, + 778, + 779, + 780, + 781, + 782, + 783, + 784, + 785, + 786, + 787, + 788, + 789, + 790, + 791, + 792, + 793, + 794, + 795, + 796, + 797, + 798, + 799, + 801, + 802, + 803, + 804, + 805, + 806, + 807, + 808, + 809, + 810, + 811, + 812, + 813, + 814, + 815, + 816, + 817, + 818, + 819, + 820, + 821, + 822, + 823, + 824, + 825, + 826, + 827, + 828, + 829, + 830, + 831, + 832, + 833, + 834, + 835, + 836, + 837, + 838, + 839, + 840, + 841, + 842, + 843, + 844, + 845, + 846, + 847, + 848, + 849, + 850, + 851, + 852, + 853, + 854, + 855, + 856, + 857, + 858, + 859, + 860, + ] + + pos_a = [str(i) for i in pos_a] + pos_b = [str(i) for i in pos_b] + + print(seq_a) + print(seq_b) + + mutation_detection = MutationDetection() + + print(mutation_detection.find_mutations(seq_a, seq_b, pos_a, pos_b)) diff --git a/src/pyeed/analysis/sequence_alignment.py b/src/pyeed/analysis/sequence_alignment.py index 09e8aab1..0b5537a1 100644 --- a/src/pyeed/analysis/sequence_alignment.py +++ b/src/pyeed/analysis/sequence_alignment.py @@ -5,7 +5,6 @@ from Bio.Align import PairwiseAligner as BioPairwiseAligner from Bio.Align.substitution_matrices import Array as BioSubstitutionMatrix from joblib import Parallel, cpu_count, delayed -from loguru import logger from rich.progress import Progress from pyeed.dbconnect import DatabaseConnector @@ -21,18 +20,42 @@ class PairwiseAligner: def __init__( self, mode: str = "global", - match: int = 1, - mismatch: int = -1, - gap_open: int = -1, - gap_exted: int = 0, - substitution_matrix: str = "None", + match: float = 1.0, + mismatch: float = -1.0, + gap_open: float = -10.0, + gap_extend: float = -0.5, + node_type: str = "Protein", ) -> None: + """Initialize the PairwiseAligner. + + Args: + mode (str): Alignment mode ('global' or 'local'). 
Defaults to 'global'. + match (float): Match score for DNA. Defaults to 1.0. + mismatch (float): Mismatch penalty for DNA. Defaults to -1.0. + gap_open (float): Gap opening penalty. Defaults to -10.0. + gap_extend (float): Gap extension penalty. Defaults to -0.5. + sequence_type (str): Type of sequence ('protein' or 'dna'). Defaults to 'protein'. + node_type (str): Type of node in database. Defaults to 'Protein'. + """ + self.node_type = node_type + self.mode = mode - self.match = match - self.mismatch = mismatch - self.gap_open = gap_open - self.gap_extend = gap_exted - self.substitution_matrix = substitution_matrix + + # Set parameters based on sequence type + if self.node_type == "DNA": + # DNA-specific parameters + self.match = 1.0 + self.mismatch = -1.0 + self.gap_open = -5.0 + self.gap_extend = -2.0 + self.substitution_matrix = "None" + else: # protein + # Protein-specific parameters with BLOSUM62 + self.match = None # Not used when using substitution matrix + self.mismatch = None # Not used when using substitution matrix + self.gap_open = -10.0 + self.gap_extend = -0.5 + self.substitution_matrix = "BLOSUM62" def _align( self, @@ -128,14 +151,6 @@ def align_multipairwise( if ids is not None and db is not None: sequences = self._get_id_sequence_dict(db, ids, node_type, region_ids_neo4j) - logger.info( - f"Length of sequences: {len(sequences)} and length of pairs: {len(pairs)} and length of ids: {len(ids)} and the length of the region_ids_neo4j: {len(region_ids_neo4j)}" - ) - logger.info(f"IDS: {ids}") - logger.info(f"Region IDs: {region_ids_neo4j}") - logger.info(f"Pairs: {pairs}") - logger.info(f"Sequences: {sequences.keys()}") - if not sequences: raise ValueError( "Either sequences or ids (with a database connection) must be provided." 
@@ -234,14 +249,19 @@ def _get_aligner(self) -> BioPairwiseAligner: from the class instance.""" aligner = BioPairwiseAligner() # type: ignore aligner.mode = self.mode - aligner.match_score = self.match - aligner.mismatch_score = self.mismatch + + if self.node_type == "DNA": + aligner.match_score = self.match + aligner.mismatch_score = self.mismatch + else: # protein + # Load BLOSUM62 matrix for proteins + from Bio.Align import substitution_matrices + + aligner.substitution_matrix = substitution_matrices.load("BLOSUM62") + aligner.open_gap_score = self.gap_open aligner.extend_gap_score = self.gap_extend - if self.substitution_matrix != "None": - aligner.substitution_matrix = self._load_substitution_matrix() - return aligner def _map_alignment_results( diff --git a/src/pyeed/analysis/standard_numbering.py b/src/pyeed/analysis/standard_numbering.py index 88450be6..fef383e7 100644 --- a/src/pyeed/analysis/standard_numbering.py +++ b/src/pyeed/analysis/standard_numbering.py @@ -402,11 +402,10 @@ def apply_standard_numbering_pairwise( if node_type == "DNA" and region_ids_neo4j is not None: query = """ MATCH (s:StandardNumbering {name: $name}) - MATCH (r:Region) + MATCH (d:DNA)-[e:HAS_REGION]-(r:Region)-[:HAS_STANDARD_NUMBERING]-(s) WHERE id(r) IN $region_ids_neo4j - MATCH (r:Region)<-[:HAS_STANDARD_NUMBERING]-(s) - WHERE r.accession_id IN $list_of_seq_ids - RETURN r.accession_id AS accession_id + AND d.accession_id IN $list_of_seq_ids + RETURN d.accession_id AS accession_id """ results = db.execute_read( @@ -443,8 +442,7 @@ def apply_standard_numbering_pairwise( logger.info(f"Pairs: {pairs}") # Run the pairwise alignment using the PairwiseAligner. 
- pairwise_aligner = PairwiseAligner() - + pairwise_aligner = PairwiseAligner(node_type=node_type) input = (list_of_seq_ids or []) + [base_sequence_id] if not input: raise ValueError("No input sequences provided") diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py index fc1cb79a..bc92eb3c 100644 --- a/src/pyeed/embedding.py +++ b/src/pyeed/embedding.py @@ -78,9 +78,9 @@ def load_model_and_tokenizer( model = model.to(device) # Check if multiple GPUs are available and wrap the model accordingly - if torch.cuda.device_count() > 1 and device.type == "cuda": - logger.info(f"Using {torch.cuda.device_count()} GPUs for parallel inference.") - model = torch.nn.DataParallel(model) + # if torch.cuda.device_count() > 1 and device.type == "cuda": + # logger.info(f"Using {torch.cuda.device_count()} GPUs for parallel inference.") + # model = torch.nn.DataParallel(model) return model, tokenizer, device @@ -113,17 +113,17 @@ def get_batch_embeddings( List of embeddings as NumPy arrays. """ # First, determine the base model type - base_model = model.module if isinstance(model, torch.nn.DataParallel) else model + # base_model = model.module if isinstance(model, torch.nn.DataParallel) else model - if isinstance(base_model, ESMC): + if isinstance(model, ESMC): # For ESMC models embedding_list = [] with torch.no_grad(): for sequence in batch_sequences: protein = ESMProtein(sequence=sequence) # Use the model directly - DataParallel handles internal distribution - protein_tensor = base_model.encode(protein) - logits_output = base_model.logits( + protein_tensor = model.encode(protein) + logits_output = model.logits( protein_tensor, LogitsConfig(sequence=True, return_embeddings=True) ) if logits_output.embeddings is None: @@ -135,14 +135,14 @@ def get_batch_embeddings( embeddings = embeddings.mean(axis=1) embedding_list.append(embeddings[0]) return embedding_list - elif isinstance(base_model, ESM3): + elif isinstance(model, ESM3): # For ESM3 models embedding_list = [] with 
torch.no_grad(): for sequence in batch_sequences: protein = ESMProtein(sequence=sequence) - sequence_encoding = base_model.encode(protein) - result = base_model.forward_and_sample( + sequence_encoding = model.encode(protein) + result = model.forward_and_sample( sequence_encoding, SamplingConfig(return_per_residue_embeddings=True), ) diff --git a/src/pyeed/main.py b/src/pyeed/main.py index 18c83fc6..40f570cd 100644 --- a/src/pyeed/main.py +++ b/src/pyeed/main.py @@ -458,17 +458,27 @@ def create_coding_sequences_regions(self) -> None: CREATE (r:Region {annotation: 'coding sequence', sequence_id: p.accession_id}) CREATE (d)-[rel:HAS_REGION { start: CASE - WHEN rel_encode.start IS NOT NULL THEN rel_encode.start + WHEN rel_encode.start IS NOT NULL THEN rel_encode.start - 1 ELSE 0 END, end: CASE - WHEN rel_encode.end IS NOT NULL THEN rel_encode.end + WHEN rel_encode.end IS NOT NULL THEN rel_encode.end - 1 ELSE size(d.sequence) - 1 END }]->(r) """ self.db.execute_write(query) + # for dna where ther is no protein encoded and no Region with coding sequence annotation, create a Region with the entire sequence length + # make the start at 0 and the end at the sequence length minus 1 + query = """ + MATCH (d:DNA) + WHERE NOT EXISTS((d)-[:HAS_REGION]->(:Region {annotation: 'coding sequence', sequence_id: d.accession_id})) + CREATE (r:Region {annotation: 'coding sequence', sequence_id: d.accession_id}) + CREATE (d)-[:HAS_REGION {start: 0, end: size(d.sequence) - 1}]->(r) + """ + self.db.execute_write(query) + # Log the number of regions created count_query = """ MATCH (d:DNA)-[:HAS_REGION]->(r:Region {annotation: 'coding sequence'}) From 1470beacbec943e5a0e32f635122153f7b1b5fa6 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 8 May 2025 11:21:58 +0000 Subject: [PATCH 6/9] okay passed all ruff stuff --- src/pyeed/adapter/uniprot_mapper.py | 15 ++++---- src/pyeed/analysis/embedding_analysis.py | 3 +- src/pyeed/analysis/mutation_detection.py | 1 - 
src/pyeed/analysis/sequence_alignment.py | 3 +- src/pyeed/analysis/standard_numbering.py | 1 - src/pyeed/embedding.py | 45 +++++++++++------------- 6 files changed, 31 insertions(+), 37 deletions(-) diff --git a/src/pyeed/adapter/uniprot_mapper.py b/src/pyeed/adapter/uniprot_mapper.py index 7964d6da..23d8533a 100644 --- a/src/pyeed/adapter/uniprot_mapper.py +++ b/src/pyeed/adapter/uniprot_mapper.py @@ -15,8 +15,8 @@ Molecule, Organism, Protein, - Region, Reaction, + Region, Site, ) @@ -158,17 +158,18 @@ def add_catalytic_activity(self, record: dict[str, Any], protein: Protein) -> No try: for reference in record["comments"]: if reference["type"] == "CATALYTIC_ACTIVITY": - catalytic_annotation = CatalyticActivity.get_or_save( - catalytic_id=int(reference["id"]) - if reference.get("id") - else None, - name=reference["reaction"]["name"], + catalytic_annotation = Reaction.get_or_save( + rhea_id=str(reference["id"]) if reference.get("id") else None, + # Optionally, you can add name=reference["reaction"]["name"] if Reaction supports it ) - protein.catalytic_annotation.connect(catalytic_annotation) + # If protein has a reaction relationship, connect it + if hasattr(protein, "reaction"): + protein.reaction.connect(catalytic_annotation) except Exception as e: logger.error( f"Error saving catalytic activity for {protein.accession_id}: {e}" + ) def get_substrates_and_products_from_rhea( self, rhea_id: str diff --git a/src/pyeed/analysis/embedding_analysis.py b/src/pyeed/analysis/embedding_analysis.py index d8d104c0..73aaefb3 100644 --- a/src/pyeed/analysis/embedding_analysis.py +++ b/src/pyeed/analysis/embedding_analysis.py @@ -6,9 +6,8 @@ import scipy.spatial as sp from matplotlib.figure import Figure from numpy.typing import NDArray -from scipy.spatial.distance import cosine - from pyeed.dbconnect import DatabaseConnector +from scipy.spatial.distance import cosine logger = logging.getLogger(__name__) diff --git a/src/pyeed/analysis/mutation_detection.py 
b/src/pyeed/analysis/mutation_detection.py index 74c5e0fb..16829cbb 100644 --- a/src/pyeed/analysis/mutation_detection.py +++ b/src/pyeed/analysis/mutation_detection.py @@ -1,7 +1,6 @@ from typing import Any, Optional from loguru import logger - from pyeed.dbconnect import DatabaseConnector diff --git a/src/pyeed/analysis/sequence_alignment.py b/src/pyeed/analysis/sequence_alignment.py index a19e0cee..b0e94ed1 100644 --- a/src/pyeed/analysis/sequence_alignment.py +++ b/src/pyeed/analysis/sequence_alignment.py @@ -5,10 +5,9 @@ from Bio.Align import PairwiseAligner as BioPairwiseAligner from Bio.Align.substitution_matrices import Array as BioSubstitutionMatrix from joblib import Parallel, cpu_count, delayed -from rich.progress import Progress - from pyeed.dbconnect import DatabaseConnector from pyeed.tools.utility import chunks +from rich.progress import Progress class PairwiseAligner: diff --git a/src/pyeed/analysis/standard_numbering.py b/src/pyeed/analysis/standard_numbering.py index fef383e7..4bf9a8e8 100644 --- a/src/pyeed/analysis/standard_numbering.py +++ b/src/pyeed/analysis/standard_numbering.py @@ -13,7 +13,6 @@ from typing import Any, Dict, List, Optional, Tuple from loguru import logger - from pyeed.analysis.sequence_alignment import PairwiseAligner from pyeed.dbconnect import DatabaseConnector from pyeed.model import StandardNumbering diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py index 5b8a7b92..ad73ada3 100644 --- a/src/pyeed/embedding.py +++ b/src/pyeed/embedding.py @@ -1,22 +1,23 @@ import gc -import logging import os -from typing import Any, Tuple, Union +from typing import TYPE_CHECKING, Any, Tuple, Union import numpy as np import torch from esm.models.esm3 import ESM3 from esm.models.esmc import ESMC -from esm.sdk.api import ESM3InferenceClient, ESMProtein, LogitsConfig, SamplingConfig +from esm.sdk.api import ESMProtein, LogitsConfig, SamplingConfig from huggingface_hub import HfFolder, login from loguru import logger from 
numpy.typing import NDArray -from torch.nn import DataParallel, Module from transformers import EsmModel, EsmTokenizer from pyeed.dbconnect import DatabaseConnector -logger = logging.getLogger(__name__) +if TYPE_CHECKING: + from esm.models.esm3 import ESM3 + from esm.models.esmc import ESMC + from transformers import EsmModel def get_hf_token() -> str: @@ -39,7 +40,7 @@ def get_hf_token() -> str: def process_batches_on_gpu( data: list[tuple[str, str]], batch_size: int, - model: Module, + model: Union["EsmModel", "ESMC", torch.nn.DataParallel, "ESM3"], tokenizer: EsmTokenizer, db: DatabaseConnector, device: torch.device, @@ -57,8 +58,9 @@ def process_batches_on_gpu( """ logger.debug(f"Processing {len(data)} sequences on {device}.") - model = model.to(device) - + # Only call .to(device) if the model is ESMC or DataParallel + if isinstance(model, (ESMC, torch.nn.DataParallel)): + model = model.to(device) # Split data into smaller batches for batch_start in range(0, len(data), batch_size): batch_end = min(batch_start + batch_size, len(data)) @@ -120,10 +122,8 @@ def load_model_and_tokenizer( model: Any = ESMC.from_pretrained(model_name) model = model.to(device) elif "esm3-sm-open-v1" in model_name.lower(): - model: Any = ESM3.from_pretrained("esm3_sm_open_v1").to(device) - - tokenizer = None - + model: Any = ESM3.from_pretrained("esm3_sm_open_v1") + model = model.to(device) else: full_model_name = ( model_name @@ -147,7 +147,6 @@ def get_batch_embeddings( EsmModel, ESMC, torch.nn.DataParallel, - ESM3InferenceClient, ESM3, ], tokenizer_or_alphabet: Union[EsmTokenizer, None], @@ -269,12 +268,12 @@ def calculate_single_sequence_embedding_all_layers( def calculate_single_sequence_embedding_first_layer( - sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D" + sequence: str, device: torch.device, model_name: str = "facebook/esm2_t33_650M_UR50D" ) -> NDArray[np.float64]: """ Calculates an embedding for a single sequence using the first layer. 
""" - model, tokenizer, device = load_model_and_tokenizer(model_name) + model, tokenizer, device = load_model_and_tokenizer(model_name, device) return get_single_embedding_first_layer(sequence, model, tokenizer, device) @@ -284,7 +283,6 @@ def get_single_embedding_first_layer( """ Generates normalized embeddings for each token in the sequence across all layers. """ - embeddings_list = [] with torch.no_grad(): if isinstance(model, ESMC): @@ -419,7 +417,6 @@ def get_single_embedding_all_layers( NDArray[np.float64]: A numpy array containing the normalized token embeddings concatenated across all layers. """ - embeddings_list = [] with torch.no_grad(): if isinstance(model, ESMC): # For ESM-3: Use ESMProtein and request hidden states via LogitsConfig @@ -440,12 +437,14 @@ def get_single_embedding_all_layers( ) # logits_output.hidden_states should be a tuple of tensors: (layer, batch, seq_len, hidden_dim) + embeddings_list = [] for layer_tensor in logits_output.hidden_states: # Remove batch dimension and (if applicable) any special tokens emb = layer_tensor[0].to(torch.float32).cpu().numpy() # If your model adds special tokens, adjust the slicing (e.g., emb[1:-1]) emb = emb / np.linalg.norm(emb, axis=1, keepdims=True) embeddings_list.append(emb) + return np.array(embeddings_list) elif isinstance(model, ESM3): raise NotImplementedError("ESM3 is not supported for all layers") @@ -457,13 +456,11 @@ def get_single_embedding_all_layers( hidden_states = ( outputs.hidden_states ) # Tuple: (layer0, layer1, ..., layerN) - for layer_tensor in hidden_states: - # Remove batch dimension and special tokens ([CLS] and [SEP]) - emb = layer_tensor[0, 1:-1, :].detach().cpu().numpy() - emb = emb / np.linalg.norm(emb, axis=1, keepdims=True) - embeddings_list.append(emb) - - return np.array(embeddings_list) + # Remove the unused variable 'embeddings_list' and directly return the result + return np.array([ + layer_tensor[0, 1:-1, :].detach().cpu().numpy() / np.linalg.norm(layer_tensor[0, 
1:-1, :].detach().cpu().numpy(), axis=1, keepdims=True) + for layer_tensor in hidden_states + ]) # The rest of your existing functions will need to be adapted in a similar way From 82b51216a808804efa4fd8d08c3b53245356bdc0 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 8 May 2025 11:41:36 +0000 Subject: [PATCH 7/9] fixed ruff format --- src/pyeed/embedding.py | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py index ad73ada3..f20e4e17 100644 --- a/src/pyeed/embedding.py +++ b/src/pyeed/embedding.py @@ -190,7 +190,7 @@ def get_batch_embeddings( embeddings = embeddings.mean(axis=1) embedding_list.append(embeddings[0]) return embedding_list - + elif isinstance(base_model, ESM3): # For ESM3 models embedding_list = [] @@ -268,7 +268,9 @@ def calculate_single_sequence_embedding_all_layers( def calculate_single_sequence_embedding_first_layer( - sequence: str, device: torch.device, model_name: str = "facebook/esm2_t33_650M_UR50D" + sequence: str, + device: torch.device, + model_name: str = "facebook/esm2_t33_650M_UR50D", ) -> NDArray[np.float64]: """ Calculates an embedding for a single sequence using the first layer. 
@@ -457,10 +459,17 @@ def get_single_embedding_all_layers( outputs.hidden_states ) # Tuple: (layer0, layer1, ..., layerN) # Remove the unused variable 'embeddings_list' and directly return the result - return np.array([ - layer_tensor[0, 1:-1, :].detach().cpu().numpy() / np.linalg.norm(layer_tensor[0, 1:-1, :].detach().cpu().numpy(), axis=1, keepdims=True) - for layer_tensor in hidden_states - ]) + return np.array( + [ + layer_tensor[0, 1:-1, :].detach().cpu().numpy() + / np.linalg.norm( + layer_tensor[0, 1:-1, :].detach().cpu().numpy(), + axis=1, + keepdims=True, + ) + for layer_tensor in hidden_states + ] + ) # The rest of your existing functions will need to be adapted in a similar way From daead6de3113d8f508a7f0e803a621fbe5e74dd3 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 8 May 2025 11:58:01 +0000 Subject: [PATCH 8/9] fixes mypy --- src/pyeed/analysis/mutation_detection.py | 1745 +--------------------- src/pyeed/analysis/sequence_alignment.py | 68 +- src/pyeed/embedding.py | 154 +- 3 files changed, 48 insertions(+), 1919 deletions(-) diff --git a/src/pyeed/analysis/mutation_detection.py b/src/pyeed/analysis/mutation_detection.py index 16829cbb..5fdc9dd7 100644 --- a/src/pyeed/analysis/mutation_detection.py +++ b/src/pyeed/analysis/mutation_detection.py @@ -292,1747 +292,4 @@ def get_mutations_between_sequences( ) return mutations - - -if __name__ == "__main__": - # debugging - - seq_a = 
"TGAGTATTCAACATTTTCGTGTCGCCCTTATTCCCTTTTTTGCGGCATTTTGCCTTCCTGTTTTTGCTCACCCAGAAACGCTGGTGAAAGTAAAAGATGCTGAAGATCAGTTGGGTGCACGAGTGGGTTACATCGAACTGGATCTCAACAGCGGTAAGATCCTTGAGAGTTTTCGCCCCGAAGAACGTTTTCCAATGATGAGCACTTTTAAAGTTCTGCTATGTGGTGCGGTATTATCCCGTGTTGACGCCGGGCAAGAGCAACTCGGTCGCCGCATACACTATTCTCAGAATGACTTGGTTGAGTACTCACCAGTCACAGAAAAGCATCTTACGGATGGCATGACAGTAAGAGAATTATGCAGTGCTGCCATAACCATGAGTGATAACACTGCGGCCAACTTACTTCTGACAACGATCGGAGGACCGAAGGAGCTAACCGCTTTTTTGCACAACATGGGGGATCATGTAACTCGCCTTGATCGTTGGGAACCGGAGCTGAATGAAGCCATACCAAACGACGAGCGTGACACCACGATGCCTGCAGCAATGGCAACAACGTTGCGCAAACTATTAACTGGCGAACTACTTACTCTAGCTTCCCGGCAACAATTAATAGACTGGATGGAGGCGGATAAAGTTGCAGGACCACTTCTGCGCTCGGCCCTTCCGGCTGGCTGGTTTATTGCTGATAAATCTGGAGCCGGTGAGCGTGGGTCTCGCGGTATCATTGCAGCACTGGGGCCAGATGGTAAGCCCTCCCGTATCGTAGTTATCTACACGACGGGGAGTCAGGCAACTATGGATGAACGAAATAGACAGATCGCTGAGATAGGTGCCTCACTGATTAAGCATTGGTAA" - seq_b = "CACAGATAAAACACTCTCCAGGAAACCCGGGGCGGTTCAATCTGTCTATTTCGTTCATCCATAGTTGCCTGACTCCCCGTCGTGTAGATAACTACGATACGGGAGGGCTTACCATCTGGCCCCAGTGCTGCAATGATACCGCGAGACCCACGCTCACCGGCTCCAGATTTATCAGCAATAAACCAGCCAGCCGGAAGGGCCGAGCGCAGAAGTGGTCCTGCAACTTTATCCGCCTCCATCCAGTCTATTAATTGTTGCCGGGAAGCTAGAGTAAGTAGTTCGCCAGTTAATAGTTTGCGCAACGTTGTTGCCATTGCTGCAGGCATCGTGGTGTCACGCTCGTCGTTTGGTATGGCTTCATTCAGCTCCGGTTCCCAACGATCAAGGCGAGTTACATGATCCCCCATGTTGTGCAAAAAAGCGGTTAGCTCCTTCGGTCCTCCGATCGTTGTCAGAAGTAAGTTGGCAGCAGTGTTATCACTCATGGTTATGGCAGCACTGCATAATTCTCTTACTGTCATGCCATCCGTAAGATGCTTTTCTGTGACTGGTGAGTACTCAACCAAGTCATTCTGAGAATAGTGTATGCGGCGACCGAGTTGCTCTTGCCCGGCGTCAACACGGGATAATACCGCACCACATAGCAGAACTTTAAAAGTGCTCATCATTGGAAAACGTTCTTCGGGGCGAAAACTCTCAAGGATCTTACCGCTGTTGAGATCCAGTTCGATGTAACCCACTCGTGCACCCAACTGATCTTCAGCATCTTTTACTTTCACCAGCGTTTCTGGGTGAGCAAAAACAGGAAGGCAAAATGCCGCAAAAAAGGGAATAAGGGCGACACGAAAATGTTGAATACTCAT" - pos_a = [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30, - 31, - 32, - 33, - 34, - 35, - 36, - 37, - 38, - 39, - 40, - 41, - 42, - 43, 
- 44, - 45, - 46, - 47, - 48, - 49, - 50, - 51, - 52, - 53, - 54, - 55, - 56, - 57, - 58, - 59, - 60, - 61, - 62, - 63, - 64, - 65, - 66, - 67, - 68, - 69, - 70, - 71, - 72, - 73, - 74, - 75, - 76, - 77, - 78, - 79, - 80, - 81, - 82, - 83, - 84, - 85, - 86, - 87, - 88, - 89, - 90, - 91, - 92, - 93, - 94, - 95, - 96, - 97, - 98, - 99, - 100, - 101, - 102, - 103, - 104, - 105, - 106, - 107, - 108, - 109, - 110, - 111, - 112, - 113, - 114, - 115, - 116, - 117, - 118, - 119, - 120, - 121, - 122, - 123, - 124, - 125, - 126, - 127, - 128, - 129, - 130, - 131, - 132, - 133, - 134, - 135, - 136, - 137, - 138, - 139, - 140, - 141, - 142, - 143, - 144, - 145, - 146, - 147, - 148, - 149, - 150, - 151, - 152, - 153, - 154, - 155, - 156, - 157, - 158, - 159, - 160, - 161, - 162, - 163, - 164, - 165, - 166, - 167, - 168, - 169, - 170, - 171, - 172, - 173, - 174, - 175, - 176, - 177, - 178, - 179, - 180, - 181, - 182, - 183, - 184, - 185, - 186, - 187, - 188, - 189, - 190, - 191, - 192, - 193, - 194, - 195, - 196, - 197, - 198, - 199, - 200, - 201, - 202, - 203, - 204, - 205, - 206, - 207, - 208, - 209, - 210, - 211, - 212, - 213, - 214, - 215, - 216, - 217, - 218, - 219, - 220, - 221, - 222, - 223, - 224, - 225, - 226, - 227, - 228, - 229, - 230, - 231, - 232, - 233, - 234, - 235, - 236, - 237, - 238, - 239, - 240, - 241, - 242, - 243, - 244, - 245, - 246, - 247, - 248, - 249, - 250, - 251, - 252, - 253, - 254, - 255, - 256, - 257, - 258, - 259, - 260, - 261, - 262, - 263, - 264, - 265, - 266, - 267, - 268, - 269, - 270, - 271, - 272, - 273, - 274, - 275, - 276, - 277, - 278, - 279, - 280, - 281, - 282, - 283, - 284, - 285, - 286, - 287, - 288, - 289, - 290, - 291, - 292, - 293, - 294, - 295, - 296, - 297, - 298, - 299, - 300, - 301, - 302, - 303, - 304, - 305, - 306, - 307, - 308, - 309, - 310, - 311, - 312, - 313, - 314, - 315, - 316, - 317, - 318, - 319, - 320, - 321, - 322, - 323, - 324, - 325, - 326, - 327, - 328, - 329, - 330, - 331, - 332, - 333, - 334, - 335, - 336, - 
337, - 338, - 339, - 340, - 341, - 342, - 343, - 344, - 345, - 346, - 347, - 348, - 349, - 350, - 351, - 352, - 353, - 354, - 355, - 356, - 357, - 358, - 359, - 360, - 361, - 362, - 363, - 364, - 365, - 366, - 367, - 368, - 369, - 370, - 371, - 372, - 373, - 374, - 375, - 376, - 377, - 378, - 379, - 380, - 381, - 382, - 383, - 384, - 385, - 386, - 387, - 388, - 389, - 390, - 391, - 392, - 393, - 394, - 395, - 396, - 397, - 398, - 399, - 400, - 401, - 402, - 403, - 404, - 405, - 406, - 407, - 408, - 409, - 410, - 411, - 412, - 413, - 414, - 415, - 416, - 417, - 418, - 419, - 420, - 421, - 422, - 423, - 424, - 425, - 426, - 427, - 428, - 429, - 430, - 431, - 432, - 433, - 434, - 435, - 436, - 437, - 438, - 439, - 440, - 441, - 442, - 443, - 444, - 445, - 446, - 447, - 448, - 449, - 450, - 451, - 452, - 453, - 454, - 455, - 456, - 457, - 458, - 459, - 460, - 461, - 462, - 463, - 464, - 465, - 466, - 467, - 468, - 469, - 470, - 471, - 472, - 473, - 474, - 475, - 476, - 477, - 478, - 479, - 480, - 481, - 482, - 483, - 484, - 485, - 486, - 487, - 488, - 489, - 490, - 491, - 492, - 493, - 494, - 495, - 496, - 497, - 498, - 499, - 500, - 501, - 502, - 503, - 504, - 505, - 506, - 507, - 508, - 509, - 510, - 511, - 512, - 513, - 514, - 515, - 516, - 517, - 518, - 519, - 520, - 521, - 522, - 523, - 524, - 525, - 526, - 527, - 528, - 529, - 530, - 531, - 532, - 533, - 534, - 535, - 536, - 537, - 538, - 539, - 540, - 541, - 542, - 543, - 544, - 545, - 546, - 547, - 548, - 549, - 550, - 551, - 552, - 553, - 554, - 555, - 556, - 557, - 558, - 559, - 560, - 561, - 562, - 563, - 564, - 565, - 566, - 567, - 568, - 569, - 570, - 571, - 572, - 573, - 574, - 575, - 576, - 577, - 578, - 579, - 580, - 581, - 582, - 583, - 584, - 585, - 586, - 587, - 588, - 589, - 590, - 591, - 592, - 593, - 594, - 595, - 596, - 597, - 598, - 599, - 600, - 601, - 602, - 603, - 604, - 605, - 606, - 607, - 608, - 609, - 610, - 611, - 612, - 613, - 614, - 615, - 616, - 617, - 618, - 619, - 620, - 621, - 622, 
- 623, - 624, - 625, - 626, - 627, - 628, - 629, - 630, - 631, - 632, - 633, - 634, - 635, - 636, - 637, - 638, - 639, - 640, - 641, - 642, - 643, - 644, - 645, - 646, - 647, - 648, - 649, - 650, - 651, - 652, - 653, - 654, - 655, - 656, - 657, - 658, - 659, - 660, - 661, - 662, - 663, - 664, - 665, - 666, - 667, - 668, - 669, - 670, - 671, - 672, - 673, - 674, - 675, - 676, - 677, - 678, - 679, - 680, - 681, - 682, - 683, - 684, - 685, - 686, - 687, - 688, - 689, - 690, - 691, - 692, - 693, - 694, - 695, - 696, - 697, - 698, - 699, - 700, - 701, - 702, - 703, - 704, - 705, - 706, - 707, - 708, - 709, - 710, - 711, - 712, - 713, - 714, - 715, - 716, - 717, - 718, - 719, - 720, - 721, - 722, - 723, - 724, - 725, - 726, - 727, - 728, - 729, - 730, - 731, - 732, - 733, - 734, - 735, - 736, - 737, - 738, - 739, - 740, - 741, - 742, - 743, - 744, - 745, - 746, - 747, - 748, - 749, - 750, - 751, - 752, - 753, - 754, - 755, - 756, - 757, - 758, - 759, - 760, - 761, - 762, - 763, - 764, - 765, - 766, - 767, - 768, - 769, - 770, - 771, - 772, - 773, - 774, - 775, - 776, - 777, - 778, - 779, - 780, - 781, - 782, - 783, - 784, - 785, - 786, - 787, - 788, - 789, - 790, - 791, - 792, - 793, - 794, - 795, - 796, - 797, - 798, - 799, - 800, - 801, - 802, - 803, - 804, - 805, - 806, - 807, - 808, - 809, - 810, - 811, - 812, - 813, - 814, - 815, - 816, - 817, - 818, - 819, - 820, - 821, - 822, - 823, - 824, - 825, - 826, - 827, - 828, - 829, - 830, - 831, - 832, - 833, - 834, - 835, - 836, - 837, - 838, - 839, - 840, - 841, - 842, - 843, - 844, - 845, - 846, - 847, - 848, - 849, - 850, - 851, - 852, - 853, - 854, - 855, - 856, - 857, - 858, - 859, - 860, - ] - pos_b = [ - 1, - 2, - 3, - 4, - 5, - 6, - 7, - 8, - 9, - 10, - 11, - 12, - 13, - 14, - 15, - 16, - 17, - 18, - 19, - 20, - 21, - 22, - 23, - 24, - 25, - 26, - 27, - 28, - 29, - 30, - 31, - 32, - 33, - 34, - 35, - 36, - 37, - 38, - 39, - 40, - 41, - 42, - 43, - 44, - 45, - 46, - 46.1, - 47, - 48, - 49, - 50, - 51, - 52, - 
52.1, - 53, - 54, - 55, - 56, - 57, - 58, - 59, - 60, - 61, - 62, - 63, - 64, - 65, - 66, - 67, - 68, - 69, - 70, - 71, - 72, - 73, - 74, - 75, - 76, - 77, - 78, - 79, - 80, - 81, - 82, - 83, - 84, - 85, - 86, - 87, - 88, - 89, - 90, - 91, - 92, - 93, - 94, - 95, - 96, - 97, - 98, - 99, - 100, - 101, - 102, - 103, - 104, - 105, - 106, - 107, - 108, - 109, - 110, - 111, - 112, - 113, - 114, - 115, - 116, - 117, - 118, - 119, - 120, - 121, - 122, - 123, - 124, - 125, - 126, - 127, - 128, - 129, - 130, - 131, - 132, - 133, - 134, - 135, - 136, - 137, - 138, - 139, - 140, - 141, - 142, - 146, - 147, - 148, - 149, - 150, - 151, - 152, - 153, - 154, - 155, - 156, - 157, - 158, - 159, - 160, - 161, - 162, - 163, - 164, - 165, - 166, - 167, - 168, - 169, - 170, - 171, - 172, - 173, - 174, - 175, - 176, - 177, - 178, - 179, - 180, - 181, - 182, - 183, - 184, - 185, - 186, - 187, - 188, - 189, - 190, - 191, - 192, - 193, - 193.1, - 193.2, - 193.3, - 194, - 195, - 196, - 197, - 198, - 199, - 200, - 201, - 202, - 203, - 204, - 205, - 206, - 207, - 208, - 209, - 210, - 211, - 212, - 213, - 214, - 215, - 216, - 217, - 218, - 219, - 220, - 221, - 222, - 223, - 224, - 225, - 226, - 227, - 228, - 229, - 230, - 231, - 232, - 233, - 234, - 235, - 236, - 237, - 238, - 239, - 240, - 241, - 242, - 242.1, - 242.2, - 242.3, - 242.4, - 242.5, - 242.6, - 243, - 244, - 245, - 246, - 247, - 248, - 249, - 250, - 251, - 252, - 253, - 253.1, - 253.2, - 253.3, - 254, - 255, - 256, - 257, - 258, - 259, - 260, - 261, - 262, - 263, - 264, - 265, - 266, - 267, - 268, - 269, - 270, - 271, - 272, - 275, - 276, - 277, - 278, - 279, - 280, - 281, - 282, - 283, - 284, - 285, - 286, - 287, - 288, - 289, - 290, - 291, - 292, - 295, - 296, - 297, - 298, - 299, - 300, - 301, - 302, - 303, - 304, - 305, - 306, - 307, - 308, - 309, - 310, - 311, - 312, - 313, - 314, - 315, - 316, - 317, - 318, - 319, - 320, - 321, - 324, - 325, - 326, - 327, - 328, - 329, - 330, - 331, - 332, - 333, - 334, - 335, - 336, - 337, 
- 338, - 339, - 340, - 341, - 342, - 343, - 344, - 345, - 346, - 347, - 349, - 350, - 351, - 352, - 353, - 354, - 355, - 356, - 357, - 358, - 359, - 360, - 361, - 362, - 363, - 364, - 365, - 366, - 367, - 368, - 369, - 370, - 371, - 373, - 374, - 375, - 376, - 377, - 378, - 379, - 380, - 381, - 382, - 383, - 384, - 385, - 386, - 387, - 388, - 389, - 390, - 391, - 392, - 393, - 394, - 395, - 396, - 397, - 398, - 399, - 400, - 401, - 402, - 403, - 404, - 405, - 406, - 407, - 408, - 409, - 410, - 411, - 412, - 413, - 414, - 415, - 416, - 417, - 418, - 419, - 420, - 421, - 421.1, - 421.2, - 422, - 423, - 424, - 425, - 426, - 427, - 428, - 429, - 430, - 431, - 432, - 433, - 434, - 435, - 436, - 437, - 438, - 439, - 440, - 441, - 442, - 443, - 444, - 445, - 446, - 447, - 448, - 449, - 450, - 451, - 452, - 453, - 454, - 455, - 456, - 457, - 458, - 459, - 460, - 461, - 462, - 463, - 464, - 465, - 466, - 466.1, - 467, - 468, - 468.1, - 469, - 470, - 471, - 472, - 473, - 474, - 475, - 476, - 477, - 478, - 479, - 480, - 481, - 482, - 483, - 484, - 485, - 486, - 487, - 488, - 489, - 490, - 491, - 492, - 493, - 494, - 495, - 496, - 497, - 498, - 499, - 500, - 501, - 502, - 503, - 504, - 505, - 506, - 507, - 508, - 509, - 510, - 511, - 512, - 513, - 514, - 515, - 516, - 517, - 518, - 519, - 520, - 521, - 522, - 523, - 524, - 525, - 526, - 527, - 528, - 529, - 530, - 531, - 532, - 533, - 534, - 535, - 536, - 537, - 538, - 539, - 540, - 541, - 542, - 543, - 544, - 545, - 546, - 547, - 548, - 549, - 550, - 551, - 552, - 553, - 554, - 555, - 556, - 557, - 558, - 559, - 560, - 561, - 561.1, - 561.2, - 562, - 563, - 564, - 565, - 566, - 567, - 568, - 569, - 570, - 571, - 572, - 572.1, - 572.2, - 573, - 574, - 575, - 576, - 577, - 578, - 579, - 580, - 581, - 582, - 583, - 584, - 585, - 586, - 587, - 588, - 589, - 590, - 591, - 592, - 593, - 594, - 595, - 596, - 597, - 598, - 599, - 600, - 601, - 602, - 603, - 604, - 605, - 606, - 607, - 608, - 609, - 610, - 611, - 612, - 613, - 614, - 
615, - 616, - 616.1, - 617, - 618, - 619, - 620, - 621, - 622, - 623, - 624, - 625, - 626, - 627, - 628, - 629, - 630, - 631, - 632, - 633, - 634, - 635, - 636, - 637, - 638, - 639, - 640, - 641, - 642, - 643, - 644, - 645, - 646, - 647, - 648, - 649, - 650, - 651, - 652, - 653, - 654, - 655, - 656, - 657, - 658, - 660, - 661, - 662, - 663, - 664, - 665, - 666, - 667, - 668, - 669, - 670, - 671, - 672, - 673, - 674, - 675, - 676, - 677, - 680, - 681, - 682, - 683, - 684, - 685, - 686, - 687, - 688, - 689, - 690, - 691, - 692, - 693, - 694, - 695, - 696, - 697, - 698, - 699, - 700, - 701, - 702, - 703, - 704, - 705, - 706, - 707, - 708, - 709, - 710, - 711, - 712, - 713, - 714, - 715, - 716, - 717, - 719, - 720, - 721, - 722, - 723, - 724, - 725, - 726, - 727, - 728, - 732, - 733, - 734, - 735, - 736, - 737, - 738, - 739, - 740, - 741, - 742, - 743, - 744, - 745, - 746, - 747, - 748, - 749, - 750, - 751, - 752, - 753, - 754, - 755, - 756, - 757, - 758, - 759, - 760, - 761, - 762, - 763, - 764, - 765, - 766, - 767, - 768, - 769, - 771, - 772, - 773, - 774, - 775, - 776, - 777, - 778, - 779, - 780, - 781, - 782, - 783, - 784, - 785, - 786, - 787, - 788, - 789, - 790, - 791, - 792, - 793, - 794, - 795, - 796, - 797, - 798, - 799, - 801, - 802, - 803, - 804, - 805, - 806, - 807, - 808, - 809, - 810, - 811, - 812, - 813, - 814, - 815, - 816, - 817, - 818, - 819, - 820, - 821, - 822, - 823, - 824, - 825, - 826, - 827, - 828, - 829, - 830, - 831, - 832, - 833, - 834, - 835, - 836, - 837, - 838, - 839, - 840, - 841, - 842, - 843, - 844, - 845, - 846, - 847, - 848, - 849, - 850, - 851, - 852, - 853, - 854, - 855, - 856, - 857, - 858, - 859, - 860, - ] - - pos_a = [str(i) for i in pos_a] - pos_b = [str(i) for i in pos_b] - - print(seq_a) - print(seq_b) - - mutation_detection = MutationDetection() - - print(mutation_detection.find_mutations(seq_a, seq_b, pos_a, pos_b)) + \ No newline at end of file diff --git a/src/pyeed/analysis/sequence_alignment.py 
b/src/pyeed/analysis/sequence_alignment.py index b0e94ed1..e6b9d565 100644 --- a/src/pyeed/analysis/sequence_alignment.py +++ b/src/pyeed/analysis/sequence_alignment.py @@ -19,43 +19,20 @@ class PairwiseAligner: def __init__( self, mode: str = "global", - match: float = 1.0, - mismatch: float = -1.0, - gap_open: float = -10.0, - gap_extend: float = -0.5, + match: int = 1, + mismatch: int = -1, + gap_open: int = -1, + gap_exted: int = 0, + substitution_matrix: str = "None", node_type: str = "Protein", ) -> None: - """Initialize the PairwiseAligner. - - Args: - mode (str): Alignment mode ('global' or 'local'). Defaults to 'global'. - match (float): Match score for DNA. Defaults to 1.0. - mismatch (float): Mismatch penalty for DNA. Defaults to -1.0. - gap_open (float): Gap opening penalty. Defaults to -10.0. - gap_extend (float): Gap extension penalty. Defaults to -0.5. - sequence_type (str): Type of sequence ('protein' or 'dna'). Defaults to 'protein'. - node_type (str): Type of node in database. Defaults to 'Protein'. 
- """ - self.node_type = node_type - self.mode = mode - - # Set parameters based on sequence type - if self.node_type == "DNA": - # DNA-specific parameters - self.match = 1.0 - self.mismatch = -1.0 - self.gap_open = -5.0 - self.gap_extend = -2.0 - self.substitution_matrix = "None" - else: # protein - # Protein-specific parameters with BLOSUM62 - self.match = None # Not used when using substitution matrix - self.mismatch = None # Not used when using substitution matrix - self.gap_open = -10.0 - self.gap_extend = -0.5 - self.substitution_matrix = "BLOSUM62" - + self.match = match + self.mismatch = mismatch + self.gap_open = gap_open + self.gap_extend = gap_exted + self.substitution_matrix = substitution_matrix + self.node_type = node_type def _align( self, seq1: Dict[str, str], @@ -270,19 +247,14 @@ def _get_aligner(self) -> BioPairwiseAligner: from the class instance.""" aligner = BioPairwiseAligner() # type: ignore aligner.mode = self.mode - - if self.node_type == "DNA": - aligner.match_score = self.match - aligner.mismatch_score = self.mismatch - else: # protein - # Load BLOSUM62 matrix for proteins - from Bio.Align import substitution_matrices - - aligner.substitution_matrix = substitution_matrices.load("BLOSUM62") - + aligner.match_score = self.match + aligner.mismatch_score = self.mismatch aligner.open_gap_score = self.gap_open aligner.extend_gap_score = self.gap_extend + if self.substitution_matrix != "None": + aligner.substitution_matrix = self._load_substitution_matrix() + return aligner def _map_alignment_results( @@ -341,11 +313,13 @@ def _get_id_sequence_dict( if region_ids_neo4j is not None: query = f""" MATCH (p:{node_type})-[e:HAS_REGION]->(r:Region) - WHERE id(r) IN {region_ids_neo4j} AND p.accession_id IN {ids} + WHERE id(r) IN $region_ids_neo4j AND p.accession_id IN $ids RETURN p.accession_id AS accession_id, e.start AS start, e.end AS end, p.sequence AS sequence """ - nodes = db.execute_read(query) - + nodes = db.execute_read( + query, + 
parameters={"region_ids_neo4j": region_ids_neo4j, "ids": ids}, + ) else: query = f""" MATCH (p:{node_type}) diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py index f20e4e17..5d1ff5a9 100644 --- a/src/pyeed/embedding.py +++ b/src/pyeed/embedding.py @@ -1,24 +1,20 @@ import gc import os -from typing import TYPE_CHECKING, Any, Tuple, Union +from typing import Any, Tuple, Union import numpy as np import torch from esm.models.esm3 import ESM3 from esm.models.esmc import ESMC -from esm.sdk.api import ESMProtein, LogitsConfig, SamplingConfig +from esm.sdk.api import ESM3InferenceClient, ESMProtein, LogitsConfig, SamplingConfig from huggingface_hub import HfFolder, login from loguru import logger from numpy.typing import NDArray +from torch.nn import DataParallel, Module from transformers import EsmModel, EsmTokenizer from pyeed.dbconnect import DatabaseConnector -if TYPE_CHECKING: - from esm.models.esm3 import ESM3 - from esm.models.esmc import ESMC - from transformers import EsmModel - def get_hf_token() -> str: """Get or request Hugging Face token.""" @@ -40,7 +36,7 @@ def get_hf_token() -> str: def process_batches_on_gpu( data: list[tuple[str, str]], batch_size: int, - model: Union["EsmModel", "ESMC", torch.nn.DataParallel, "ESM3"], + model: Module, tokenizer: EsmTokenizer, db: DatabaseConnector, device: torch.device, @@ -58,9 +54,8 @@ def process_batches_on_gpu( """ logger.debug(f"Processing {len(data)} sequences on {device}.") - # Only call .to(device) if the model is ESMC or DataParallel - if isinstance(model, (ESMC, torch.nn.DataParallel)): - model = model.to(device) + model = model.to(device) + # Split data into smaller batches for batch_start in range(0, len(data), batch_size): batch_end = min(batch_start + batch_size, len(data)) @@ -114,30 +109,22 @@ def load_model_and_tokenizer( Tuple: (model, tokenizer, device) """ token = get_hf_token() - # Default device is the first CUDA device if available, else CPU. 
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu") tokenizer = None if "esmc" in model_name.lower(): - model: Any = ESMC.from_pretrained(model_name) - model = model.to(device) + model = ESMC.from_pretrained(model_name) elif "esm3-sm-open-v1" in model_name.lower(): - model: Any = ESM3.from_pretrained("esm3_sm_open_v1") - model = model.to(device) + model = ESM3.from_pretrained("esm3_sm_open_v1") else: full_model_name = ( model_name if model_name.startswith("facebook/") else f"facebook/{model_name}" ) - model: Any = EsmModel.from_pretrained(full_model_name, use_auth_token=token) + model = EsmModel.from_pretrained(full_model_name, use_auth_token=token) tokenizer = EsmTokenizer.from_pretrained(full_model_name, use_auth_token=token) - model = model.to(device) - # Check if multiple GPUs are available and wrap the model accordingly - # if torch.cuda.device_count() > 1 and device.type == "cuda": - # logger.info(f"Using {torch.cuda.device_count()} GPUs for parallel inference.") - # model = torch.nn.DataParallel(model) + model = model.to(device) return model, tokenizer, device @@ -146,7 +133,8 @@ def get_batch_embeddings( model: Union[ EsmModel, ESMC, - torch.nn.DataParallel, + DataParallel[Module], + ESM3InferenceClient, ESM3, ], tokenizer_or_alphabet: Union[EsmTokenizer, None], @@ -190,7 +178,6 @@ def get_batch_embeddings( embeddings = embeddings.mean(axis=1) embedding_list.append(embeddings[0]) return embedding_list - elif isinstance(base_model, ESM3): # For ESM3 models embedding_list = [] @@ -267,75 +254,6 @@ def calculate_single_sequence_embedding_all_layers( return get_single_embedding_all_layers(sequence, model, tokenizer, device) -def calculate_single_sequence_embedding_first_layer( - sequence: str, - device: torch.device, - model_name: str = "facebook/esm2_t33_650M_UR50D", -) -> NDArray[np.float64]: - """ - Calculates an embedding for a single sequence using the first layer. 
- """ - model, tokenizer, device = load_model_and_tokenizer(model_name, device) - return get_single_embedding_first_layer(sequence, model, tokenizer, device) - - -def get_single_embedding_first_layer( - sequence: str, model: Any, tokenizer: Any, device: torch.device -) -> NDArray[np.float64]: - """ - Generates normalized embeddings for each token in the sequence across all layers. - """ - - with torch.no_grad(): - if isinstance(model, ESMC): - # ESM-3 logic - from esm.sdk.api import ESMProtein, LogitsConfig - - protein = ESMProtein(sequence=sequence) - protein_tensor = model.encode(protein) - logits_output = model.logits( - protein_tensor, - LogitsConfig( - sequence=True, - return_embeddings=True, - return_hidden_states=True, - ), - ) - if logits_output.hidden_states is None: - raise ValueError( - "Model did not return hidden states. Check LogitsConfig settings." - ) - embedding = ( - logits_output.hidden_states[0][0].to(torch.float32).cpu().numpy() - ) - - elif isinstance(model, ESM3): - # ESM-3 logic - from esm.sdk.api import ESMProtein, SamplingConfig - - protein = ESMProtein(sequence=sequence) - protein_tensor = model.encode(protein) - embedding = model.forward_and_sample( - protein_tensor, - SamplingConfig(return_per_residue_embeddings=True), - ) - if embedding is None or embedding.per_residue_embedding is None: - raise ValueError("Model did not return embeddings") - embedding = embedding.per_residue_embedding.to(torch.float32).cpu().numpy() - - else: - # ESM-2 logic - inputs = tokenizer(sequence, return_tensors="pt").to(device) - outputs = model(**inputs, output_hidden_states=True) - # Get the first layer's hidden states for all residues (excluding special tokens) - embedding = outputs.hidden_states[0][0, 1:-1, :].detach().cpu().numpy() - - # Ensure embedding is a numpy array and normalize it - embedding = np.asarray(embedding, dtype=np.float64) - embedding = embedding / np.linalg.norm(embedding, axis=1, keepdims=True) - return embedding - - def 
get_single_embedding_last_hidden_state( sequence: str, model: Any, tokenizer: Any, device: torch.device ) -> NDArray[np.float64]: @@ -362,9 +280,12 @@ def get_single_embedding_last_hidden_state( logits_output = model.logits( protein_tensor, LogitsConfig( - sequence=True, return_embeddings=True, return_hidden_states=True + sequence=True, + return_embeddings=True, + return_hidden_states=True, ), ) + # Ensure hidden_states is not None before accessing it if logits_output.hidden_states is None: raise ValueError( "Model did not return hidden states. Check LogitsConfig settings." @@ -373,24 +294,10 @@ def get_single_embedding_last_hidden_state( embedding = ( logits_output.hidden_states[-1][0].to(torch.float32).cpu().numpy() ) - elif isinstance(model, ESM3): - # ESM-3 logic - from esm.sdk.api import ESMProtein, SamplingConfig - - protein = ESMProtein(sequence=sequence) - sequence_encoding = model.encode(protein) - - embedding = model.forward_and_sample( - sequence_encoding, SamplingConfig(return_per_residue_embeddings=True) - ) - - if embedding is None or embedding.per_residue_embedding is None: - raise ValueError("Model did not return embeddings") - embedding = embedding.per_residue_embedding.to(torch.float32).cpu().numpy() else: # ESM-2 logic inputs = tokenizer(sequence, return_tensors="pt").to(device) - outputs = model(**inputs, output_hidden_states=True, return_dict=True) + outputs = model(**inputs) embedding = outputs.last_hidden_state[0, 1:-1, :].detach().cpu().numpy() # normalize the embedding @@ -419,6 +326,7 @@ def get_single_embedding_all_layers( NDArray[np.float64]: A numpy array containing the normalized token embeddings concatenated across all layers. 
""" + embeddings_list = [] with torch.no_grad(): if isinstance(model, ESMC): # For ESM-3: Use ESMProtein and request hidden states via LogitsConfig @@ -439,17 +347,12 @@ def get_single_embedding_all_layers( ) # logits_output.hidden_states should be a tuple of tensors: (layer, batch, seq_len, hidden_dim) - embeddings_list = [] for layer_tensor in logits_output.hidden_states: # Remove batch dimension and (if applicable) any special tokens emb = layer_tensor[0].to(torch.float32).cpu().numpy() # If your model adds special tokens, adjust the slicing (e.g., emb[1:-1]) emb = emb / np.linalg.norm(emb, axis=1, keepdims=True) embeddings_list.append(emb) - return np.array(embeddings_list) - - elif isinstance(model, ESM3): - raise NotImplementedError("ESM3 is not supported for all layers") else: # For ESM-2: Get hidden states with output_hidden_states=True @@ -458,18 +361,13 @@ def get_single_embedding_all_layers( hidden_states = ( outputs.hidden_states ) # Tuple: (layer0, layer1, ..., layerN) - # Remove the unused variable 'embeddings_list' and directly return the result - return np.array( - [ - layer_tensor[0, 1:-1, :].detach().cpu().numpy() - / np.linalg.norm( - layer_tensor[0, 1:-1, :].detach().cpu().numpy(), - axis=1, - keepdims=True, - ) - for layer_tensor in hidden_states - ] - ) + for layer_tensor in hidden_states: + # Remove batch dimension and special tokens ([CLS] and [SEP]) + emb = layer_tensor[0, 1:-1, :].detach().cpu().numpy() + emb = emb / np.linalg.norm(emb, axis=1, keepdims=True) + embeddings_list.append(emb) + + return np.array(embeddings_list) # The rest of your existing functions will need to be adapted in a similar way @@ -514,4 +412,4 @@ def update_protein_embeddings_in_db( """ # Execute the update query with parameters - db.execute_write(query, {"updates": updates}) + db.execute_write(query, {"updates": updates}) \ No newline at end of file From bc7ac9801d8a517dbcc66ef0e33d39b6b75ad53b Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 8 May 
2025 12:08:15 +0000 Subject: [PATCH 9/9] fixed ruff format --- src/pyeed/analysis/mutation_detection.py | 1 - src/pyeed/analysis/sequence_alignment.py | 1 + src/pyeed/embedding.py | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/pyeed/analysis/mutation_detection.py b/src/pyeed/analysis/mutation_detection.py index 5fdc9dd7..c2562ae1 100644 --- a/src/pyeed/analysis/mutation_detection.py +++ b/src/pyeed/analysis/mutation_detection.py @@ -292,4 +292,3 @@ def get_mutations_between_sequences( ) return mutations - \ No newline at end of file diff --git a/src/pyeed/analysis/sequence_alignment.py b/src/pyeed/analysis/sequence_alignment.py index e6b9d565..0ca43d02 100644 --- a/src/pyeed/analysis/sequence_alignment.py +++ b/src/pyeed/analysis/sequence_alignment.py @@ -33,6 +33,7 @@ def __init__( self.gap_extend = gap_exted self.substitution_matrix = substitution_matrix self.node_type = node_type + def _align( self, seq1: Dict[str, str], diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py index 5d1ff5a9..28f66a1b 100644 --- a/src/pyeed/embedding.py +++ b/src/pyeed/embedding.py @@ -412,4 +412,4 @@ def update_protein_embeddings_in_db( """ # Execute the update query with parameters - db.execute_write(query, {"updates": updates}) \ No newline at end of file + db.execute_write(query, {"updates": updates})