diff --git a/docs/usage/blast.ipynb b/docs/usage/blast.ipynb
index b56140d7..d6cd57ef 100644
--- a/docs/usage/blast.ipynb
+++ b/docs/usage/blast.ipynb
@@ -1,363 +1,364 @@
 {
-    "cells": [
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "# BLAST Search\n",
-                "\n",
-                "## Setup\n",
-                "\n",
-                "The BLAST service runs in a Docker container and requires:\n",
-                "1. A local BLAST database\n",
-                "2. The Docker service running"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 1,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "# change log level to INFO\n",
-                "import sys\n",
-                "from loguru import logger\n",
-                "\n",
-                "logger.remove()\n",
-                "level = logger.add(sys.stderr, level=\"WARNING\")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "## Basic Usage\n",
-                "\n",
-                "The `Blast` class provides an interface to search protein or nucleotide sequences against a local BLAST database."
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 2,
-            "metadata": {},
-            "outputs": [
-                {
-                    "data": {
-                        "text/html": [
-                            "<div>\n",
-                            "<style scoped>\n",
-                            "    .dataframe tbody tr th:only-of-type {\n",
-                            "        vertical-align: middle;\n",
-                            "    }\n",
-                            "\n",
-                            "    .dataframe tbody tr th {\n",
-                            "        vertical-align: top;\n",
-                            "    }\n",
-                            "\n",
-                            "    .dataframe thead th {\n",
-                            "        text-align: right;\n",
-                            "    }\n",
-                            "</style>\n",
-                            "<table border=\"1\" class=\"dataframe\">\n",
-                            "  <thead>\n",
-                            "    <tr style=\"text-align: right;\">\n",
-                            "      <th></th>\n",
-                            "      <th>subject_id</th>\n",
-                            "      <th>identity</th>\n",
-                            "      <th>alignment_length</th>\n",
-                            "      <th>mismatches</th>\n",
-                            "      <th>gap_opens</th>\n",
-                            "      <th>query_start</th>\n",
-                            "      <th>query_end</th>\n",
-                            "      <th>subject_start</th>\n",
-                            "      <th>subject_end</th>\n",
-                            "      <th>evalue</th>\n",
-                            "      <th>bit_score</th>\n",
-                            "    </tr>\n",
-                            "  </thead>\n",
-                            "  <tbody>\n",
-                            "    <tr>\n",
-                            "      <th>0</th>\n",
-                            "      <td>seq7</td>\n",
-                            "      <td>81.818</td>\n",
-                            "      <td>22</td>\n",
-                            "      <td>3</td>\n",
-                            "      <td>1</td>\n",
-                            "      <td>31</td>\n",
-                            "      <td>51</td>\n",
-                            "      <td>11</td>\n",
-                            "      <td>32</td>\n",
-                            "      <td>0.003</td>\n",
-                            "      <td>22.3</td>\n",
-                            "    </tr>\n",
-                            "    <tr>\n",
-                            "      <th>1</th>\n",
-                            "      <td>seq1</td>\n",
-                            "      <td>100.000</td>\n",
-                            "      <td>25</td>\n",
-                            "      <td>0</td>\n",
-                            "      <td>0</td>\n",
-                            "      <td>1</td>\n",
-                            "      <td>25</td>\n",
-                            "      <td>1</td>\n",
-                            "      <td>25</td>\n",
-                            "      <td>0.004</td>\n",
-                            "      <td>22.3</td>\n",
-                            "    </tr>\n",
-                            "    <tr>\n",
-                            "      <th>2</th>\n",
-                            "      <td>seq2</td>\n",
-                            "      <td>61.538</td>\n",
-                            "      <td>26</td>\n",
-                            "      <td>10</td>\n",
-                            "      <td>0</td>\n",
-                            "      <td>20</td>\n",
-                            "      <td>45</td>\n",
-                            "      <td>5</td>\n",
-                            "      <td>30</td>\n",
-                            "      <td>0.038</td>\n",
-                            "      <td>19.2</td>\n",
-                            "    </tr>\n",
-                            "  </tbody>\n",
-                            "</table>\n",
-                            "</div>"
-                        ],
-                        "text/plain": [
-                            "  subject_id  identity  alignment_length  mismatches  gap_opens  query_start  \\\n",
-                            "0       seq7    81.818                22           3          1           31   \n",
-                            "1       seq1   100.000                25           0          0            1   \n",
-                            "2       seq2    61.538                26          10          0           20   \n",
-                            "\n",
-                            "   query_end  subject_start  subject_end  evalue  bit_score  \n",
-                            "0         51             11           32   0.003       22.3  \n",
-                            "1         25              1           25   0.004       22.3  \n",
-                            "2         45              5           30   0.038       19.2  "
-                        ]
-                    },
-                    "execution_count": 2,
-                    "metadata": {},
-                    "output_type": "execute_result"
-                }
-            ],
-            "source": [
-                "from pyeed.tools import Blast\n",
-                "\n",
-                "# Example protein sequence\n",
-                "sequence = \"MSEQVAAVAKLRAKASEAAKEAKAREAAKKLAEAAKKAKAKEAAKRAEAKLAEKAKAAKRAEAKAAKEAKRAAAKRAEAKLAEKAKAAK\"\n",
-                "\n",
-                "# Initialize BLAST search\n",
-                "blast = Blast(\n",
-                "    # service_url=\"http://localhost:6001/blast\",\n",
-                "    mode=\"blastp\",  # Use blastp for protein sequences\n",
-                "    db_path=\"/usr/local/bin/data/test_db\",  # Path in Docker container\n",
-                "    db_name=\"protein_db\",  # Name of your BLAST database\n",
-                "    evalue=0.1,  # E-value threshold\n",
-                "    max_target_seqs=10,  # Maximum number of hits to return\n",
-                ")\n",
-                "\n",
-                "# Perform search\n",
-                "results = blast.search(sequence)\n",
-                "results"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "The results are returned as a pandas DataFrame with the following columns:\n",
-                "- subject_id: ID of the matched sequence\n",
-                "- identity: Percentage identity\n",
-                "- alignment_length: Length of the alignment\n",
-                "- mismatches: Number of mismatches\n",
-                "- gap_opens: Number of gap openings\n",
-                "- query_start/end: Start/end positions in query sequence\n",
-                "- subject_start/end: Start/end positions in subject sequence\n",
-                "- evalue: Expectation value\n",
-                "- bit_score: Bit score"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "## Creating a BLAST Database\n",
-                "\n",
-                "Before using BLAST, you need to create a local database. Here's how to create one from a FASTA file:"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "```bash\n",
-                "# For protein sequences\n",
-                "makeblastdb -in proteins.fasta -dbtype prot -out blast_db/my_proteins\n",
-                "\n",
-                "# For nucleotide sequences\n",
-                "makeblastdb -in nucleotides.fasta -dbtype nucl -out blast_db/my_nucleotides\n",
-                "```\n",
-                "\n",
-                "To access the BLAST Docker container shell and create databases:\n",
-                "\n",
-                "```bash\n",
-                "# Enter the BLAST container shell\n",
-                "docker compose exec blast bash\n",
-                "# \n",
-                "# Navigate to database directory\n",
-                "cd /usr/local/bin/data/blast_db\n",
-                "# \n",
-                "# Create protein database\n",
-                "makeblastdb -in proteins.fasta -dbtype prot -out my_proteins\n",
-                "# \n",
-                "# Create nucleotide database \n",
-                "makeblastdb -in nucleotides.fasta -dbtype nucl -out my_nucleotides\n",
-                "```\n",
-                "Make sure your FASTA files are mounted in the container's `/usr/local/bin/data/blast_db` directory.\n"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "## Advanced Usage\n",
-                "\n",
-                "You can customize the BLAST search parameters:"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 3,
-            "metadata": {},
-            "outputs": [
-                {
-                    "data": {
-                        "text/html": [
-                            "<div>\n",
-                            "<style scoped>\n",
-                            "    .dataframe tbody tr th:only-of-type {\n",
-                            "        vertical-align: middle;\n",
-                            "    }\n",
-                            "\n",
-                            "    .dataframe tbody tr th {\n",
-                            "        vertical-align: top;\n",
-                            "    }\n",
-                            "\n",
-                            "    .dataframe thead th {\n",
-                            "        text-align: right;\n",
-                            "    }\n",
-                            "</style>\n",
-                            "<table border=\"1\" class=\"dataframe\">\n",
-                            "  <thead>\n",
-                            "    <tr style=\"text-align: right;\">\n",
-                            "      <th></th>\n",
-                            "      <th>subject_id</th>\n",
-                            "      <th>identity</th>\n",
-                            "      <th>alignment_length</th>\n",
-                            "      <th>mismatches</th>\n",
-                            "      <th>gap_opens</th>\n",
-                            "      <th>query_start</th>\n",
-                            "      <th>query_end</th>\n",
-                            "      <th>subject_start</th>\n",
-                            "      <th>subject_end</th>\n",
-                            "      <th>evalue</th>\n",
-                            "      <th>bit_score</th>\n",
-                            "    </tr>\n",
-                            "  </thead>\n",
-                            "  <tbody>\n",
-                            "    <tr>\n",
-                            "      <th>0</th>\n",
-                            "      <td>seq7</td>\n",
-                            "      <td>81.818</td>\n",
-                            "      <td>22</td>\n",
-                            "      <td>3</td>\n",
-                            "      <td>1</td>\n",
-                            "      <td>31</td>\n",
-                            "      <td>51</td>\n",
-                            "      <td>11</td>\n",
-                            "      <td>32</td>\n",
-                            "      <td>0.003</td>\n",
-                            "      <td>22.3</td>\n",
-                            "    </tr>\n",
-                            "    <tr>\n",
-                            "      <th>1</th>\n",
-                            "      <td>seq1</td>\n",
-                            "      <td>100.000</td>\n",
-                            "      <td>25</td>\n",
-                            "      <td>0</td>\n",
-                            "      <td>0</td>\n",
-                            "      <td>1</td>\n",
-                            "      <td>25</td>\n",
-                            "      <td>1</td>\n",
-                            "      <td>25</td>\n",
-                            "      <td>0.004</td>\n",
-                            "      <td>22.3</td>\n",
-                            "    </tr>\n",
-                            "  </tbody>\n",
-                            "</table>\n",
-                            "</div>"
-                        ],
-                        "text/plain": [
-                            "  subject_id  identity  alignment_length  mismatches  gap_opens  query_start  \\\n",
-                            "0       seq7    81.818                22           3          1           31   \n",
-                            "1       seq1   100.000                25           0          0            1   \n",
-                            "\n",
-                            "   query_end  subject_start  subject_end  evalue  bit_score  \n",
-                            "0         51             11           32   0.003       22.3  \n",
-                            "1         25              1           25   0.004       22.3  "
-                        ]
-                    },
-                    "execution_count": 3,
-                    "metadata": {},
-                    "output_type": "execute_result"
-                }
-            ],
-            "source": [
-                "# Configure BLAST for sensitive protein search\n",
-                "blast = Blast(\n",
-                "    # service_url=\"http://localhost:6001/blast\",\n",
-                "    mode=\"blastp\",\n",
-                "    db_path=\"/usr/local/bin/data/test_db\",\n",
-                "    db_name=\"protein_db\",\n",
-                "    evalue=1e-1,  # More stringent E-value\n",
-                "    max_target_seqs=100,  # Return more hits\n",
-                "    num_threads=4,  # Use 4 CPU threads\n",
-                ")\n",
-                "\n",
-                "# Search with longer timeout\n",
-                "results = blast.search(sequence, timeout=7200)  # 2 hour timeout\n",
-                "\n",
-                "# Filter results\n",
-                "significant_hits = results[results[\"identity\"] > 80]  # Only hits with >90% identity\n",
-                "significant_hits"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "Thereafter, the ids of the hits can be added to the pyeed database, using the `fetch_from_primary_db` function."
-            ]
-        }
-    ],
-    "metadata": {
-        "kernelspec": {
-            "display_name": "pyeed",
-            "language": "python",
-            "name": "python3"
-        },
-        "language_info": {
-            "codemirror_mode": {
-                "name": "ipython",
-                "version": 3
-            },
-            "file_extension": ".py",
-            "mimetype": "text/x-python",
-            "name": "python",
-            "nbconvert_exporter": "python",
-            "pygments_lexer": "ipython3",
-            "version": "3.11.5"
-        }
-    },
-    "nbformat": 4,
-    "nbformat_minor": 2
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# BLAST Search\n",
+    "\n",
+    "## Setup\n",
+    "\n",
+    "The BLAST service runs in a Docker container and requires:\n",
+    "1. A local BLAST database\n",
+    "2. The Docker service running"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# change log level to INFO\n",
+    "import sys\n",
+    "\n",
+    "from loguru import logger\n",
+    "\n",
+    "logger.remove()\n",
+    "level = logger.add(sys.stderr, level=\"WARNING\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Basic Usage\n",
+    "\n",
+    "The `Blast` class provides an interface to search protein or nucleotide sequences against a local BLAST database."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>identity</th>\n",
+       "      <th>alignment_length</th>\n",
+       "      <th>mismatches</th>\n",
+       "      <th>gap_opens</th>\n",
+       "      <th>query_start</th>\n",
+       "      <th>query_end</th>\n",
+       "      <th>subject_start</th>\n",
+       "      <th>subject_end</th>\n",
+       "      <th>evalue</th>\n",
+       "      <th>bit_score</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>seq7</td>\n",
+       "      <td>81.818</td>\n",
+       "      <td>22</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>31</td>\n",
+       "      <td>51</td>\n",
+       "      <td>11</td>\n",
+       "      <td>32</td>\n",
+       "      <td>0.003</td>\n",
+       "      <td>22.3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>seq1</td>\n",
+       "      <td>100.000</td>\n",
+       "      <td>25</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>25</td>\n",
+       "      <td>1</td>\n",
+       "      <td>25</td>\n",
+       "      <td>0.004</td>\n",
+       "      <td>22.3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>seq2</td>\n",
+       "      <td>61.538</td>\n",
+       "      <td>26</td>\n",
+       "      <td>10</td>\n",
+       "      <td>0</td>\n",
+       "      <td>20</td>\n",
+       "      <td>45</td>\n",
+       "      <td>5</td>\n",
+       "      <td>30</td>\n",
+       "      <td>0.038</td>\n",
+       "      <td>19.2</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  subject_id  identity  alignment_length  mismatches  gap_opens  query_start  \\\n",
+       "0       seq7    81.818                22           3          1           31   \n",
+       "1       seq1   100.000                25           0          0            1   \n",
+       "2       seq2    61.538                26          10          0           20   \n",
+       "\n",
+       "   query_end  subject_start  subject_end  evalue  bit_score  \n",
+       "0         51             11           32   0.003       22.3  \n",
+       "1         25              1           25   0.004       22.3  \n",
+       "2         45              5           30   0.038       19.2  "
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from pyeed.tools import Blast\n",
+    "\n",
+    "# Example protein sequence\n",
+    "sequence = \"MSEQVAAVAKLRAKASEAAKEAKAREAAKKLAEAAKKAKAKEAAKRAEAKLAEKAKAAKRAEAKAAKEAKRAAAKRAEAKLAEKAKAAK\"\n",
+    "\n",
+    "# Initialize BLAST search\n",
+    "blast = Blast(\n",
+    "    # service_url=\"http://localhost:6001/blast\",\n",
+    "    mode=\"blastp\",  # Use blastp for protein sequences\n",
+    "    db_path=\"/usr/local/bin/data/test_db\",  # Path in Docker container\n",
+    "    db_name=\"protein_db\",  # Name of your BLAST database\n",
+    "    evalue=0.1,  # E-value threshold\n",
+    "    max_target_seqs=10,  # Maximum number of hits to return\n",
+    ")\n",
+    "\n",
+    "# Perform search\n",
+    "results = blast.search(sequence)\n",
+    "results"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The results are returned as a pandas DataFrame with the following columns:\n",
+    "- subject_id: ID of the matched sequence\n",
+    "- identity: Percentage identity\n",
+    "- alignment_length: Length of the alignment\n",
+    "- mismatches: Number of mismatches\n",
+    "- gap_opens: Number of gap openings\n",
+    "- query_start/end: Start/end positions in query sequence\n",
+    "- subject_start/end: Start/end positions in subject sequence\n",
+    "- evalue: Expectation value\n",
+    "- bit_score: Bit score"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Creating a BLAST Database\n",
+    "\n",
+    "Before using BLAST, you need to create a local database. Here's how to create one from a FASTA file:"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "```bash\n",
+    "# For protein sequences\n",
+    "makeblastdb -in proteins.fasta -dbtype prot -out blast_db/my_proteins\n",
+    "\n",
+    "# For nucleotide sequences\n",
+    "makeblastdb -in nucleotides.fasta -dbtype nucl -out blast_db/my_nucleotides\n",
+    "```\n",
+    "\n",
+    "To access the BLAST Docker container shell and create databases:\n",
+    "\n",
+    "```bash\n",
+    "# Enter the BLAST container shell\n",
+    "docker compose exec blast bash\n",
+    "# \n",
+    "# Navigate to database directory\n",
+    "cd /usr/local/bin/data/blast_db\n",
+    "# \n",
+    "# Create protein database\n",
+    "makeblastdb -in proteins.fasta -dbtype prot -out my_proteins\n",
+    "# \n",
+    "# Create nucleotide database \n",
+    "makeblastdb -in nucleotides.fasta -dbtype nucl -out my_nucleotides\n",
+    "```\n",
+    "Make sure your FASTA files are mounted in the container's `/usr/local/bin/data/blast_db` directory.\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Advanced Usage\n",
+    "\n",
+    "You can customize the BLAST search parameters:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>subject_id</th>\n",
+       "      <th>identity</th>\n",
+       "      <th>alignment_length</th>\n",
+       "      <th>mismatches</th>\n",
+       "      <th>gap_opens</th>\n",
+       "      <th>query_start</th>\n",
+       "      <th>query_end</th>\n",
+       "      <th>subject_start</th>\n",
+       "      <th>subject_end</th>\n",
+       "      <th>evalue</th>\n",
+       "      <th>bit_score</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>seq7</td>\n",
+       "      <td>81.818</td>\n",
+       "      <td>22</td>\n",
+       "      <td>3</td>\n",
+       "      <td>1</td>\n",
+       "      <td>31</td>\n",
+       "      <td>51</td>\n",
+       "      <td>11</td>\n",
+       "      <td>32</td>\n",
+       "      <td>0.003</td>\n",
+       "      <td>22.3</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>seq1</td>\n",
+       "      <td>100.000</td>\n",
+       "      <td>25</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>25</td>\n",
+       "      <td>1</td>\n",
+       "      <td>25</td>\n",
+       "      <td>0.004</td>\n",
+       "      <td>22.3</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "  subject_id  identity  alignment_length  mismatches  gap_opens  query_start  \\\n",
+       "0       seq7    81.818                22           3          1           31   \n",
+       "1       seq1   100.000                25           0          0            1   \n",
+       "\n",
+       "   query_end  subject_start  subject_end  evalue  bit_score  \n",
+       "0         51             11           32   0.003       22.3  \n",
+       "1         25              1           25   0.004       22.3  "
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Configure BLAST for sensitive protein search\n",
+    "blast = Blast(\n",
+    "    # service_url=\"http://localhost:6001/blast\",\n",
+    "    mode=\"blastp\",\n",
+    "    db_path=\"/usr/local/bin/data/test_db\",\n",
+    "    db_name=\"protein_db\",\n",
+    "    evalue=1e-1,  # More stringent E-value\n",
+    "    max_target_seqs=100,  # Return more hits\n",
+    "    num_threads=4,  # Use 4 CPU threads\n",
+    ")\n",
+    "\n",
+    "# Search with longer timeout\n",
+    "results = blast.search(sequence, timeout=7200)  # 2 hour timeout\n",
+    "\n",
+    "# Filter results\n",
+    "significant_hits = results[results[\"identity\"] > 80]  # Only hits with >90% identity\n",
+    "significant_hits"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Thereafter, the ids of the hits can be added to the pyeed database, using the `fetch_from_primary_db` function."
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pyeed",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
 }
diff --git a/docs/usage/clustalo.ipynb b/docs/usage/clustalo.ipynb
index 64ed62ee..d3ba2fba 100644
--- a/docs/usage/clustalo.ipynb
+++ b/docs/usage/clustalo.ipynb
@@ -1,171 +1,171 @@
 {
-    "cells": [
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "# Multiple Sequence Alignment with Clustal Omega\n",
-                "\n",
-                "PyEED provides a convenient interface to Clustal Omega for multiple sequence alignment. This notebook demonstrates how to:\n",
-                "1. Align sequences from a dictionary\n",
-                "2. Align sequences directly from the database"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 13,
-            "metadata": {},
-            "outputs": [],
-            "source": [
-                "from pyeed import Pyeed\n",
-                "from pyeed.tools.clustalo import ClustalOmega\n",
-                "\n",
-                "# change log level to INFO\n",
-                "import sys\n",
-                "from loguru import logger\n",
-                "\n",
-                "logger.remove()\n",
-                "level = logger.add(sys.stderr, level=\"INFO\")"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "## Direct Sequence Alignment\n",
-                "\n",
-                "You can align sequences directly by providing a dictionary of sequences:"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 14,
-            "metadata": {},
-            "outputs": [
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "Aligned sequences:\n",
-                        "seq1  AKFVMPDRAWHLYTGNECSKQRLYVWFHDGAPILKTQSDNMGAYRCPLFHVTKNWEI\n",
-                        "seq2  AKFVMPDRQWHLYTGQECSKQRLYVWFHDGAPILKTQSDNMGAYRCPLFHVTKNWEI\n",
-                        "seq3  AKFVMPDRQWHLYTGNECSKQRLYVWFHDGAPILKTQADNMGAYRCALFHVTK----\n"
-                    ]
-                }
-            ],
-            "source": [
-                "# Initialize ClustalOmega\n",
-                "clustalo = ClustalOmega()\n",
-                "\n",
-                "# Example sequences\n",
-                "sequences = {\n",
-                "    \"seq1\": \"AKFVMPDRAWHLYTGNECSKQRLYVWFHDGAPILKTQSDNMGAYRCPLFHVTKNWEI\",\n",
-                "    \"seq2\": \"AKFVMPDRQWHLYTGQECSKQRLYVWFHDGAPILKTQSDNMGAYRCPLFHVTKNWEI\",\n",
-                "    \"seq3\": \"AKFVMPDRQWHLYTGNECSKQRLYVWFHDGAPILKTQADNMGAYRCALFHVTK\",\n",
-                "}\n",
-                "\n",
-                "# Perform alignment\n",
-                "alignment = clustalo.align(sequences)\n",
-                "print(\"Aligned sequences:\")\n",
-                "print(alignment)"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "## Database-based Alignment\n",
-                "\n",
-                "You can also align sequences directly from the database by providing a list of accession IDs:"
-            ]
-        },
-        {
-            "cell_type": "code",
-            "execution_count": 15,
-            "metadata": {},
-            "outputs": [
-                {
-                    "name": "stdout",
-                    "output_type": "stream",
-                    "text": [
-                        "Pyeed Graph Object Mapping constraints not defined. Use _install_labels() to set up model constraints.\n",
-                        "📡 Connected to database.\n",
-                        "Database alignment:\n",
-                        "AAP20891.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-                        "CAJ85677.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-                        "SAQ02853.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-                        "CDR98216.1      MSIQHFRVALIPFFAAFCFPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYMTGSQATMDERNRQIAEIGASLIKHW\n",
-                        "WP_109963600.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDSWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGTGKRGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-                        "CAA41038.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDHWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-                        "WP_109874025.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDSWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-                        "CAA46344.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDSWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
-                        "APG33178.1      MSIQHFRVALIPFFAAFCFPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDSWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYMTGSQATMDERNRQIAEIGASLIKHW\n",
-                        "AKC98298.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDHWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n"
-                    ]
-                }
-            ],
-            "source": [
-                "# Connect to database\n",
-                "pyeed = Pyeed(uri=\"bolt://129.69.129.130:7687\", user=\"neo4j\", password=\"12345678\")\n",
-                "\n",
-                "# Get protein IDs from database\n",
-                "from pyeed.model import Protein\n",
-                "\n",
-                "accession_ids = [protein.accession_id for protein in Protein.nodes.all()][:10]\n",
-                "\n",
-                "# Align sequences from database\n",
-                "alignment = clustalo.align_from_db(accession_ids, pyeed.db)\n",
-                "print(\"Database alignment:\")\n",
-                "print(alignment)"
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "## Understanding Alignment Results\n",
-                "\n",
-                "The alignment result is a `MultipleSequenceAlignment` object with:\n",
-                "- List of `Sequence` objects\n",
-                "- Each sequence has an ID and aligned sequence\n",
-                "- Gaps are represented by '-' characters\n",
-                "- Sequences are padded to equal length\n",
-                "\n",
-                "The alignment preserves sequence order and maintains sequence IDs from the input."
-            ]
-        },
-        {
-            "cell_type": "markdown",
-            "metadata": {},
-            "source": [
-                "## Configuration\n",
-                "\n",
-                "ClustalOmega requires the PyEED Docker service to be running. Make sure to:\n",
-                "1. Have Docker installed\n",
-                "2. Start the service with `docker-compose up -d`\n",
-                "3. The service runs on port 5001 by default"
-            ]
-        }
-    ],
-    "metadata": {
-        "kernelspec": {
-            "display_name": "pyeed_niklas",
-            "language": "python",
-            "name": "python3"
-        },
-        "language_info": {
-            "codemirror_mode": {
-                "name": "ipython",
-                "version": 3
-            },
-            "file_extension": ".py",
-            "mimetype": "text/x-python",
-            "name": "python",
-            "nbconvert_exporter": "python",
-            "pygments_lexer": "ipython3",
-            "version": "3.12.8"
-        }
-    },
-    "nbformat": 4,
-    "nbformat_minor": 2
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Multiple Sequence Alignment with Clustal Omega\n",
+    "\n",
+    "PyEED provides a convenient interface to Clustal Omega for multiple sequence alignment. This notebook demonstrates how to:\n",
+    "1. Align sequences from a dictionary\n",
+    "2. Align sequences directly from the database"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# change log level to INFO\n",
+    "import sys\n",
+    "\n",
+    "from loguru import logger\n",
+    "\n",
+    "from pyeed import Pyeed\n",
+    "from pyeed.model import Protein\n",
+    "from pyeed.tools.clustalo import ClustalOmega\n",
+    "\n",
+    "logger.remove()\n",
+    "level = logger.add(sys.stderr, level=\"INFO\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Direct Sequence Alignment\n",
+    "\n",
+    "You can align sequences directly by providing a dictionary of sequences:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Aligned sequences:\n",
+      "seq1  AKFVMPDRAWHLYTGNECSKQRLYVWFHDGAPILKTQSDNMGAYRCPLFHVTKNWEI\n",
+      "seq2  AKFVMPDRQWHLYTGQECSKQRLYVWFHDGAPILKTQSDNMGAYRCPLFHVTKNWEI\n",
+      "seq3  AKFVMPDRQWHLYTGNECSKQRLYVWFHDGAPILKTQADNMGAYRCALFHVTK----\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Initialize ClustalOmega\n",
+    "clustalo = ClustalOmega()\n",
+    "\n",
+    "# Example sequences\n",
+    "sequences = {\n",
+    "    \"seq1\": \"AKFVMPDRAWHLYTGNECSKQRLYVWFHDGAPILKTQSDNMGAYRCPLFHVTKNWEI\",\n",
+    "    \"seq2\": \"AKFVMPDRQWHLYTGQECSKQRLYVWFHDGAPILKTQSDNMGAYRCPLFHVTKNWEI\",\n",
+    "    \"seq3\": \"AKFVMPDRQWHLYTGNECSKQRLYVWFHDGAPILKTQADNMGAYRCALFHVTK\",\n",
+    "}\n",
+    "\n",
+    "# Perform alignment\n",
+    "alignment = clustalo.align(sequences)\n",
+    "print(\"Aligned sequences:\")\n",
+    "print(alignment)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Database-based Alignment\n",
+    "\n",
+    "You can also align sequences directly from the database by providing a list of accession IDs:"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Pyeed Graph Object Mapping constraints not defined. Use _install_labels() to set up model constraints.\n",
+      "📡 Connected to database.\n",
+      "Database alignment:\n",
+      "AAP20891.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "CAJ85677.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "SAQ02853.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "CDR98216.1      MSIQHFRVALIPFFAAFCFPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDRWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYMTGSQATMDERNRQIAEIGASLIKHW\n",
+      "WP_109963600.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDSWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGTGKRGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "CAA41038.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDHWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "WP_109874025.1  MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDSWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "CAA46344.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDSWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGASERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n",
+      "APG33178.1      MSIQHFRVALIPFFAAFCFPVFAHPETLVKVKDAEDQLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVKYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDSWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYMTGSQATMDERNRQIAEIGASLIKHW\n",
+      "AKC98298.1      MSIQHFRVALIPFFAAFCLPVFAHPETLVKVKDAEDKLGARVGYIELDLNSGKILESFRPEERFPMMSTFKVLLCGAVLSRVDAGQEQLGRRIHYSQNDLVEYSPVTEKHLTDGMTVRELCSAAITMSDNTAANLLLTTIGGPKELTAFLHNMGDHVTRLDHWEPELNEAIPNDERDTTMPAAMATTLRKLLTGELLTLASRQQLIDWMEADKVAGPLLRSALPAGWFIADKSGAGERGSRGIIAALGPDGKPSRIVVIYTTGSQATMDERNRQIAEIGASLIKHW\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Connect to database\n",
+    "pyeed = Pyeed(uri=\"bolt://129.69.129.130:7687\", user=\"neo4j\", password=\"12345678\")\n",
+    "\n",
+    "# Get protein IDs from database\n",
+    "accession_ids = [protein.accession_id for protein in Protein.nodes.all()][:10]\n",
+    "\n",
+    "# Align sequences from database\n",
+    "alignment = clustalo.align_from_db(accession_ids, pyeed.db)\n",
+    "print(\"Database alignment:\")\n",
+    "print(alignment)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Understanding Alignment Results\n",
+    "\n",
+    "The alignment result is a `MultipleSequenceAlignment` object with:\n",
+    "- List of `Sequence` objects\n",
+    "- Each sequence has an ID and aligned sequence\n",
+    "- Gaps are represented by '-' characters\n",
+    "- Sequences are padded to equal length\n",
+    "\n",
+    "The alignment preserves sequence order and maintains sequence IDs from the input."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Configuration\n",
+    "\n",
+    "ClustalOmega requires the PyEED Docker service to be running. Make sure to:\n",
+    "1. Have Docker installed\n",
+    "2. Start the service with `docker-compose up -d`\n",
+    "3. The service runs on port 5001 by default"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "pyeed_niklas",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.8"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
 }
diff --git a/docs/usage/embeddings_analysis.ipynb b/docs/usage/embeddings_analysis.ipynb
index 65a2398c..0b72e743 100644
--- a/docs/usage/embeddings_analysis.ipynb
+++ b/docs/usage/embeddings_analysis.ipynb
@@ -24,9 +24,10 @@
    "source": [
     "import sys\n",
     "\n",
-    "from loguru import logger\n",
-    "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "from loguru import logger\n",
+    "\n",
     "from pyeed import Pyeed\n",
     "from pyeed.analysis.embedding_analysis import EmbeddingTool\n",
     "\n",
diff --git a/docs/usage/mmseqs.ipynb b/docs/usage/mmseqs.ipynb
index 1185c6fe..2253fd8a 100644
--- a/docs/usage/mmseqs.ipynb
+++ b/docs/usage/mmseqs.ipynb
@@ -20,6 +20,7 @@
             "outputs": [],
             "source": [
                 "from pyeed import Pyeed\n",
+                "from pyeed.model import Protein\n",
                 "from pyeed.tools.mmseqs import MMSeqs"
             ]
         },
@@ -134,8 +135,6 @@
                 "pyeed = Pyeed(uri=\"bolt://localhost:7687\", user=\"neo4j\", password=\"12345678\")\n",
                 "\n",
                 "# Get first 100 protein IDs from database\n",
-                "from pyeed.model import Protein\n",
-                "\n",
                 "accession_ids = [protein.accession_id for protein in Protein.nodes.all()][:100]\n",
                 "\n",
                 "# Cluster sequences\n",
diff --git a/docs/usage/mutation_analysis.ipynb b/docs/usage/mutation_analysis.ipynb
index 9b31c996..7d10d360 100644
--- a/docs/usage/mutation_analysis.ipynb
+++ b/docs/usage/mutation_analysis.ipynb
@@ -16,6 +16,7 @@
    "outputs": [],
    "source": [
     "import sys\n",
+    "\n",
     "from loguru import logger\n",
     "\n",
     "from pyeed import Pyeed\n",
diff --git a/docs/usage/network_analysis.ipynb b/docs/usage/network_analysis.ipynb
index 4d45db71..0b254610 100644
--- a/docs/usage/network_analysis.ipynb
+++ b/docs/usage/network_analysis.ipynb
@@ -11,6 +11,7 @@
     "import matplotlib.pyplot as plt\n",
     "import networkx as nx\n",
     "from loguru import logger\n",
+    "\n",
     "from pyeed import Pyeed\n",
     "from pyeed.analysis.network_analysis import NetworkAnalysis\n",
     "from pyeed.analysis.sequence_alignment import PairwiseAligner\n",
diff --git a/docs/usage/standard_numbering.ipynb b/docs/usage/standard_numbering.ipynb
index cd84cad9..54374cd6 100644
--- a/docs/usage/standard_numbering.ipynb
+++ b/docs/usage/standard_numbering.ipynb
@@ -23,10 +23,10 @@
     "%reload_ext autoreload\n",
     "%autoreload 2\n",
     "import sys\n",
+    "\n",
     "from loguru import logger\n",
     "\n",
     "from pyeed import Pyeed\n",
-    "from pyeed.analysis.mutation_detection import MutationDetection\n",
     "from pyeed.analysis.standard_numbering import StandardNumberingTool\n",
     "\n",
     "logger.remove()\n",
diff --git a/pyproject.toml b/pyproject.toml
index 86edb7ea..b9897071 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -36,7 +36,10 @@ esm = "^3.1.3"
 rdflib = "^6.0.0"
 docker = "5.0.0"
 absl-py = "1.0.0"
+crc64iso = "0.0.2"
 SPARQLWrapper = "2.0.0"
+pysam = "0.23.0"
+types-requests = "2.32.0.20250328"
 
 [tool.poetry.group.dev.dependencies]
 mkdocstrings = {extras = ["python"], version = "^0.26.2"}
diff --git a/src/pyeed/adapter/ncbi_protein_mapper.py b/src/pyeed/adapter/ncbi_protein_mapper.py
index e11d4fe7..3ecf485c 100644
--- a/src/pyeed/adapter/ncbi_protein_mapper.py
+++ b/src/pyeed/adapter/ncbi_protein_mapper.py
@@ -281,6 +281,8 @@ def add_to_db(self, response: Response) -> None:
                 protein = Protein(**protein_data)
                 protein.save()
 
+            if not isinstance(organism, Organism):
+                raise TypeError(f"Expected Organism, but got {type(organism)}")
             protein.organism.connect(organism)
 
             # Add features
diff --git a/src/pyeed/adapter/ncbi_to_uniprot_mapper.py b/src/pyeed/adapter/ncbi_to_uniprot_mapper.py
new file mode 100644
index 00000000..1373547a
--- /dev/null
+++ b/src/pyeed/adapter/ncbi_to_uniprot_mapper.py
@@ -0,0 +1,132 @@
+import json
+import logging
+import os
+import sys
+from typing import List
+
+import httpx
+from crc64iso import crc64iso
+from pysam import FastaFile
+
+logger = logging.getLogger(__name__)
+
+
+class NCBIToUniprotMapper:
+    def __init__(self, ids: List[str], file: str):
+        self.ids = ids
+        self.file = file
+        self.uniparc_url = "https://www.ebi.ac.uk/proteins/api/uniparc?offset=0&size=100&sequencechecksum="
+        self.ncbi_url = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
+
+    def download_fasta(self, refseq_id: str) -> None:
+        """
+        Downloads a FASTA file for a given RefSeq ID using httpx and saves it locally.
+
+        Args:
+            refseq_id str: NCBI ID
+        """
+
+        params = {
+            "db": "protein",
+            "id": refseq_id,
+            "rettype": "fasta",
+            "retmode": "text",
+        }
+
+        try:
+            response = httpx.get(self.ncbi_url, params=params, timeout=10.0)
+
+            if response.status_code == 200:
+                filename = f"{refseq_id}.fasta"
+                with open(filename, "w") as f:
+                    f.write(response.text)
+                print(f"✅ Downloaded: {filename}")
+            else:
+                print(
+                    f"❌ Failed to download {refseq_id} (Status: {response.status_code})"
+                )
+
+        except httpx.HTTPError as e:
+            print(f"❌ HTTP error occurred while downloading {refseq_id}: {e}")
+
+    def get_checksum(self, refseq_id: str) -> str:
+        """Fetches and calculates the checksum for a given RefSeq ID.
+
+        Args:
+            refseq_id str: NCBI ID
+
+        Returns:
+            str: checksum ID
+        """
+
+        self.download_fasta(refseq_id)
+        fa = FastaFile(f"{refseq_id}.fasta")
+        seq = fa.fetch(fa.references[0])
+        return f"{crc64iso.crc64(seq)}"
+
+    def checksum_list(self, refseq_ids: List[str]) -> List[str]:
+        """Creates a list of checksum IDs and deletes the FASTA files after processing.
+
+        Args:
+            refseq_ids str: NCBI IDs
+
+        Returns:
+            List[str]: cheksum IDs
+        """
+
+        checksums = []
+        for refseq_id in refseq_ids:
+            checksums.append(self.get_checksum(refseq_id))
+            fasta_file_path = f"{refseq_id}.fasta"
+            fai_file_path = f"{refseq_id}.fasta.fai"
+
+            if os.path.exists(fasta_file_path):
+                os.remove(fasta_file_path)  # Delete the fasta file
+
+            if os.path.exists(fai_file_path):
+                os.remove(fai_file_path)
+        return checksums
+
+    def execute_request(self) -> None:
+        """Fetches the uniparc and uniprot ids for the given refseq ids and saves them in a json file."""
+
+        checksum_list = self.checksum_list(self.ids)
+
+        id_mapping_uniprot = {}
+        id_mapping_uniparc = {}
+        counter = 0
+
+        for checksum in checksum_list:
+            url = f"{self.uniparc_url}{checksum}"
+
+            # perform request and get response as JSON
+            with httpx.Client() as client:
+                response = client.get(url, headers={"Accept": "application/json"})
+
+            # check if the request was successful
+            if response.status_code != 200:
+                print(f"Request failed with status code {response.status_code}")
+                response.raise_for_status()  # Raise exception for any non-200 response
+                sys.exit()
+
+            # Check if the response body is empty
+            if not response.content.strip():  # Check if the body is empty
+                print("The response body is empty.")
+                sys.exit()
+
+            # extracts the uniprot and the uniparc id from the repsonse and saves them in a dictionary
+            response_body = response.json()
+            for item in response_body:
+                uniparc_id = item.get("accession", None)
+                for ref in item.get("dbReference", []):
+                    if ref.get("type") == "UniProtKB/TrEMBL":
+                        uniprot_id = ref.get("id", None)
+                        id_mapping_uniparc[self.ids[counter]] = uniparc_id
+                        id_mapping_uniprot[self.ids[counter]] = uniprot_id
+            counter += 1
+
+        with open(f"{self.file}_uniprot.json", "w") as f:
+            json.dump(id_mapping_uniprot, f)
+
+        with open(f"{self.file}_uniparc.json", "w") as f:
+            json.dump(id_mapping_uniparc, f)
diff --git a/src/pyeed/adapter/uniprot_mapper.py b/src/pyeed/adapter/uniprot_mapper.py
index 477249eb..f52d01b3 100644
--- a/src/pyeed/adapter/uniprot_mapper.py
+++ b/src/pyeed/adapter/uniprot_mapper.py
@@ -1,9 +1,9 @@
 import json
 from collections import defaultdict
-from typing import Any, List
+from typing import Any, List, Optional
 
 import requests
-from bs4 import BeautifulSoup
+from bs4 import BeautifulSoup, Tag
 from httpx import Response
 from loguru import logger
 from SPARQLWrapper import JSON, SPARQLWrapper
@@ -82,13 +82,15 @@ def add_sites(self, record: dict[str, Any], protein: Protein) -> None:
             site.save()
 
             protein.site.connect(site, {"positions": positions})
-    
-    def get_substrates_and_products_from_rhea(self, rhea_id: str) -> dict[str, List[str]]:
+
+    def get_substrates_and_products_from_rhea(
+        self, rhea_id: str
+    ) -> dict[str, List[str]]:
         """Fetch substrates and products from Rhea by parsing the side URI (_L = substrate, _R = product).
-        
+
         Args:
             rhea_id (str or int): The Rhea reaction ID (e.g., 49528)
-        
+
         Returns:
             dict: {
                 'substrates': [list of chebi URIs],
@@ -118,7 +120,11 @@ def get_substrates_and_products_from_rhea(self, rhea_id: str) -> dict[str, List[
         sparql.setReturnFormat(JSON)
         sparql.addCustomHttpHeader("User-Agent", "MyPythonClient/1.0")
 
-        results = sparql.query().convert()
+        results_raw = sparql.query().convert()
+        if not isinstance(results_raw, dict):
+            raise TypeError("Expected dict from SPARQL query")
+
+        results: dict[str, Any] = results_raw
 
         substrates = set()
         products = set()
@@ -134,17 +140,13 @@ def get_substrates_and_products_from_rhea(self, rhea_id: str) -> dict[str, List[
             elif side_uri.endswith("_R"):
                 products.add(chebi_uri)
 
-        return {
-            "substrates": sorted(substrates),
-            "products": sorted(products)
-        }
+        return {"substrates": sorted(substrates), "products": sorted(products)}
 
-    
-    def get_smiles_from_chebi_web(self, chebi_url: str) -> str:
+    def get_smiles_from_chebi_web(self, chebi_url: str) -> Optional[str]:
         """
         Extract SMILES from the official ChEBI page using HTML scraping.
         """
-        chebi_id = chebi_url.split('_')[-1]
+        chebi_id = chebi_url.split("_")[-1]
         url = f"https://www.ebi.ac.uk/chebi/searchId.do?chebiId=CHEBI:{chebi_id}"
 
         response = requests.get(url)
@@ -152,12 +154,21 @@ def get_smiles_from_chebi_web(self, chebi_url: str) -> str:
 
         # Look for table rows that contain the SMILES label
         for table in soup.find_all("table", class_="chebiTableContent"):
+            if not isinstance(table, Tag):
+                continue
             for row in table.find_all("tr"):
+                if not isinstance(row, Tag):
+                    continue
                 headers = row.find_all("td", class_="chebiDataHeader")
-                if headers and "SMILES" in headers[0].text:
-                    data_cell = row.find_all("td")[-1]  # Get the last <td> in row
-                    return data_cell.text.strip()
-                
+                if (
+                    headers
+                    and isinstance(headers[0], Tag)
+                    and "SMILES" in headers[0].text
+                ):
+                    data_cells = row.find_all("td")
+                    if data_cells:
+                        return f"{data_cells[-1].text.strip()}"
+        return None
 
     def add_reaction(self, record: dict[str, Any], protein: Protein) -> None:
         for reference in record.get("comments", []):  # Safe retrieval with .get()
@@ -168,39 +179,39 @@ def add_reaction(self, record: dict[str, Any], protein: Protein) -> None:
                     if db_ref.get("id", "").startswith("RHEA:"):
                         rhea_id = db_ref["id"]
                         break  # Stop after finding the first match
-                
+
                 catalytic_annotation = Reaction.get_or_save(
                     rhea_id=rhea_id,
                 )
-                self.add_molecule(rhea_id, catalytic_annotation)
-                protein.reaction.connect(catalytic_annotation)
+                if rhea_id is not None:
+                    self.add_molecule(rhea_id, catalytic_annotation)
+                    protein.reaction.connect(catalytic_annotation)
 
     def add_molecule(self, rhea_id: str, reaction: Reaction) -> None:
-    
         chebi = self.get_substrates_and_products_from_rhea(rhea_id)
 
         substrate_ids = chebi["substrates"]
         product_ids = chebi["products"]
-        
+
         for i in substrate_ids:
             smiles = self.get_smiles_from_chebi_web(i)
-            
-            chebi_id = i.split('_')[-1]
+
+            chebi_id = i.split("_")[-1]
             chebi_id = f"CHEBI:{chebi_id}"
             substrate = Molecule.get_or_save(
                 chebi_id=chebi_id,
-                smiles = smiles,
+                smiles=smiles,
             )
             reaction.substrate.connect(substrate)
-        
+
         for i in product_ids:
             smiles = self.get_smiles_from_chebi_web(i)
 
-            chebi_id = i.split('_')[-1]
+            chebi_id = i.split("_")[-1]
             chebi_id = f"CHEBI:{chebi_id}"
             product = Molecule.get_or_save(
                 chebi_id=chebi_id,
-                smiles = smiles,
+                smiles=smiles,
             )
             reaction.product.connect(product)
 
diff --git a/src/pyeed/analysis/mutation_detection.py b/src/pyeed/analysis/mutation_detection.py
index 5c6809e8..c2562ae1 100644
--- a/src/pyeed/analysis/mutation_detection.py
+++ b/src/pyeed/analysis/mutation_detection.py
@@ -1,7 +1,6 @@
 from typing import Any, Optional
 
 from loguru import logger
-
 from pyeed.dbconnect import DatabaseConnector
 
 
diff --git a/src/pyeed/analysis/network_analysis.py b/src/pyeed/analysis/network_analysis.py
index fd354ebe..dd66b45c 100644
--- a/src/pyeed/analysis/network_analysis.py
+++ b/src/pyeed/analysis/network_analysis.py
@@ -66,57 +66,51 @@ def create_graph(
         base_query = """
         MATCH (n)
         """
-        
+
         # Add node filters
         node_filters = []
         if nodes:
             node_filters.append("labels(n)[0] IN $node_types")
         if ids:
             node_filters.append("n.accession_id IN $accession_ids")
-            
+
         if node_filters:
             base_query += "WHERE " + " AND ".join(node_filters)
-            
+
         # Add relationship pattern and filters
         base_query += """
         OPTIONAL MATCH (n)-[r]->(m)
         """
-        
+
         # Add relationship type filter if specified
         if relationships:
             base_query += "WHERE type(r) IN $relationships "
-            
+
         # Return both nodes and relationships in a single query
         base_query += """
         RETURN 
             collect(DISTINCT {id: ID(n), labels: labels(n), properties: properties(n)}) as nodes,
             collect(DISTINCT {source: ID(n), target: ID(m), type: type(r), properties: properties(r)}) as relationships
         """
-        
+
         logger.info("Executing combined query for nodes and relationships")
         results = self.db.execute_read(
             base_query,
-            {
-                "node_types": nodes,
-                "accession_ids": ids,
-                "relationships": relationships
-            }
+            {"node_types": nodes, "accession_ids": ids, "relationships": relationships},
         )
-        
+
         if not results or not results[0]:
             logger.warning("No results found in the database")
             return self.graph
-            
+
         # Process nodes
         nodes_data = results[0]["nodes"]
         for node in nodes_data:
             self.graph.add_node(
-                node["id"],
-                labels=node["labels"],
-                properties=node["properties"]
+                node["id"], labels=node["labels"], properties=node["properties"]
             )
         logger.info(f"Added {len(nodes_data)} nodes to the graph")
-        
+
         # Process relationships
         relationships_data = results[0]["relationships"]
         for rel in relationships_data:
@@ -125,10 +119,10 @@ def create_graph(
                     rel["source"],
                     rel["target"],
                     type=rel["type"],
-                    properties=rel["properties"]
+                    properties=rel["properties"],
                 )
         logger.info(f"Added {len(relationships_data)} relationships to the graph")
-        
+
         return self.graph
 
     def compute_degree_centrality(self) -> dict[Any, float]:
@@ -263,8 +257,8 @@ def calculate_positions_2d(
         filtered_graph.remove_edges_from(self_referential_edges)
 
         # Find isolated nodes
-        #isolated_nodes = self.find_isolated_nodes(filtered_graph)
-        #filtered_graph.remove_nodes_from(isolated_nodes)
+        # isolated_nodes = self.find_isolated_nodes(filtered_graph)
+        # filtered_graph.remove_nodes_from(isolated_nodes)
 
         # Use spring layout for force-directed graph
         weight_attr = attribute if attribute is not None else None
diff --git a/src/pyeed/analysis/sequence_alignment.py b/src/pyeed/analysis/sequence_alignment.py
index cb6acff4..440cbb1e 100644
--- a/src/pyeed/analysis/sequence_alignment.py
+++ b/src/pyeed/analysis/sequence_alignment.py
@@ -5,10 +5,9 @@
 from Bio.Align import PairwiseAligner as BioPairwiseAligner
 from Bio.Align.substitution_matrices import Array as BioSubstitutionMatrix
 from joblib import Parallel, cpu_count, delayed
-from rich.progress import Progress
-
 from pyeed.dbconnect import DatabaseConnector
 from pyeed.tools.utility import chunks
+from rich.progress import Progress
 
 
 class PairwiseAligner:
diff --git a/src/pyeed/dbconnect.py b/src/pyeed/dbconnect.py
index d208deec..8abcab52 100644
--- a/src/pyeed/dbconnect.py
+++ b/src/pyeed/dbconnect.py
@@ -227,8 +227,11 @@ def _get_driver(uri: str, user: str | None, password: str | None) -> Driver:
         Creates a new Neo4j driver instance.
         """
         auth = (user, password) if user and password else None
-        return GraphDatabase.driver(uri, auth=auth, connection_timeout=60,  # Increase initial connection timeout
-        max_connection_lifetime=86400,  # Keep connections alive longer
+        return GraphDatabase.driver(
+            uri,
+            auth=auth,
+            connection_timeout=60,  # Increase initial connection timeout
+            max_connection_lifetime=86400,  # Keep connections alive longer
         )
 
     @property
diff --git a/src/pyeed/embedding.py b/src/pyeed/embedding.py
index a0229385..28f66a1b 100644
--- a/src/pyeed/embedding.py
+++ b/src/pyeed/embedding.py
@@ -10,6 +10,7 @@
 from huggingface_hub import HfFolder, login
 from loguru import logger
 from numpy.typing import NDArray
+from torch.nn import DataParallel, Module
 from transformers import EsmModel, EsmTokenizer
 
 from pyeed.dbconnect import DatabaseConnector
@@ -32,7 +33,14 @@ def get_hf_token() -> str:
         raise RuntimeError("Failed to get Hugging Face token")
 
 
-def process_batches_on_gpu(data, batch_size, model, tokenizer, device, db):
+def process_batches_on_gpu(
+    data: list[tuple[str, str]],
+    batch_size: int,
+    model: Module,
+    tokenizer: EsmTokenizer,
+    db: DatabaseConnector,
+    device: torch.device,
+) -> None:
     """
     Splits data into batches and processes them on a single GPU.
 
@@ -88,8 +96,8 @@ def process_batches_on_gpu(data, batch_size, model, tokenizer, device, db):
 
 def load_model_and_tokenizer(
     model_name: str,
-    device: str,
-) -> Tuple[Any, Union[Any, None], str]:
+    device: torch.device,
+) -> Tuple[Any, Union[Any, None], torch.device]:
     """
     Loads the model and assigns it to a specific GPU.
 
@@ -125,7 +133,7 @@ def get_batch_embeddings(
     model: Union[
         EsmModel,
         ESMC,
-        torch.nn.DataParallel,
+        DataParallel[Module],
         ESM3InferenceClient,
         ESM3,
     ],
@@ -209,7 +217,9 @@ def get_batch_embeddings(
 
 
 def calculate_single_sequence_embedding_last_hidden_state(
-    sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D"
+    sequence: str,
+    device: torch.device,
+    model_name: str = "facebook/esm2_t33_650M_UR50D",
 ) -> NDArray[np.float64]:
     """
     Calculates an embedding for a single sequence.
@@ -221,12 +231,14 @@ def calculate_single_sequence_embedding_last_hidden_state(
     Returns:
         NDArray[np.float64]: Normalized embedding vector for the sequence
     """
-    model, tokenizer, device = load_model_and_tokenizer(model_name)
+    model, tokenizer, device = load_model_and_tokenizer(model_name, device)
     return get_single_embedding_last_hidden_state(sequence, model, tokenizer, device)
 
 
 def calculate_single_sequence_embedding_all_layers(
-    sequence: str, model_name: str = "facebook/esm2_t33_650M_UR50D"
+    sequence: str,
+    device: torch.device,
+    model_name: str = "facebook/esm2_t33_650M_UR50D",
 ) -> NDArray[np.float64]:
     """
     Calculates embeddings for a single sequence across all layers.
@@ -238,7 +250,7 @@ def calculate_single_sequence_embedding_all_layers(
     Returns:
         NDArray[np.float64]: A numpy array containing layer embeddings for the sequence.
     """
-    model, tokenizer, device = load_model_and_tokenizer(model_name)
+    model, tokenizer, device = load_model_and_tokenizer(model_name, device)
     return get_single_embedding_all_layers(sequence, model, tokenizer, device)
 
 
diff --git a/src/pyeed/main.py b/src/pyeed/main.py
index cf0daed2..1189fcb3 100644
--- a/src/pyeed/main.py
+++ b/src/pyeed/main.py
@@ -9,6 +9,7 @@
 
 from pyeed.adapter.ncbi_dna_mapper import NCBIDNAToPyeed
 from pyeed.adapter.ncbi_protein_mapper import NCBIProteinToPyeed
+from pyeed.adapter.ncbi_to_uniprot_mapper import NCBIToUniprotMapper
 from pyeed.adapter.primary_db_adapter import PrimaryDBAdapter
 from pyeed.adapter.uniprot_mapper import UniprotToPyeed
 from pyeed.dbchat import DBChat
@@ -190,14 +191,27 @@ def fetch_ncbi_nucleotide(self, ids: list[str]) -> None:
         nest_asyncio.apply()
         asyncio.get_event_loop().run_until_complete(adapter.execute_requests())
 
+    def database_id_mapper(self, ids: list[str], file: str) -> None:
+        """
+        Maps IDs from one database to another using the UniProt ID mapping service
+
+        Args:
+            ids (list[str]): List of IDs to map.
+        """
+
+        mapper = NCBIToUniprotMapper(ids, file)
+        mapper.execute_request()
+
+        nest_asyncio.apply()
+
     def calculate_sequence_embeddings(
         self,
         batch_size: int = 16,
         model_name: str = "facebook/esm2_t33_650M_UR50D",
-        num_gpus: int = None,  # Number of GPUs to use
-        ) -> None:
+        num_gpus: int = 1,  # Number of GPUs to use
+    ) -> None:
         """
-        Calculates embeddings for all sequences in the database that do not have embeddings, 
+        Calculates embeddings for all sequences in the database that do not have embeddings,
         distributing the workload across available GPUs.
 
         Args:
@@ -215,7 +229,12 @@ def calculate_sequence_embeddings(
             logger.warning("No GPU available! Running on CPU.")
 
         # Load separate models for each GPU
-        devices = [f"cuda:{i}" for i in range(num_gpus)] if num_gpus > 0 else ["cpu"]
+        devices = (
+            [torch.device(f"cuda:{i}") for i in range(num_gpus)]
+            if num_gpus > 0
+            else [torch.device("cpu")]
+        )
+
         models_and_tokenizers = [
             load_model_and_tokenizer(model_name, device) for device in devices
         ]
@@ -228,18 +247,19 @@ def calculate_sequence_embeddings(
         """
         results = self.db.execute_read(query)
         data = [(result["accession"], result["sequence"]) for result in results]
-        
+
         if not data:
             logger.info("No sequences to process.")
             return
-        
+
         accessions, sequences = zip(*data)
         total_sequences = len(sequences)
         logger.debug(f"Total sequences to process: {total_sequences}")
 
         # Split the data into num_gpus chunks
         gpu_batches = [
-            list(zip(accessions[i::num_gpus], sequences[i::num_gpus])) for i in range(num_gpus)
+            list(zip(accessions[i::num_gpus], sequences[i::num_gpus]))
+            for i in range(num_gpus)
         ]
 
         start_time = time.time()
@@ -259,17 +279,18 @@ def calculate_sequence_embeddings(
                         batch_size,
                         model,
                         tokenizer,
+                        self.db,
                         device,
-                        self.db
                     )
                 )
-            
+
             for future in futures:
                 future.result()  # Wait for all threads to complete
 
-
         end_time = time.time()
-        logger.info(f"Total embedding calculation time: {end_time - start_time:.2f} seconds")
+        logger.info(
+            f"Total embedding calculation time: {end_time - start_time:.2f} seconds"
+        )
 
         # Cleanup
         for model, _, _ in models_and_tokenizers:
diff --git a/src/pyeed/model.py b/src/pyeed/model.py
index 95f9ffce..5a3bf188 100644
--- a/src/pyeed/model.py
+++ b/src/pyeed/model.py
@@ -1,5 +1,5 @@
 from enum import Enum
-from typing import Any
+from typing import Any, cast
 
 # from pyeed.nodes_and_relations import StrictStructuredNode
 from neomodel import (
@@ -112,13 +112,12 @@ def save(self, *args: Any, **kwargs: Any) -> None:
                 elif isinstance(base_property, FloatProperty):
                     if not all(isinstance(item, float) for item in prop):
                         raise TypeError(f"All items in '{field}' must be floats")
-                    
-            #Validate BoleanProperty
+
+            # Validate BoleanProperty
             elif isinstance(neo_type, BooleanProperty) and not isinstance(prop, bool):
                 raise TypeError(
                     f"Expected a boolean for '{field}', got {type(prop).__name__}"
                 )
-                
 
         super().save(*args, **kwargs)  # Don't return the result
 
@@ -154,6 +153,22 @@ class Organism(StrictStructuredNode):
     taxonomy_id = IntegerProperty(required=True, unique_index=True)
     name = StringProperty()
 
+    @classmethod
+    def get_or_save(cls, **kwargs: Any) -> "Organism":
+        taxonomy_id = kwargs.get("taxonomy_id")
+        name = kwargs.get("name")
+        try:
+            organism = cast(Organism, cls.nodes.get(taxonomy_id=taxonomy_id))
+            return organism
+        except cls.DoesNotExist:
+            try:
+                organism = cls(taxonomy_id=taxonomy_id, name=name)
+                organism.save()
+                return organism
+            except Exception as e:
+                print(f"Error during saving of the organism: {e}")
+                raise
+
 
 class Mutation(StructuredRel):  # type: ignore
     """A relationship representing mutations between two sequences."""
@@ -381,33 +396,35 @@ class Reaction(StrictStructuredNode):
     """
     A node representing a reaction.
     """
-    
+
     rhea_id = StringProperty(unique_index=True, required=True)
     chebi_id = ArrayProperty(StringProperty())
 
     # Relationships
     substrate = RelationshipTo("Molecule", "SUBSTRATE")
     product = RelationshipTo("Molecule", "PRODUCT")
-    
-    
+
     @property
     def label(self) -> str:
         """The label of the reaction."""
-        return {self.rhea_id}
+        return f"{self.rhea_id}"
+
 
 class Molecule(StrictStructuredNode):
     """
     A node representing a molecule in the database.
     """
-    
+
     chebi_id = StringProperty(unique_index=True, required=True)
     rhea_compound_id = StringProperty()
     smiles = StringProperty()
 
     @classmethod
-    def get_or_save(cls, chebi_id, smiles) -> "Molecule":
+    def get_or_save(cls, **kwargs: Any) -> "Molecule":
+        chebi_id = kwargs.get("chebi_id")
+        smiles = kwargs.get("smiles")
         try:
-            molecule = cls.nodes.get(chebi_id=chebi_id)
+            molecule = cast(Molecule, cls.nodes.get(chebi_id=chebi_id))
             return molecule
         except cls.DoesNotExist:
             try:
@@ -417,13 +434,13 @@ def get_or_save(cls, chebi_id, smiles) -> "Molecule":
             except Exception as e:
                 print(f"Error during saving of the molecule: {e}")
                 raise
-    
-    @property 
+
+    @property
     def label(self) -> str:
         """The label of the molecule."""
-        return {self.chebi_id}
-    
-    
+        return f"{self.chebi_id}"
+
+
 class StandardNumbering(StrictStructuredNode):
     name = StringProperty(required=True, unique_index=True)
     definition = StringProperty(required=True)

	subject_id	identity	alignment_length	mismatches	gap_opens	query_start	query_end	subject_start	subject_end	evalue	bit_score
0	seq7	81.818	22	3	1	31	51	11	32	0.003	22.3
1	seq1	100.000	25	0	0	1	25	1	25	0.004	22.3
2	seq2	61.538	26	10	0	20	45	5	30	0.038	19.2