PyEED · haeussma · May 4, 2025 · Mar 11, 2025 · Mar 13, 2025 · Mar 14, 2025
diff --git a/docs/usage/blast.ipynb b/docs/usage/blast.ipynb
diff --git a/docs/usage/clustalo.ipynb b/docs/usage/clustalo.ipynb
diff --git a/docs/usage/embeddings_analysis.ipynb b/docs/usage/embeddings_analysis.ipynb
@@ -24,9 +24,10 @@
    "source": [
     "import sys\n",
     "\n",
-    "from loguru import logger\n",
-    "import pandas as pd\n",
     "import matplotlib.pyplot as plt\n",
+    "import pandas as pd\n",
+    "from loguru import logger\n",
+    "\n",
     "from pyeed import Pyeed\n",
     "from pyeed.analysis.embedding_analysis import EmbeddingTool\n",
     "\n",

diff --git a/docs/usage/mmseqs.ipynb b/docs/usage/mmseqs.ipynb
@@ -20,6 +20,7 @@
             "outputs": [],
             "source": [
                 "from pyeed import Pyeed\n",
+                "from pyeed.model import Protein\n",
                 "from pyeed.tools.mmseqs import MMSeqs"
             ]
         },
@@ -134,8 +135,6 @@
                 "pyeed = Pyeed(uri=\"bolt://localhost:7687\", user=\"neo4j\", password=\"12345678\")\n",
                 "\n",
                 "# Get first 100 protein IDs from database\n",
-                "from pyeed.model import Protein\n",
-                "\n",
                 "accession_ids = [protein.accession_id for protein in Protein.nodes.all()][:100]\n",
                 "\n",
                 "# Cluster sequences\n",

diff --git a/docs/usage/mutation_analysis.ipynb b/docs/usage/mutation_analysis.ipynb
@@ -11,11 +11,12 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 9,
    "metadata": {},
    "outputs": [],
    "source": [
     "import sys\n",
+    "\n",
     "from loguru import logger\n",
     "\n",
     "from pyeed import Pyeed\n",
@@ -37,7 +38,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 10,
    "metadata": {},
    "outputs": [
     {
@@ -56,7 +57,7 @@
     "\n",
     "eedb = Pyeed(uri, user=user, password=password)\n",
     "\n",
-    "eedb.db.wipe_database(date=\"2025-03-14\")"
+    "eedb.db.wipe_database(date=\"2025-03-19\")"
    ]
   },
   {
@@ -75,14 +76,15 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [],
    "source": [
     "ids = [\"AAM15527.1\", \"AAF05614.1\", \"AFN21551.1\", \"CAA76794.1\", \"AGQ50511.1\"]\n",
     "\n",
     "eedb.fetch_from_primary_db(ids, db=\"ncbi_protein\")\n",
-    "eedb.fetch_dna_entries_for_proteins()"
+    "eedb.fetch_dna_entries_for_proteins()\n",
+    "eedb.create_coding_sequences_regions()"
    ]
   },
   {
@@ -100,9 +102,42 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 12,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "6ed852d438ab480fa4d1c6129eacfd26",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Output()"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Region ids: [143, 129, 128, 69, 9]\n",
+      "len of ids: 5\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
+      ],
+      "text/plain": []
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "sn_protein = StandardNumberingTool(name=\"test_standard_numbering_protein\")\n",
     "\n",
@@ -111,11 +146,22 @@
     "    base_sequence_id=\"AAM15527.1\", db=eedb.db, list_of_seq_ids=ids\n",
     ")\n",
     "\n",
-    "sn_dna = StandardNumberingTool(name=\"test_standard_numbering_dna\")\n",
+    "sn_dna = StandardNumberingTool(name=\"test_standard_numbering_dna_pairwise\")\n",
     "\n",
-    "sn_dna.apply_standard_numbering(\n",
-    "    base_sequence_id=\"AF190695.1\", db=eedb.db, node_type=\"DNA\"\n",
-    ")\n"
+    "query_get_region_ids = \"\"\"\n",
+    "MATCH (p:Protein)<-[rel:ENCODES]-(d:DNA)-[rel2:HAS_REGION]->(r:Region)\n",
+    "WHERE r.annotation = $region_annotation AND p.accession_id IN $protein_id\n",
+    "RETURN id(r)\n",
+    "\"\"\"\n",
+    "\n",
+    "region_ids = eedb.db.execute_read(query_get_region_ids, parameters={\"protein_id\": ids, \"region_annotation\": \"coding sequence\"})\n",
+    "region_ids = [id['id(r)'] for id in region_ids]\n",
+    "print(f\"Region ids: {region_ids}\")\n",
+    "print(f\"len of ids: {len(ids)}\")\n",
+    "\n",
+    "sn_dna.apply_standard_numbering_pairwise(\n",
+    "    base_sequence_id=\"AF190695.1\", db=eedb.db, node_type=\"DNA\", region_ids_neo4j=region_ids\n",
+    ")"
    ]
   },
   {
@@ -136,7 +182,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 13,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -153,18 +199,19 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 14,
    "metadata": {},
    "outputs": [],
    "source": [
     "md = MutationDetection()\n",
     "\n",
+    "\n",
     "seq1 = \"AF190695.1\"\n",
     "seq2 = \"JX042489.1\"\n",
-    "name_of_standard_numbering_tool = \"test_standard_numbering_dna\"\n",
+    "name_of_standard_numbering_tool = \"test_standard_numbering_dna_pairwise\"\n",
     "\n",
     "mutations_dna = md.get_mutations_between_sequences(\n",
-    "    seq1, seq2, eedb.db, name_of_standard_numbering_tool, node_type=\"DNA\"\n",
+    "    seq1, seq2, eedb.db, name_of_standard_numbering_tool, node_type=\"DNA\", region_ids_neo4j=region_ids\n",
     ")"
    ]
   },
@@ -183,14 +230,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": 15,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "{'from_positions': [241, 272, 125], 'to_positions': [241, 272, 125], 'from_monomers': ['R', 'D', 'V'], 'to_monomers': ['S', 'N', 'I']}\n"
+      "{'from_positions': [241, 125, 272], 'to_positions': [241, 125, 272], 'from_monomers': ['R', 'V', 'D'], 'to_monomers': ['S', 'I', 'N']}\n"
      ]
     }
    ],
@@ -216,29 +263,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 16,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Mutation on position 682 -> 615 with a nucleotide change of T -> C\n",
-      "Mutation on position 407 -> 340 with a nucleotide change of C -> A\n",
-      "Mutation on position 92 -> 25 with a nucleotide change of C -> A\n",
-      "Mutation on position 162 -> 95 with a nucleotide change of G -> T\n",
-      "Mutation on position 929 -> 862 with a nucleotide change of A -> C\n",
-      "Mutation on position 346 -> 279 with a nucleotide change of A -> G\n",
-      "Mutation on position 87 -> 20 with a nucleotide change of C -> A\n",
-      "Mutation on position 88 -> 21 with a nucleotide change of T -> C\n",
-      "Mutation on position 130 -> 63 with a nucleotide change of C -> T\n",
-      "Mutation on position 175 -> 108 with a nucleotide change of G -> A\n",
-      "Mutation on position 131 -> 64 with a nucleotide change of T -> C\n",
-      "Mutation on position 132 -> 65 with a nucleotide change of A -> T\n",
-      "Mutation on position 914 -> 847 with a nucleotide change of G -> A\n",
-      "Mutation on position 604 -> 537 with a nucleotide change of T -> G\n",
-      "Mutation on position 925 -> 858 with a nucleotide change of G -> A\n",
-      "Mutation on position 226 -> 159 with a nucleotide change of T -> C\n"
+      "Mutation on position 705 -> 705 with a nucleotide change of G -> A\n",
+      "Mutation on position 395 -> 395 with a nucleotide change of T -> G\n",
+      "Mutation on position 137 -> 137 with a nucleotide change of A -> G\n",
+      "Mutation on position 17 -> 17 with a nucleotide change of T -> C\n",
+      "Mutation on position 473 -> 473 with a nucleotide change of T -> C\n",
+      "Mutation on position 716 -> 716 with a nucleotide change of G -> A\n",
+      "Mutation on position 720 -> 720 with a nucleotide change of A -> C\n",
+      "Mutation on position 198 -> 198 with a nucleotide change of C -> A\n"
      ]
     }
    ],

diff --git a/docs/usage/network_analysis.ipynb b/docs/usage/network_analysis.ipynb
@@ -11,6 +11,7 @@
     "import matplotlib.pyplot as plt\n",
     "import networkx as nx\n",
     "from loguru import logger\n",
+    "\n",
     "from pyeed import Pyeed\n",
     "from pyeed.analysis.network_analysis import NetworkAnalysis\n",
     "from pyeed.analysis.sequence_alignment import PairwiseAligner\n",