PyEED · haeussma · May 8, 2025 · May 1, 2025 · May 1, 2025 · May 1, 2025
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
@@ -10,7 +10,7 @@ jobs:
       - name: Set up Python
         uses: actions/setup-python@v4
         with:
-          python-version: "3.x"
+          python-version: "3.12"
 
       - name: Install dependencies
         run: |

diff --git a/docs/usage/mutation_analysis.ipynb b/docs/usage/mutation_analysis.ipynb
@@ -166,13 +166,19 @@
     "RETURN id(r)\n",
     "\"\"\"\n",
     "\n",
-    "region_ids = eedb.db.execute_read(query_get_region_ids, parameters={\"protein_id\": ids, \"region_annotation\": \"coding sequence\"})\n",
-    "region_ids = [id['id(r)'] for id in region_ids]\n",
+    "region_ids = eedb.db.execute_read(\n",
+    "    query_get_region_ids,\n",
+    "    parameters={\"protein_id\": ids, \"region_annotation\": \"coding sequence\"},\n",
+    ")\n",
+    "region_ids = [id[\"id(r)\"] for id in region_ids]\n",
     "print(f\"Region ids: {region_ids}\")\n",
     "print(f\"len of ids: {len(ids)}\")\n",
     "\n",
     "sn_dna.apply_standard_numbering_pairwise(\n",
-    "    base_sequence_id=\"AF190695.1\", db=eedb.db, node_type=\"DNA\", region_ids_neo4j=region_ids\n",
+    "    base_sequence_id=\"AF190695.1\",\n",
+    "    db=eedb.db,\n",
+    "    node_type=\"DNA\",\n",
+    "    region_ids_neo4j=region_ids,\n",
     ")"
    ]
   },
@@ -223,7 +229,12 @@
     "name_of_standard_numbering_tool = \"test_standard_numbering_dna_pairwise\"\n",
     "\n",
     "mutations_dna = md.get_mutations_between_sequences(\n",
-    "    seq1, seq2, eedb.db, name_of_standard_numbering_tool, node_type=\"DNA\", region_ids_neo4j=region_ids\n",
+    "    seq1,\n",
+    "    seq2,\n",
+    "    eedb.db,\n",
+    "    name_of_standard_numbering_tool,\n",
+    "    node_type=\"DNA\",\n",
+    "    region_ids_neo4j=region_ids,\n",
     ")"
    ]
   },
@@ -306,8 +317,10 @@
     }
    ],
    "source": [
-    "for i in range(len(mutations_dna['from_positions'])):\n",
-    "    print(f\"Mutation on position {mutations_dna['from_positions'][i]} -> {mutations_dna['to_positions'][i]} with a nucleotide change of {mutations_dna['from_monomers'][i]} -> {mutations_dna['to_monomers'][i]}\")"
+    "for i in range(len(mutations_dna[\"from_positions\"])):\n",
+    "    print(\n",
+    "        f\"Mutation on position {mutations_dna['from_positions'][i]} -> {mutations_dna['to_positions'][i]} with a nucleotide change of {mutations_dna['from_monomers'][i]} -> {mutations_dna['to_monomers'][i]}\"\n",
+    "    )"
    ]
   },
   {

diff --git a/docs/usage/standard_numbering.ipynb b/docs/usage/standard_numbering.ipynb
@@ -96,7 +96,7 @@
     "eedb = Pyeed(uri, user=user, password=password)\n",
     "eedb.db.wipe_database(date=\"2025-03-19\")\n",
     "\n",
-    "eedb.db.initialize_db_constraints(user=user, password=password)\n"
+    "eedb.db.initialize_db_constraints(user=user, password=password)"
    ]
   },
   {
@@ -148,7 +148,7 @@
     "\n",
     "sn.apply_standard_numbering_pairwise(\n",
     "    base_sequence_id=\"AAM15527.1\", db=eedb.db, list_of_seq_ids=ids[0:5]\n",
-    ")\n"
+    ")"
    ]
   },
   {
@@ -184,7 +184,7 @@
    "source": [
     "sn.apply_standard_numbering_pairwise(\n",
     "    base_sequence_id=\"AAM15527.1\", db=eedb.db, list_of_seq_ids=ids\n",
-    ")\n"
+    ")"
    ]
   },
   {
@@ -290,7 +290,9 @@
     }
    ],
    "source": [
-    "sn_dna_region = StandardNumberingTool(name=\"test_standard_numbering_dna_pairwise_region\")\n",
+    "sn_dna_region = StandardNumberingTool(\n",
+    "    name=\"test_standard_numbering_dna_pairwise_region\"\n",
+    ")\n",
     "\n",
     "\n",
     "ids = [\"AAM15527.1\", \"AAF05614.1\", \"AFN21551.1\", \"CAA76794.1\", \"AGQ50511.1\"]\n",
@@ -302,14 +304,20 @@
     "RETURN id(r)\n",
     "\"\"\"\n",
     "\n",
-    "region_ids = eedb.db.execute_read(query_get_region_ids, parameters={\"protein_id\": ids, \"region_annotation\": \"coding sequence\"})\n",
-    "region_ids = [id['id(r)'] for id in region_ids]\n",
+    "region_ids = eedb.db.execute_read(\n",
+    "    query_get_region_ids,\n",
+    "    parameters={\"protein_id\": ids, \"region_annotation\": \"coding sequence\"},\n",
+    ")\n",
+    "region_ids = [id[\"id(r)\"] for id in region_ids]\n",
     "print(f\"Region ids: {region_ids}\")\n",
     "print(f\"len of ids: {len(ids)}\")\n",
     "\n",
     "\n",
     "sn_dna_region.apply_standard_numbering_pairwise(\n",
-    "    base_sequence_id=\"AF190695.1\", db=eedb.db, node_type=\"DNA\", region_ids_neo4j=region_ids\n",
+    "    base_sequence_id=\"AF190695.1\",\n",
+    "    db=eedb.db,\n",
+    "    node_type=\"DNA\",\n",
+    "    region_ids_neo4j=region_ids,\n",
     ")"
    ]
   },

diff --git a/src/pyeed/analysis/mutation_detection.py b/src/pyeed/analysis/mutation_detection.py
@@ -17,23 +17,24 @@ def get_sequence_data(
         node_type: str = "Protein",
         region_ids_neo4j: Optional[list[int]] = None,
     ) -> tuple[dict[str, str], dict[str, list[str]]]:
-        """Fetch sequence and position data for two sequences from the database.
+        """
+        Fetch sequence and standard numbering position data for two sequences from the database.
 
         Args:
-            sequence_id1: First sequence accession ID
-            sequence_id2: Second sequence accession ID
-            db: Database connection instance
-            standard_numbering_tool_name: Name of standard numbering tool to use
-            node_type: Type of node to use (default: "Protein")
-            region_ids_neo4j: List of region IDs for the sequence cuting based on region_based_sequence.
+            sequence_id1 (str): Accession ID of the first sequence.
+            sequence_id2 (str): Accession ID of the second sequence.
+            db (DatabaseConnector): Database connection instance.
+            standard_numbering_tool_name (str): Name of the standard numbering tool to use.
+            node_type (str, optional): Type of node to use (default: "Protein").
+            region_ids_neo4j (Optional[list[int]], optional): List of region IDs for region-based sequence extraction.
 
         Returns:
-            tuple containing:
-                - dict[str, str]: Mapping of sequence IDs to sequences
-                - dict[str, list[str]]: Mapping of sequence IDs to position lists
+            tuple[dict[str, str], dict[str, list[str]]]:
+                - Mapping of sequence IDs to sequences.
+                - Mapping of sequence IDs to position lists.
 
         Raises:
-            ValueError: If standard numbering positions not found for both sequences
+            ValueError: If standard numbering positions are not found for both sequences.
         """
         if region_ids_neo4j is not None:
             query = f"""
@@ -84,20 +85,21 @@ def find_mutations(
         pos1: list[str],
         pos2: list[str],
     ) -> dict[str, Any]:
-        """Compare two sequences and identify mutations between them.
+        """
+        Compare two sequences and identify mutations between them using standard numbering positions.
 
         Args:
-            seq1: First amino acid sequence
-            seq2: Second amino acid sequence
-            pos1: Standard numbering positions for first sequence
-            pos2: Standard numbering positions for second sequence
+            seq1 (str): First sequence (amino acids or nucleotides).
+            seq2 (str): Second sequence (amino acids or nucleotides).
+            pos1 (list[str]): Standard numbering positions for the first sequence.
+            pos2 (list[str]): Standard numbering positions for the second sequence.
 
         Returns:
-            dict containing mutation information:
-                - from_positions: List[int] - Source positions (1-based)
-                - to_positions: List[int] - Target positions (1-based)
-                - from_monomers: List[str] - Source amino acids
-                - to_monomers: List[str] - Target amino acids
+            dict[str, Any]: Dictionary containing mutation information:
+                - from_positions (List[int]): Source positions (1-based).
+                - to_positions (List[int]): Target positions (1-based).
+                - from_monomers (List[str]): Source monomers (amino acids or nucleotides).
+                - to_monomers (List[str]): Target monomers (amino acids or nucleotides).
         """
         pos_to_idx1 = {pos: idx for idx, pos in enumerate(pos1)}
         pos_to_idx2 = {pos: idx for idx, pos in enumerate(pos2)}
@@ -134,19 +136,20 @@ def save_mutations_to_db(
         node_type: str = "Protein",
         region_ids_neo4j: Optional[list[int]] = None,
     ) -> None:
-        """Save detected mutations to the database.
+        """
+        Save detected mutations to the database as relationships between nodes.
 
         Args:
-            mutations: Dictionary containing mutation information:
-                - from_positions: List[int] - Source positions
-                - to_positions: List[int] - Target positions
-                - from_monomers: List[str] - Source amino acids
-                - to_monomers: List[str] - Target amino acids
-            db: Database connection instance
-            sequence_id1: First sequence accession ID
-            sequence_id2: Second sequence accession ID
-            node_type: Type of node to use (default: "Protein")
-            region_ids_neo4j: List of region IDs for the sequence cuting based on region_based_sequence.
+            mutations (dict[str, list[int | str]]): Dictionary containing mutation information:
+                - from_positions (List[int]): Source positions.
+                - to_positions (List[int]): Target positions.
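+                - from_monomers (List[str]): Source monomers (amino acids or nucleotides, depending on node_type).
+                - to_monomers (List[str]): Target monomers (amino acids or nucleotides, depending on node_type).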
+            db (DatabaseConnector): Database connection instance.
+            sequence_id1 (str): Accession ID of the first sequence.
+            sequence_id2 (str): Accession ID of the second sequence.
+            node_type (str, optional): Type of node to use (default: "Protein").
+            region_ids_neo4j (Optional[list[int]], optional): List of region IDs for region-based sequence extraction.
         """
         # Check if a mutation relationship already exists between these proteins
         if region_ids_neo4j is not None:
@@ -237,30 +240,30 @@ def get_mutations_between_sequences(
         db: DatabaseConnector,
         standard_numbering_tool_name: str,
         save_to_db: bool = True,
-        debug: bool = False,
         node_type: str = "Protein",
         region_ids_neo4j: Optional[list[int]] = None,
     ) -> dict[str, list[int | str]]:
-        """Get mutations between two sequences using standard numbering.
+        """
+        Get mutations between two sequences using standard numbering and optionally save them to the database.
 
         Args:
-            sequence_id1: First sequence accession ID
-            sequence_id2: Second sequence accession ID
-            db: Database connection instance
-            standard_numbering_tool_name: Name of standard numbering tool to use
-            save_to_db: Whether to save mutations to database (default: True)
-            node_type: Type of node to use (default: "Protein")
-            region_ids_neo4j: List of region IDs for the sequence cuting based on region_based_sequence.
+            sequence_id1 (str): Accession ID of the first sequence.
+            sequence_id2 (str): Accession ID of the second sequence.
+            db (DatabaseConnector): Database connection instance.
+            standard_numbering_tool_name (str): Name of the standard numbering tool to use.
+            save_to_db (bool, optional): Whether to save mutations to the database (default: True).
+            node_type (str, optional): Type of node to use (default: "Protein").
+            region_ids_neo4j (Optional[list[int]], optional): List of region IDs for region-based sequence extraction.
 
         Returns:
-            dict containing mutation information:
-                - from_positions: List[int] - Source positions (1-based)
-                - to_positions: List[int] - Target positions (1-based)
-                - from_monomers: List[str] - Source amino acids
-                - to_monomers: List[str] - Target amino acids
+            dict[str, list[int | str]]: Dictionary containing mutation information:
+                - from_positions (List[int]): Source positions (1-based).
+                - to_positions (List[int]): Target positions (1-based).
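+                - from_monomers (List[str]): Source monomers (amino acids or nucleotides, depending on node_type).
+                - to_monomers (List[str]): Target monomers (amino acids or nucleotides, depending on node_type).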
 
         Raises:
-            ValueError: If standard numbering positions not found for both sequences
+            ValueError: If standard numbering positions are not found for both sequences.
         """
         sequences, positions = self.get_sequence_data(
             sequence_id1,
@@ -271,8 +274,7 @@ def get_mutations_between_sequences(
             region_ids_neo4j,
         )
 
-        if debug:
-            logger.info(f"Debug mode output: {sequences} and {positions}")
+        logger.debug(f"Debug mode output: {sequences} and {positions}")
 
         mutations = self.find_mutations(
             sequences[sequence_id1],

diff --git a/src/pyeed/tools/resources/alphafold/docker_run.py b/src/pyeed/tools/resources/alphafold/docker_run.py
@@ -214,7 +214,7 @@ def main(argv):  # type: ignore
         mount, target_path = _create_mount(f"fasta_path_{i}", fasta_path)
         mounts.append(mount)
         target_fasta_paths.append(target_path)
-    command_args.append(f'--fasta_paths={",".join(target_fasta_paths)}')
+    command_args.append(f"--fasta_paths={','.join(target_fasta_paths)}")
 
     database_paths = [
         ("uniref90_database_path", uniref90_database_path),