From 00af80e569c89864f7662eea94b281258013af7d Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 1 May 2025 13:56:58 +0000 Subject: [PATCH 1/7] mutation detection doc strings update --- src/pyeed/analysis/mutation_detection.py | 98 +++++++++++++----------- 1 file changed, 52 insertions(+), 46 deletions(-) diff --git a/src/pyeed/analysis/mutation_detection.py b/src/pyeed/analysis/mutation_detection.py index c2562ae1..f5522ecf 100644 --- a/src/pyeed/analysis/mutation_detection.py +++ b/src/pyeed/analysis/mutation_detection.py @@ -1,6 +1,7 @@ from typing import Any, Optional from loguru import logger + from pyeed.dbconnect import DatabaseConnector @@ -17,23 +18,24 @@ def get_sequence_data( node_type: str = "Protein", region_ids_neo4j: Optional[list[int]] = None, ) -> tuple[dict[str, str], dict[str, list[str]]]: - """Fetch sequence and position data for two sequences from the database. + """ + Fetch sequence and standard numbering position data for two sequences from the database. Args: - sequence_id1: First sequence accession ID - sequence_id2: Second sequence accession ID - db: Database connection instance - standard_numbering_tool_name: Name of standard numbering tool to use - node_type: Type of node to use (default: "Protein") - region_ids_neo4j: List of region IDs for the sequence cuting based on region_based_sequence. + sequence_id1 (str): Accession ID of the first sequence. + sequence_id2 (str): Accession ID of the second sequence. + db (DatabaseConnector): Database connection instance. + standard_numbering_tool_name (str): Name of the standard numbering tool to use. + node_type (str, optional): Type of node to use (default: "Protein"). + region_ids_neo4j (Optional[list[int]], optional): List of region IDs for region-based sequence extraction. 
Returns: - tuple containing: - - dict[str, str]: Mapping of sequence IDs to sequences - - dict[str, list[str]]: Mapping of sequence IDs to position lists + tuple[dict[str, str], dict[str, list[str]]]: + - Mapping of sequence IDs to sequences. + - Mapping of sequence IDs to position lists. Raises: - ValueError: If standard numbering positions not found for both sequences + ValueError: If standard numbering positions are not found for both sequences. """ if region_ids_neo4j is not None: query = f""" @@ -84,20 +86,21 @@ def find_mutations( pos1: list[str], pos2: list[str], ) -> dict[str, Any]: - """Compare two sequences and identify mutations between them. + """ + Compare two sequences and identify mutations between them using standard numbering positions. Args: - seq1: First amino acid sequence - seq2: Second amino acid sequence - pos1: Standard numbering positions for first sequence - pos2: Standard numbering positions for second sequence + seq1 (str): First amino acid sequence. + seq2 (str): Second amino acid sequence. + pos1 (list[str]): Standard numbering positions for the first sequence. + pos2 (list[str]): Standard numbering positions for the second sequence. Returns: - dict containing mutation information: - - from_positions: List[int] - Source positions (1-based) - - to_positions: List[int] - Target positions (1-based) - - from_monomers: List[str] - Source amino acids - - to_monomers: List[str] - Target amino acids + dict[str, Any]: Dictionary containing mutation information: + - from_positions (List[int]): Source positions (1-based). + - to_positions (List[int]): Target positions (1-based). + - from_monomers (List[str]): Source amino acids. + - to_monomers (List[str]): Target amino acids. 
""" pos_to_idx1 = {pos: idx for idx, pos in enumerate(pos1)} pos_to_idx2 = {pos: idx for idx, pos in enumerate(pos2)} @@ -134,19 +137,20 @@ def save_mutations_to_db( node_type: str = "Protein", region_ids_neo4j: Optional[list[int]] = None, ) -> None: - """Save detected mutations to the database. + """ + Save detected mutations to the database as relationships between nodes. Args: - mutations: Dictionary containing mutation information: - - from_positions: List[int] - Source positions - - to_positions: List[int] - Target positions - - from_monomers: List[str] - Source amino acids - - to_monomers: List[str] - Target amino acids - db: Database connection instance - sequence_id1: First sequence accession ID - sequence_id2: Second sequence accession ID - node_type: Type of node to use (default: "Protein") - region_ids_neo4j: List of region IDs for the sequence cuting based on region_based_sequence. + mutations (dict[str, list[int | str]]): Dictionary containing mutation information: + - from_positions (List[int]): Source positions. + - to_positions (List[int]): Target positions. + - from_monomers (List[str]): Source amino acids. + - to_monomers (List[str]): Target amino acids. + db (DatabaseConnector): Database connection instance. + sequence_id1 (str): Accession ID of the first sequence. + sequence_id2 (str): Accession ID of the second sequence. + node_type (str, optional): Type of node to use (default: "Protein"). + region_ids_neo4j (Optional[list[int]], optional): List of region IDs for region-based sequence extraction. """ # Check if a mutation relationship already exists between these proteins if region_ids_neo4j is not None: @@ -241,26 +245,28 @@ def get_mutations_between_sequences( node_type: str = "Protein", region_ids_neo4j: Optional[list[int]] = None, ) -> dict[str, list[int | str]]: - """Get mutations between two sequences using standard numbering. + """ + Get mutations between two sequences using standard numbering and optionally save them to the database. 
Args: - sequence_id1: First sequence accession ID - sequence_id2: Second sequence accession ID - db: Database connection instance - standard_numbering_tool_name: Name of standard numbering tool to use - save_to_db: Whether to save mutations to database (default: True) - node_type: Type of node to use (default: "Protein") - region_ids_neo4j: List of region IDs for the sequence cuting based on region_based_sequence. + sequence_id1 (str): Accession ID of the first sequence. + sequence_id2 (str): Accession ID of the second sequence. + db (DatabaseConnector): Database connection instance. + standard_numbering_tool_name (str): Name of the standard numbering tool to use. + save_to_db (bool, optional): Whether to save mutations to the database (default: True). + debug (bool, optional): If True, print debug information (default: False). + node_type (str, optional): Type of node to use (default: "Protein"). + region_ids_neo4j (Optional[list[int]], optional): List of region IDs for region-based sequence extraction. Returns: - dict containing mutation information: - - from_positions: List[int] - Source positions (1-based) - - to_positions: List[int] - Target positions (1-based) - - from_monomers: List[str] - Source amino acids - - to_monomers: List[str] - Target amino acids + dict[str, list[int | str]]: Dictionary containing mutation information: + - from_positions (List[int]): Source positions (1-based). + - to_positions (List[int]): Target positions (1-based). + - from_monomers (List[str]): Source amino acids. + - to_monomers (List[str]): Target amino acids. Raises: - ValueError: If standard numbering positions not found for both sequences + ValueError: If standard numbering positions are not found for both sequences. 
""" sequences, positions = self.get_sequence_data( sequence_id1, From d465fee816e8c653b078d2bed41770e54aacd3ed Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 1 May 2025 13:58:16 +0000 Subject: [PATCH 2/7] removed debug option --- src/pyeed/analysis/mutation_detection.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/pyeed/analysis/mutation_detection.py b/src/pyeed/analysis/mutation_detection.py index f5522ecf..a9881810 100644 --- a/src/pyeed/analysis/mutation_detection.py +++ b/src/pyeed/analysis/mutation_detection.py @@ -241,7 +241,6 @@ def get_mutations_between_sequences( db: DatabaseConnector, standard_numbering_tool_name: str, save_to_db: bool = True, - debug: bool = False, node_type: str = "Protein", region_ids_neo4j: Optional[list[int]] = None, ) -> dict[str, list[int | str]]: @@ -254,7 +253,6 @@ def get_mutations_between_sequences( db (DatabaseConnector): Database connection instance. standard_numbering_tool_name (str): Name of the standard numbering tool to use. save_to_db (bool, optional): Whether to save mutations to the database (default: True). - debug (bool, optional): If True, print debug information (default: False). node_type (str, optional): Type of node to use (default: "Protein"). region_ids_neo4j (Optional[list[int]], optional): List of region IDs for region-based sequence extraction. 
@@ -277,8 +275,7 @@ def get_mutations_between_sequences( region_ids_neo4j, ) - if debug: - logger.info(f"Debug mode output: {sequences} and {positions}") + logger.debug(f"Debug mode output: {sequences} and {positions}") mutations = self.find_mutations( sequences[sequence_id1], From aa883926abc9920d95c119734382bed110a115bd Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 1 May 2025 14:04:47 +0000 Subject: [PATCH 3/7] fixed ruff import --- src/pyeed/analysis/embedding_analysis.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/pyeed/analysis/embedding_analysis.py b/src/pyeed/analysis/embedding_analysis.py index fa9d6c0e..ff4364ca 100644 --- a/src/pyeed/analysis/embedding_analysis.py +++ b/src/pyeed/analysis/embedding_analysis.py @@ -6,9 +6,10 @@ import scipy.spatial as sp from matplotlib.figure import Figure from numpy.typing import NDArray -from pyeed.dbconnect import DatabaseConnector from scipy.spatial.distance import cosine +from pyeed.dbconnect import DatabaseConnector + logger = logging.getLogger(__name__) From 8cc5f2ad2a40f6819662e28555cca04dbc500d27 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 1 May 2025 14:12:26 +0000 Subject: [PATCH 4/7] fixed usage ruff errors --- docs/usage/blast.ipynb | 1 + docs/usage/clustalo.ipynb | 11 +++++------ docs/usage/embeddings_analysis.ipynb | 9 +++++---- docs/usage/mmseqs.ipynb | 3 +-- docs/usage/mutation_analysis.ipynb | 5 +++-- docs/usage/network_analysis.ipynb | 1 + docs/usage/standard_numbering.ipynb | 2 +- 7 files changed, 17 insertions(+), 15 deletions(-) diff --git a/docs/usage/blast.ipynb b/docs/usage/blast.ipynb index b56140d7..9e89d164 100644 --- a/docs/usage/blast.ipynb +++ b/docs/usage/blast.ipynb @@ -21,6 +21,7 @@ "source": [ "# change log level to INFO\n", "import sys\n", + "\n", "from loguru import logger\n", "\n", "logger.remove()\n", diff --git a/docs/usage/clustalo.ipynb b/docs/usage/clustalo.ipynb index 64ed62ee..4e28c94a 100644 --- 
a/docs/usage/clustalo.ipynb +++ b/docs/usage/clustalo.ipynb @@ -17,13 +17,14 @@ "metadata": {}, "outputs": [], "source": [ - "from pyeed import Pyeed\n", - "from pyeed.tools.clustalo import ClustalOmega\n", - "\n", - "# change log level to INFO\n", "import sys\n", + "\n", "from loguru import logger\n", "\n", + "from pyeed import Pyeed\n", + "from pyeed.model import Protein\n", + "from pyeed.tools.clustalo import ClustalOmega\n", + "\n", "logger.remove()\n", "level = logger.add(sys.stderr, level=\"INFO\")" ] @@ -109,8 +110,6 @@ "pyeed = Pyeed(uri=\"bolt://129.69.129.130:7687\", user=\"neo4j\", password=\"12345678\")\n", "\n", "# Get protein IDs from database\n", - "from pyeed.model import Protein\n", - "\n", "accession_ids = [protein.accession_id for protein in Protein.nodes.all()][:10]\n", "\n", "# Align sequences from database\n", diff --git a/docs/usage/embeddings_analysis.ipynb b/docs/usage/embeddings_analysis.ipynb index 65a2398c..01fde1f7 100644 --- a/docs/usage/embeddings_analysis.ipynb +++ b/docs/usage/embeddings_analysis.ipynb @@ -24,9 +24,10 @@ "source": [ "import sys\n", "\n", - "from loguru import logger\n", - "import pandas as pd\n", "import matplotlib.pyplot as plt\n", + "import pandas as pd\n", + "from loguru import logger\n", + "\n", "from pyeed import Pyeed\n", "from pyeed.analysis.embedding_analysis import EmbeddingTool\n", "\n", @@ -1223,7 +1224,7 @@ ], "metadata": { "kernelspec": { - "display_name": "pyeed_niklas", + "display_name": "pyeed_niklas_env", "language": "python", "name": "python3" }, @@ -1237,7 +1238,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.10.16" } }, "nbformat": 4, diff --git a/docs/usage/mmseqs.ipynb b/docs/usage/mmseqs.ipynb index 1185c6fe..2253fd8a 100644 --- a/docs/usage/mmseqs.ipynb +++ b/docs/usage/mmseqs.ipynb @@ -20,6 +20,7 @@ "outputs": [], "source": [ "from pyeed import Pyeed\n", + "from pyeed.model import Protein\n", "from pyeed.tools.mmseqs 
import MMSeqs" ] }, @@ -134,8 +135,6 @@ "pyeed = Pyeed(uri=\"bolt://localhost:7687\", user=\"neo4j\", password=\"12345678\")\n", "\n", "# Get first 100 protein IDs from database\n", - "from pyeed.model import Protein\n", - "\n", "accession_ids = [protein.accession_id for protein in Protein.nodes.all()][:100]\n", "\n", "# Cluster sequences\n", diff --git a/docs/usage/mutation_analysis.ipynb b/docs/usage/mutation_analysis.ipynb index 9b31c996..9f919f14 100644 --- a/docs/usage/mutation_analysis.ipynb +++ b/docs/usage/mutation_analysis.ipynb @@ -16,6 +16,7 @@ "outputs": [], "source": [ "import sys\n", + "\n", "from loguru import logger\n", "\n", "from pyeed import Pyeed\n", @@ -295,7 +296,7 @@ ], "metadata": { "kernelspec": { - "display_name": "pyeed_niklas", + "display_name": "pyeed_niklas_env", "language": "python", "name": "python3" }, @@ -309,7 +310,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.8" + "version": "3.10.16" } }, "nbformat": 4, diff --git a/docs/usage/network_analysis.ipynb b/docs/usage/network_analysis.ipynb index 4d45db71..0b254610 100644 --- a/docs/usage/network_analysis.ipynb +++ b/docs/usage/network_analysis.ipynb @@ -11,6 +11,7 @@ "import matplotlib.pyplot as plt\n", "import networkx as nx\n", "from loguru import logger\n", + "\n", "from pyeed import Pyeed\n", "from pyeed.analysis.network_analysis import NetworkAnalysis\n", "from pyeed.analysis.sequence_alignment import PairwiseAligner\n", diff --git a/docs/usage/standard_numbering.ipynb b/docs/usage/standard_numbering.ipynb index cd84cad9..54374cd6 100644 --- a/docs/usage/standard_numbering.ipynb +++ b/docs/usage/standard_numbering.ipynb @@ -23,10 +23,10 @@ "%reload_ext autoreload\n", "%autoreload 2\n", "import sys\n", + "\n", "from loguru import logger\n", "\n", "from pyeed import Pyeed\n", - "from pyeed.analysis.mutation_detection import MutationDetection\n", "from pyeed.analysis.standard_numbering import StandardNumberingTool\n", "\n", 
"logger.remove()\n", From 9db90e3f475bdcbc214ce36333edd852c7022cdf Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 1 May 2025 14:13:49 +0000 Subject: [PATCH 5/7] fixed ruff imports --- src/pyeed/analysis/network_analysis.py | 1 + src/pyeed/analysis/ontology_loading.py | 3 ++- src/pyeed/analysis/sequence_alignment.py | 3 ++- src/pyeed/analysis/standard_numbering.py | 1 + tests/unit/test_dbchat.py | 1 + 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/src/pyeed/analysis/network_analysis.py b/src/pyeed/analysis/network_analysis.py index dab37fa0..a46086da 100644 --- a/src/pyeed/analysis/network_analysis.py +++ b/src/pyeed/analysis/network_analysis.py @@ -2,6 +2,7 @@ import networkx as nx from loguru import logger + from pyeed.dbconnect import DatabaseConnector diff --git a/src/pyeed/analysis/ontology_loading.py b/src/pyeed/analysis/ontology_loading.py index ee909636..5b6341f5 100644 --- a/src/pyeed/analysis/ontology_loading.py +++ b/src/pyeed/analysis/ontology_loading.py @@ -1,8 +1,9 @@ from typing import Dict -from pyeed.dbconnect import DatabaseConnector from rdflib import OWL, RDF, RDFS, Graph, Namespace, URIRef +from pyeed.dbconnect import DatabaseConnector + class OntologyAdapter: """ diff --git a/src/pyeed/analysis/sequence_alignment.py b/src/pyeed/analysis/sequence_alignment.py index 3bfd019c..e22b8da3 100644 --- a/src/pyeed/analysis/sequence_alignment.py +++ b/src/pyeed/analysis/sequence_alignment.py @@ -5,9 +5,10 @@ from Bio.Align import PairwiseAligner as BioPairwiseAligner from Bio.Align.substitution_matrices import Array as BioSubstitutionMatrix from joblib import Parallel, cpu_count, delayed +from rich.progress import Progress + from pyeed.dbconnect import DatabaseConnector from pyeed.tools.utility import chunks -from rich.progress import Progress class PairwiseAligner: diff --git a/src/pyeed/analysis/standard_numbering.py b/src/pyeed/analysis/standard_numbering.py index 6f81869f..b2ea0667 100644 --- 
a/src/pyeed/analysis/standard_numbering.py +++ b/src/pyeed/analysis/standard_numbering.py @@ -13,6 +13,7 @@ from typing import Any, Dict, List, Optional, Tuple from loguru import logger + from pyeed.analysis.sequence_alignment import PairwiseAligner from pyeed.dbconnect import DatabaseConnector from pyeed.model import StandardNumbering diff --git a/tests/unit/test_dbchat.py b/tests/unit/test_dbchat.py index d1e202c6..bf6226ac 100644 --- a/tests/unit/test_dbchat.py +++ b/tests/unit/test_dbchat.py @@ -2,6 +2,7 @@ import pytest from neo4j.exceptions import CypherSyntaxError + from pyeed.dbchat import DBChat from pyeed.dbconnect import DatabaseConnector From 33e2784245810c686334d0423020ec966eebbd19 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 1 May 2025 14:25:23 +0000 Subject: [PATCH 6/7] fixed action --- .github/workflows/lint.yaml | 2 +- docs/usage/mutation_analysis.ipynb | 25 ++++++++++++++----- docs/usage/standard_numbering.ipynb | 22 ++++++++++------ src/pyeed/main.py | 8 +++--- .../tools/resources/alphafold/docker_run.py | 2 +- 5 files changed, 40 insertions(+), 19 deletions(-) diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index 40a6b665..cdef9c83 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -10,7 +10,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: "3.x" + python-version: "3.12" - name: Install dependencies run: | diff --git a/docs/usage/mutation_analysis.ipynb b/docs/usage/mutation_analysis.ipynb index 9f919f14..e34b2ddb 100644 --- a/docs/usage/mutation_analysis.ipynb +++ b/docs/usage/mutation_analysis.ipynb @@ -154,13 +154,19 @@ "RETURN id(r)\n", "\"\"\"\n", "\n", - "region_ids = eedb.db.execute_read(query_get_region_ids, parameters={\"protein_id\": ids, \"region_annotation\": \"coding sequence\"})\n", - "region_ids = [id['id(r)'] for id in region_ids]\n", + "region_ids = eedb.db.execute_read(\n", + " query_get_region_ids,\n", + " 
parameters={\"protein_id\": ids, \"region_annotation\": \"coding sequence\"},\n", + ")\n", + "region_ids = [id[\"id(r)\"] for id in region_ids]\n", "print(f\"Region ids: {region_ids}\")\n", "print(f\"len of ids: {len(ids)}\")\n", "\n", "sn_dna.apply_standard_numbering_pairwise(\n", - " base_sequence_id=\"AF190695.1\", db=eedb.db, node_type=\"DNA\", region_ids_neo4j=region_ids\n", + " base_sequence_id=\"AF190695.1\",\n", + " db=eedb.db,\n", + " node_type=\"DNA\",\n", + " region_ids_neo4j=region_ids,\n", ")" ] }, @@ -211,7 +217,12 @@ "name_of_standard_numbering_tool = \"test_standard_numbering_dna_pairwise\"\n", "\n", "mutations_dna = md.get_mutations_between_sequences(\n", - " seq1, seq2, eedb.db, name_of_standard_numbering_tool, node_type=\"DNA\", region_ids_neo4j=region_ids\n", + " seq1,\n", + " seq2,\n", + " eedb.db,\n", + " name_of_standard_numbering_tool,\n", + " node_type=\"DNA\",\n", + " region_ids_neo4j=region_ids,\n", ")" ] }, @@ -282,8 +293,10 @@ } ], "source": [ - "for i in range(len(mutations_dna['from_positions'])):\n", - " print(f\"Mutation on position {mutations_dna['from_positions'][i]} -> {mutations_dna['to_positions'][i]} with a nucleotide change of {mutations_dna['from_monomers'][i]} -> {mutations_dna['to_monomers'][i]}\")" + "for i in range(len(mutations_dna[\"from_positions\"])):\n", + " print(\n", + " f\"Mutation on position {mutations_dna['from_positions'][i]} -> {mutations_dna['to_positions'][i]} with a nucleotide change of {mutations_dna['from_monomers'][i]} -> {mutations_dna['to_monomers'][i]}\"\n", + " )" ] }, { diff --git a/docs/usage/standard_numbering.ipynb b/docs/usage/standard_numbering.ipynb index 54374cd6..b5ee657b 100644 --- a/docs/usage/standard_numbering.ipynb +++ b/docs/usage/standard_numbering.ipynb @@ -96,7 +96,7 @@ "eedb = Pyeed(uri, user=user, password=password)\n", "eedb.db.wipe_database(date=\"2025-03-19\")\n", "\n", - "eedb.db.initialize_db_constraints(user=user, password=password)\n" + 
"eedb.db.initialize_db_constraints(user=user, password=password)" ] }, { @@ -148,7 +148,7 @@ "\n", "sn.apply_standard_numbering_pairwise(\n", " base_sequence_id=\"AAM15527.1\", db=eedb.db, list_of_seq_ids=ids[0:5]\n", - ")\n" + ")" ] }, { @@ -184,7 +184,7 @@ "source": [ "sn.apply_standard_numbering_pairwise(\n", " base_sequence_id=\"AAM15527.1\", db=eedb.db, list_of_seq_ids=ids\n", - ")\n" + ")" ] }, { @@ -290,7 +290,9 @@ } ], "source": [ - "sn_dna_region = StandardNumberingTool(name=\"test_standard_numbering_dna_pairwise_region\")\n", + "sn_dna_region = StandardNumberingTool(\n", + " name=\"test_standard_numbering_dna_pairwise_region\"\n", + ")\n", "\n", "\n", "ids = [\"AAM15527.1\", \"AAF05614.1\", \"AFN21551.1\", \"CAA76794.1\", \"AGQ50511.1\"]\n", @@ -302,14 +304,20 @@ "RETURN id(r)\n", "\"\"\"\n", "\n", - "region_ids = eedb.db.execute_read(query_get_region_ids, parameters={\"protein_id\": ids, \"region_annotation\": \"coding sequence\"})\n", - "region_ids = [id['id(r)'] for id in region_ids]\n", + "region_ids = eedb.db.execute_read(\n", + " query_get_region_ids,\n", + " parameters={\"protein_id\": ids, \"region_annotation\": \"coding sequence\"},\n", + ")\n", + "region_ids = [id[\"id(r)\"] for id in region_ids]\n", "print(f\"Region ids: {region_ids}\")\n", "print(f\"len of ids: {len(ids)}\")\n", "\n", "\n", "sn_dna_region.apply_standard_numbering_pairwise(\n", - " base_sequence_id=\"AF190695.1\", db=eedb.db, node_type=\"DNA\", region_ids_neo4j=region_ids\n", + " base_sequence_id=\"AF190695.1\",\n", + " db=eedb.db,\n", + " node_type=\"DNA\",\n", + " region_ids_neo4j=region_ids,\n", ")" ] }, diff --git a/src/pyeed/main.py b/src/pyeed/main.py index d4a520b9..fd5dec91 100644 --- a/src/pyeed/main.py +++ b/src/pyeed/main.py @@ -347,9 +347,9 @@ def fetch_dna_entries_for_proteins(self, ids: list[str] | None = None) -> None: try: batch_ids = nucleotide_ids[i : i + BATCH_SIZE] self.fetch_ncbi_nucleotide(batch_ids) - logger.info(f"Successfully fetched batch 
{i//BATCH_SIZE + 1}") + logger.info(f"Successfully fetched batch {i // BATCH_SIZE + 1}") except Exception as e: - logger.error(f"Error fetching batch {i//BATCH_SIZE + 1}: {str(e)}") + logger.error(f"Error fetching batch {i // BATCH_SIZE + 1}: {str(e)}") continue # Process protein-DNA relationships in batches @@ -419,11 +419,11 @@ def fetch_dna_entries_for_proteins(self, ids: list[str] | None = None) -> None: batch_create_query, {"relationships": new_relationships} ) logger.info( - f"Successfully processed relationship batch {i//BATCH_SIZE + 1}" + f"Successfully processed relationship batch {i // BATCH_SIZE + 1}" ) except Exception as e: logger.error( - f"Error processing relationship batch {i//BATCH_SIZE + 1}: {str(e)}" + f"Error processing relationship batch {i // BATCH_SIZE + 1}: {str(e)}" ) continue diff --git a/src/pyeed/tools/resources/alphafold/docker_run.py b/src/pyeed/tools/resources/alphafold/docker_run.py index 6c21ade7..5d8f4f5c 100644 --- a/src/pyeed/tools/resources/alphafold/docker_run.py +++ b/src/pyeed/tools/resources/alphafold/docker_run.py @@ -214,7 +214,7 @@ def main(argv): # type: ignore mount, target_path = _create_mount(f"fasta_path_{i}", fasta_path) mounts.append(mount) target_fasta_paths.append(target_path) - command_args.append(f'--fasta_paths={",".join(target_fasta_paths)}') + command_args.append(f"--fasta_paths={','.join(target_fasta_paths)}") database_paths = [ ("uniref90_database_path", uniref90_database_path), From 6d88ae67256967003bc5f608e15daf5f5d0c96b9 Mon Sep 17 00:00:00 2001 From: Niklas Abraham GPU Date: Thu, 1 May 2025 14:31:33 +0000 Subject: [PATCH 7/7] fixed ruff version mismatches --- src/pyeed/analysis/embedding_analysis.py | 3 +-- src/pyeed/analysis/mutation_detection.py | 1 - src/pyeed/analysis/network_analysis.py | 1 - src/pyeed/analysis/ontology_loading.py | 3 +-- src/pyeed/analysis/sequence_alignment.py | 3 +-- src/pyeed/analysis/standard_numbering.py | 1 - tests/unit/test_dbchat.py | 1 - 7 files changed, 3 
insertions(+), 10 deletions(-) diff --git a/src/pyeed/analysis/embedding_analysis.py b/src/pyeed/analysis/embedding_analysis.py index ff4364ca..fa9d6c0e 100644 --- a/src/pyeed/analysis/embedding_analysis.py +++ b/src/pyeed/analysis/embedding_analysis.py @@ -6,9 +6,8 @@ import scipy.spatial as sp from matplotlib.figure import Figure from numpy.typing import NDArray -from scipy.spatial.distance import cosine - from pyeed.dbconnect import DatabaseConnector +from scipy.spatial.distance import cosine logger = logging.getLogger(__name__) diff --git a/src/pyeed/analysis/mutation_detection.py b/src/pyeed/analysis/mutation_detection.py index a9881810..274e168b 100644 --- a/src/pyeed/analysis/mutation_detection.py +++ b/src/pyeed/analysis/mutation_detection.py @@ -1,7 +1,6 @@ from typing import Any, Optional from loguru import logger - from pyeed.dbconnect import DatabaseConnector diff --git a/src/pyeed/analysis/network_analysis.py b/src/pyeed/analysis/network_analysis.py index a46086da..dab37fa0 100644 --- a/src/pyeed/analysis/network_analysis.py +++ b/src/pyeed/analysis/network_analysis.py @@ -2,7 +2,6 @@ import networkx as nx from loguru import logger - from pyeed.dbconnect import DatabaseConnector diff --git a/src/pyeed/analysis/ontology_loading.py b/src/pyeed/analysis/ontology_loading.py index 5b6341f5..ee909636 100644 --- a/src/pyeed/analysis/ontology_loading.py +++ b/src/pyeed/analysis/ontology_loading.py @@ -1,8 +1,7 @@ from typing import Dict -from rdflib import OWL, RDF, RDFS, Graph, Namespace, URIRef - from pyeed.dbconnect import DatabaseConnector +from rdflib import OWL, RDF, RDFS, Graph, Namespace, URIRef class OntologyAdapter: diff --git a/src/pyeed/analysis/sequence_alignment.py b/src/pyeed/analysis/sequence_alignment.py index e22b8da3..3bfd019c 100644 --- a/src/pyeed/analysis/sequence_alignment.py +++ b/src/pyeed/analysis/sequence_alignment.py @@ -5,10 +5,9 @@ from Bio.Align import PairwiseAligner as BioPairwiseAligner from Bio.Align.substitution_matrices 
import Array as BioSubstitutionMatrix from joblib import Parallel, cpu_count, delayed -from rich.progress import Progress - from pyeed.dbconnect import DatabaseConnector from pyeed.tools.utility import chunks +from rich.progress import Progress class PairwiseAligner: diff --git a/src/pyeed/analysis/standard_numbering.py b/src/pyeed/analysis/standard_numbering.py index b2ea0667..6f81869f 100644 --- a/src/pyeed/analysis/standard_numbering.py +++ b/src/pyeed/analysis/standard_numbering.py @@ -13,7 +13,6 @@ from typing import Any, Dict, List, Optional, Tuple from loguru import logger - from pyeed.analysis.sequence_alignment import PairwiseAligner from pyeed.dbconnect import DatabaseConnector from pyeed.model import StandardNumbering diff --git a/tests/unit/test_dbchat.py b/tests/unit/test_dbchat.py index bf6226ac..d1e202c6 100644 --- a/tests/unit/test_dbchat.py +++ b/tests/unit/test_dbchat.py @@ -2,7 +2,6 @@ import pytest from neo4j.exceptions import CypherSyntaxError - from pyeed.dbchat import DBChat from pyeed.dbconnect import DatabaseConnector