Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
43 commits
Select commit Hold shift + click to select a range
f661eb6
added ncbi to uniprot mapper
alacheim Mar 11, 2025
f44a2f3
changes in mapper
alacheim Mar 13, 2025
27ff231
fixed bug in organism mapper
alacheim Mar 14, 2025
3f7eade
major update for region handling of mature protein and coding sequenc…
NiklasAbraham Mar 17, 2025
44ca574
updated notebooks
NiklasAbraham Mar 19, 2025
9037be2
okay build in multiple region support for big dna with mutplte coding…
NiklasAbraham Mar 19, 2025
366e90f
added existing pairs check in pairwise alignment
alacheim Mar 24, 2025
c9566b5
adapted embeddings for multiple gpus
alacheim Mar 24, 2025
5e031bc
changed connection timeout
alacheim Mar 25, 2025
f4d04c8
added catalytic activity to uniprot model
alacheim Mar 26, 2025
ee68c0a
possibility to split data across multiple gpus
alacheim Mar 26, 2025
01fc23a
changed embeddings to multi gpu
alacheim Mar 26, 2025
b7c37e6
changed catalytic activity to reaction
alacheim Mar 31, 2025
b8a82df
add more efficient cypher queries
alacheim Mar 31, 2025
4406df3
added substrates and products to catalytic activity
alacheim Apr 16, 2025
edd47b6
fixed saving of molecule nodes
alacheim Apr 16, 2025
faf38fd
added individual file name to mapper
alacheim Apr 25, 2025
7a84004
Merge branch 'extend_pairwise_alignment' into uniprot_id_mapper
alacheim Apr 29, 2025
4df6c17
added SPARQLWrapper as dependency
alacheim Apr 29, 2025
a02d128
fixing with ruff
alacheim Apr 30, 2025
9baa8f4
fixing errors with ruff
alacheim Apr 30, 2025
056cb6b
fixing ruff errors
alacheim Apr 30, 2025
e7369e9
added crc64iso to dependencies
alacheim Apr 30, 2025
88d4587
Merge branch 'extend_pairwise_alignment' into uniprot_id_mapper
alacheim Apr 30, 2025
f9897fe
added pysam to dependencies
alacheim Apr 30, 2025
9d81e9b
fixing linting errors
alacheim Apr 30, 2025
3955718
reformatting
alacheim Apr 30, 2025
9b7f28b
fixing mypy errors
alacheim Apr 30, 2025
cf742d7
fixing mypy errors
alacheim Apr 30, 2025
f073cd6
fixing ruff error
alacheim Apr 30, 2025
3ec0368
fixing ruff error
alacheim Apr 30, 2025
37afe5c
trigger pipeline
alacheim Apr 30, 2025
d84c74e
formated with ruff
alacheim Apr 30, 2025
57c09b4
fixed linter issue in sequence alignment
NiklasAbraham May 1, 2025
014dcdc
fixed linter issue in sequence alignment
NiklasAbraham May 1, 2025
ad840ca
fixed ruff errors imports
NiklasAbraham May 1, 2025
e845add
Merge branch 'extend_pairwise_alignment' into uniprot_id_mapper
NiklasAbraham May 1, 2025
70710f0
fixed files for ruff stuff
NiklasAbraham May 1, 2025
a435764
fixed ruff files
NiklasAbraham May 1, 2025
c0739bb
fixed ruff import version mismatches
NiklasAbraham May 1, 2025
667882d
fixed ruff mismatches run
NiklasAbraham May 1, 2025
cf19b94
fixed mypy error, formated file
alacheim May 2, 2025
7bbcd6e
Merge pull request #149 from PyEED/uniprot_id_mapper
alacheim May 2, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
723 changes: 362 additions & 361 deletions docs/usage/blast.ipynb

Large diffs are not rendered by default.

338 changes: 169 additions & 169 deletions docs/usage/clustalo.ipynb

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions docs/usage/embeddings_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,10 @@
"source": [
"import sys\n",
"\n",
"from loguru import logger\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"from loguru import logger\n",
"\n",
"from pyeed import Pyeed\n",
"from pyeed.analysis.embedding_analysis import EmbeddingTool\n",
"\n",
Expand Down
3 changes: 1 addition & 2 deletions docs/usage/mmseqs.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
"outputs": [],
"source": [
"from pyeed import Pyeed\n",
"from pyeed.model import Protein\n",
"from pyeed.tools.mmseqs import MMSeqs"
]
},
Expand Down Expand Up @@ -134,8 +135,6 @@
"pyeed = Pyeed(uri=\"bolt://localhost:7687\", user=\"neo4j\", password=\"12345678\")\n",
"\n",
"# Get first 100 protein IDs from database\n",
"from pyeed.model import Protein\n",
"\n",
"accession_ids = [protein.accession_id for protein in Protein.nodes.all()][:100]\n",
"\n",
"# Cluster sequences\n",
Expand Down
107 changes: 73 additions & 34 deletions docs/usage/mutation_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,12 @@
},
{
"cell_type": "code",
"execution_count": 1,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import sys\n",
"\n",
"from loguru import logger\n",
"\n",
"from pyeed import Pyeed\n",
Expand All @@ -37,7 +38,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 10,
"metadata": {},
"outputs": [
{
Expand All @@ -56,7 +57,7 @@
"\n",
"eedb = Pyeed(uri, user=user, password=password)\n",
"\n",
"eedb.db.wipe_database(date=\"2025-03-14\")"
"eedb.db.wipe_database(date=\"2025-03-19\")"
]
},
{
Expand All @@ -75,14 +76,15 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"ids = [\"AAM15527.1\", \"AAF05614.1\", \"AFN21551.1\", \"CAA76794.1\", \"AGQ50511.1\"]\n",
"\n",
"eedb.fetch_from_primary_db(ids, db=\"ncbi_protein\")\n",
"eedb.fetch_dna_entries_for_proteins()"
"eedb.fetch_dna_entries_for_proteins()\n",
"eedb.create_coding_sequences_regions()"
]
},
{
Expand All @@ -100,9 +102,42 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 12,
"metadata": {},
"outputs": [],
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "6ed852d438ab480fa4d1c6129eacfd26",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Output()"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Region ids: [143, 129, 128, 69, 9]\n",
"len of ids: 5\n"
]
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\"></pre>\n"
],
"text/plain": []
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sn_protein = StandardNumberingTool(name=\"test_standard_numbering_protein\")\n",
"\n",
Expand All @@ -111,11 +146,22 @@
" base_sequence_id=\"AAM15527.1\", db=eedb.db, list_of_seq_ids=ids\n",
")\n",
"\n",
"sn_dna = StandardNumberingTool(name=\"test_standard_numbering_dna\")\n",
"sn_dna = StandardNumberingTool(name=\"test_standard_numbering_dna_pairwise\")\n",
"\n",
"sn_dna.apply_standard_numbering(\n",
" base_sequence_id=\"AF190695.1\", db=eedb.db, node_type=\"DNA\"\n",
")\n"
"query_get_region_ids = \"\"\"\n",
"MATCH (p:Protein)<-[rel:ENCODES]-(d:DNA)-[rel2:HAS_REGION]->(r:Region)\n",
"WHERE r.annotation = $region_annotation AND p.accession_id IN $protein_id\n",
"RETURN id(r)\n",
"\"\"\"\n",
"\n",
"region_ids = eedb.db.execute_read(query_get_region_ids, parameters={\"protein_id\": ids, \"region_annotation\": \"coding sequence\"})\n",
"region_ids = [id['id(r)'] for id in region_ids]\n",
"print(f\"Region ids: {region_ids}\")\n",
"print(f\"len of ids: {len(ids)}\")\n",
"\n",
"sn_dna.apply_standard_numbering_pairwise(\n",
" base_sequence_id=\"AF190695.1\", db=eedb.db, node_type=\"DNA\", region_ids_neo4j=region_ids\n",
")"
]
},
{
Expand All @@ -136,7 +182,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -153,18 +199,19 @@
},
{
"cell_type": "code",
"execution_count": 6,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"md = MutationDetection()\n",
"\n",
"\n",
"seq1 = \"AF190695.1\"\n",
"seq2 = \"JX042489.1\"\n",
"name_of_standard_numbering_tool = \"test_standard_numbering_dna\"\n",
"name_of_standard_numbering_tool = \"test_standard_numbering_dna_pairwise\"\n",
"\n",
"mutations_dna = md.get_mutations_between_sequences(\n",
" seq1, seq2, eedb.db, name_of_standard_numbering_tool, node_type=\"DNA\"\n",
" seq1, seq2, eedb.db, name_of_standard_numbering_tool, node_type=\"DNA\", region_ids_neo4j=region_ids\n",
")"
]
},
Expand All @@ -183,14 +230,14 @@
},
{
"cell_type": "code",
"execution_count": 7,
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'from_positions': [241, 272, 125], 'to_positions': [241, 272, 125], 'from_monomers': ['R', 'D', 'V'], 'to_monomers': ['S', 'N', 'I']}\n"
"{'from_positions': [241, 125, 272], 'to_positions': [241, 125, 272], 'from_monomers': ['R', 'V', 'D'], 'to_monomers': ['S', 'I', 'N']}\n"
]
}
],
Expand All @@ -216,29 +263,21 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Mutation on position 682 -> 615 with a nucleotide change of T -> C\n",
"Mutation on position 407 -> 340 with a nucleotide change of C -> A\n",
"Mutation on position 92 -> 25 with a nucleotide change of C -> A\n",
"Mutation on position 162 -> 95 with a nucleotide change of G -> T\n",
"Mutation on position 929 -> 862 with a nucleotide change of A -> C\n",
"Mutation on position 346 -> 279 with a nucleotide change of A -> G\n",
"Mutation on position 87 -> 20 with a nucleotide change of C -> A\n",
"Mutation on position 88 -> 21 with a nucleotide change of T -> C\n",
"Mutation on position 130 -> 63 with a nucleotide change of C -> T\n",
"Mutation on position 175 -> 108 with a nucleotide change of G -> A\n",
"Mutation on position 131 -> 64 with a nucleotide change of T -> C\n",
"Mutation on position 132 -> 65 with a nucleotide change of A -> T\n",
"Mutation on position 914 -> 847 with a nucleotide change of G -> A\n",
"Mutation on position 604 -> 537 with a nucleotide change of T -> G\n",
"Mutation on position 925 -> 858 with a nucleotide change of G -> A\n",
"Mutation on position 226 -> 159 with a nucleotide change of T -> C\n"
"Mutation on position 705 -> 705 with a nucleotide change of G -> A\n",
"Mutation on position 395 -> 395 with a nucleotide change of T -> G\n",
"Mutation on position 137 -> 137 with a nucleotide change of A -> G\n",
"Mutation on position 17 -> 17 with a nucleotide change of T -> C\n",
"Mutation on position 473 -> 473 with a nucleotide change of T -> C\n",
"Mutation on position 716 -> 716 with a nucleotide change of G -> A\n",
"Mutation on position 720 -> 720 with a nucleotide change of A -> C\n",
"Mutation on position 198 -> 198 with a nucleotide change of C -> A\n"
]
}
],
Expand Down
1 change: 1 addition & 0 deletions docs/usage/network_analysis.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
"import matplotlib.pyplot as plt\n",
"import networkx as nx\n",
"from loguru import logger\n",
"\n",
"from pyeed import Pyeed\n",
"from pyeed.analysis.network_analysis import NetworkAnalysis\n",
"from pyeed.analysis.sequence_alignment import PairwiseAligner\n",
Expand Down
Loading