Skip to content

Commit 9b77322

Browse files
author
Luis Figueiredo
committed
remove protein_id qualifier if already present in another CDS
1 parent fce49ec commit 9b77322

File tree

2 files changed

+6
-1
lines changed

2 files changed

+6
-1
lines changed

deepbgc/util.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -48,7 +48,7 @@ def get_protein_features(record):
4848

4949

5050
def get_proteins_by_id(protein_features):
51-
return {get_protein_id(feature): feature for feature in protein_features}
51+
return {protein_id: feature for feature in protein_features for protein_id in get_protein_ids(feature)}
5252

5353

5454
def get_features_of_type(record, feature_type):
@@ -502,6 +502,8 @@ def fix_duplicate_cds(record):
502502

503503
if protein_id != new_protein_id:
504504
logging.warning('Setting new unique_protein_id %s for CDS %s', new_protein_id, protein_id)
505+
filtered_qualifiers = {k: v for k, v in feature.qualifiers.items() if k != "protein_id"}
506+
feature.qualifiers = collections.OrderedDict(filtered_qualifiers)
505507
feature.qualifiers['unique_protein_id'] = [new_protein_id]
506508

507509
protein_ids.add(new_protein_id)

test/unit/test_pfam.py

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3,6 +3,7 @@
33
from io import StringIO
44
import pandas as pd
55
from copy import deepcopy
6+
from collections import OrderedDict
67
from Bio import SearchIO
78
from Bio.SeqRecord import SeqRecord
89
from Bio.Seq import Seq
@@ -188,6 +189,8 @@ def test_same_protein_id(mock_find_exe, mock_popen, mock_searchio, mock_read_csv
188189
)
189190

190191
expected_record = deepcopy(record)
192+
filtered_qualifiers = {k: v for k, v in expected_record.features[-1].qualifiers.items() if k != "protein_id"}
193+
expected_record.features[-1].qualifiers = OrderedDict(filtered_qualifiers)
191194
expected_record.features[-1].qualifiers['unique_protein_id'] = 'AAK73500.1_1'
192195
expected_record.features += [
193196
SeqFeature(

0 commit comments

Comments
 (0)