2 files changed: +25 additions, -9 deletions

First changed file (SMILES reader):

@@ -199,24 +199,25 @@ def _read_data(self, raw_data: str) -> List[int]:
         Returns:
             List[int]: A list of integers representing the indices of the SMILES tokens.
         """
-        if self.canonicalize_smiles:
-            try:
-                mol = Chem.MolFromSmiles(raw_data.strip())
-                if mol is not None:
-                    raw_data = Chem.MolToSmiles(mol, canonical=True)
-            except Exception as e:
-                print(f"RDKit failed to canonicalize the SMILES: {raw_data}")
-                print(f"\t{e}")
         try:
             mol = Chem.MolFromSmiles(raw_data.strip())
             if mol is None:
                 raise ValueError(f"Invalid SMILES: {raw_data}")
-            return [self._get_token_index(v[1]) for v in _tokenize(raw_data)]
         except ValueError as e:
             print(f"could not process {raw_data}")
             print(f"\tError: {e}")
             return None
 
+        if self.canonicalize_smiles:
+            try:
+                raw_data = Chem.MolToSmiles(mol, canonical=True)
+            except Exception as e:
+                print(f"RDKit failed to canonicalize the SMILES: {raw_data}")
+                print(f"\t{e}")
+                return None
+
+        return [self._get_token_index(v[1]) for v in _tokenize(raw_data)]
+
     def _back_to_smiles(self, smiles_encoded):
         token_file = self.reader.token_path
         token_coding = {}
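For context, here is roughly how `_read_data` reads once this hunk is applied, reconstructed from the diff above. It is a sketch for review, not the verbatim source: the RDKit import, the `typing` import, and the module-level `_tokenize` helper are assumed to exist in the original file, and the method naturally lives on the reader class alongside `_get_token_index` and `canonicalize_smiles`.

# Sketch of the post-patch method, reconstructed from the diff (assumes the
# original module's imports and helpers).
from typing import List          # assumed import, matching the List[int] annotation

from rdkit import Chem           # RDKit, as used throughout the diff


def _read_data(self, raw_data: str) -> List[int]:
    """Encode a SMILES string as a list of token indices; error paths return None."""
    # Parse once and reject strings RDKit cannot turn into a molecule.
    try:
        mol = Chem.MolFromSmiles(raw_data.strip())
        if mol is None:
            raise ValueError(f"Invalid SMILES: {raw_data}")
    except ValueError as e:
        print(f"could not process {raw_data}")
        print(f"\tError: {e}")
        return None

    # Canonicalization now reuses the already-parsed molecule and bails out
    # with None if RDKit raises, instead of tokenizing a bad string.
    if self.canonicalize_smiles:
        try:
            raw_data = Chem.MolToSmiles(mol, canonical=True)
        except Exception as e:
            print(f"RDKit failed to canonicalize the SMILES: {raw_data}")
            print(f"\t{e}")
            return None

    # _tokenize and _get_token_index are defined elsewhere in the module/class.
    return [self._get_token_index(v[1]) for v in _tokenize(raw_data)]

The practical effect of the patch is that the molecule is parsed exactly once, validation happens before canonicalization, and a canonicalization failure no longer falls through to tokenizing the un-canonicalized string. (The annotation stays `List[int]` even though the error paths return `None`, as in the original.)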
Second changed file (unit tests):

@@ -111,6 +111,21 @@ def test_read_data_with_invalid_input(self) -> None:
                 f"The output for invalid token '{raw_data}' should be None.",
             )
 
+    def test_read_data_with_invalid_input_with_no_canonicalize(self) -> None:
+        """
+        Test the _read_data method with an invalid input.
+        The invalid token should prompt a return value None
+        """
+        self.reader.canonicalize_smiles = False
+        raw_datas = ["%INVALID%", "ADADAD", "ADASDAD", "CC(=O)NC1[Mg-2]"]
+        for raw_data in raw_datas:
+            result = self.reader._read_data(raw_data)
+            self.assertIsNone(
+                result,
+                f"The output for invalid token '{raw_data}' should be None.",
+            )
+        self.reader.canonicalize_smiles = True  # Reset to original state
+
     @patch("builtins.open", new_callable=mock_open)
     def test_finish_method_for_new_tokens(self, mock_file: mock_open) -> None:
         """
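One small robustness note on the new test, not part of the patch: the manual reset at the end only runs if every assertion passes, so a failing assertion would leave `canonicalize_smiles` set to False for later tests. A variant that restores the flag regardless of outcome could look like the sketch below; it assumes the same `self.reader` fixture from the test class, and `addCleanup` is standard `unittest.TestCase` behaviour that runs after the test even on failure.

def test_read_data_with_invalid_input_with_no_canonicalize(self) -> None:
    """Invalid SMILES should return None even when canonicalization is off."""
    # Register the restore first, so the original flag value comes back even
    # if an assertion below fails.
    self.addCleanup(
        setattr, self.reader, "canonicalize_smiles", self.reader.canonicalize_smiles
    )
    self.reader.canonicalize_smiles = False
    for raw_data in ["%INVALID%", "ADADAD", "ADASDAD", "CC(=O)NC1[Mg-2]"]:
        self.assertIsNone(
            self.reader._read_data(raw_data),
            f"The output for invalid token '{raw_data}' should be None.",
        )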