Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion arc/job/ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
"""

import datetime
import logging
import os
import time
from typing import Any, Callable, List, Optional, Tuple, Union
Expand Down Expand Up @@ -78,7 +79,7 @@ def __init__(self, server: str = '') -> None:
self.key = servers[server]['key']
self._sftp = None
self._ssh = None
logger.getLogger("paramiko").setLevel(logger.WARNING)
logging.getLogger("paramiko").setLevel(logging.WARNING)

def __enter__(self) -> 'SSHClient':
self.connect()
Expand Down
10 changes: 6 additions & 4 deletions arc/job/trsh.py
Original file line number Diff line number Diff line change
Expand Up @@ -838,6 +838,7 @@ def trsh_ess_job(label: str,
cpu_cores: int,
ess_trsh_methods: list,
is_h: bool = False,
is_monoatomic: bool = False,
) -> tuple:
"""
Troubleshoot issues related to the electronic structure software, such as convergence.
Expand All @@ -856,6 +857,7 @@ def trsh_ess_job(label: str,
cpu_cores (int): The total number of cpu cores requested for a job.
ess_trsh_methods (list): The troubleshooting methods tried for this job.
is_h (bool): Whether the species is a hydrogen atom (or its isotope). e.g., H, D, T.
is_monoatomic (bool): Whether the species is monoatomic (single atom).

Todo:
- Change server to one that has the same ESS if running out of disk space.
Expand Down Expand Up @@ -1016,7 +1018,10 @@ def trsh_ess_job(label: str,
couldnt_trsh = True

elif 'orca' in software:
if 'Memory' in job_status['keywords']:
if 'dlpno' in level_of_theory.method and (is_monoatomic or is_h):
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This shouldn't happen; if it does, it means the Scheduler is buggy. I almost think we could raise an error here (so devs know).

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair, changed it

raise TrshError(f'DLPNO methods are incompatible with monoatomic species {label} in Orca. '
f'This should have been caught by the Scheduler before job submission.')
elif 'Memory' in job_status['keywords']:
# Increase memory allocation.
# job_status will be for example
# `Error (ORCA_SCF): Not enough memory available! Please increase MaxCore to more than: 289 MB`.
Expand Down Expand Up @@ -1067,9 +1072,6 @@ def trsh_ess_job(label: str,
logger.info(f'Troubleshooting {job_type} job in {software} for {label} using {cpu_cores} cpu cores.')
if 'cpu' not in ess_trsh_methods:
ess_trsh_methods.append('cpu')
elif 'dlpno' in level_of_theory.method and is_h:
logger.error('DLPNO method is not supported for H atom (or its isotope D or T) in Orca.')
couldnt_trsh = True
else:
couldnt_trsh = True

Expand Down
21 changes: 21 additions & 0 deletions arc/job/trsh_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@

import arc.job.trsh as trsh
from arc.common import ARC_TESTING_PATH
from arc.exceptions import TrshError
from arc.imports import settings
from arc.parser.parser import parse_1d_scan_energies

Expand Down Expand Up @@ -775,6 +776,26 @@ def test_trsh_ess_job(self):
self.assertIn('cpu', ess_trsh_methods)
self.assertEqual(cpu_cores, 10)

# Orca: test 5
# Test that DLPNO + monoatomic species raises TrshError
label = 'H'
level_of_theory = {'method': 'dlpno-ccsd(T)'}
server = 'server1'
job_type = 'sp'
software = 'orca'
fine = True
memory_gb = 16
cpu_cores = 12
num_heavy_atoms = 0
ess_trsh_methods = []
job_status = {'keywords': ['MDCI', 'Memory'],
'error': 'MDCI error in Orca. Assuming memory allocation error.'}
with self.assertRaises(TrshError):
trsh.trsh_ess_job(label, level_of_theory, server, job_status,
job_type, software, fine, memory_gb,
num_heavy_atoms, cpu_cores, ess_trsh_methods,
is_h=True, is_monoatomic=True)

def test_determine_job_log_memory_issues(self):
"""Test the determine_job_log_memory_issues() function."""
job_log_path_1 = os.path.join(ARC_TESTING_PATH, 'job_log', 'no_issues.log')
Expand Down
28 changes: 22 additions & 6 deletions arc/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,9 +67,9 @@
logger = get_logger()

LOWEST_MAJOR_TS_FREQ, HIGHEST_MAJOR_TS_FREQ, default_job_settings, \
default_job_types, default_ts_adapters, max_rotor_trsh, rotor_scan_resolution, servers_dict = \
default_job_types, default_ts_adapters, max_ess_trsh, max_rotor_trsh, rotor_scan_resolution, servers_dict = \
settings['LOWEST_MAJOR_TS_FREQ'], settings['HIGHEST_MAJOR_TS_FREQ'], settings['default_job_settings'], \
settings['default_job_types'], settings['ts_adapters'], settings['max_rotor_trsh'], \
settings['default_job_types'], settings['ts_adapters'], settings['max_ess_trsh'], settings['max_rotor_trsh'], \
settings['rotor_scan_resolution'], settings['servers']


Expand Down Expand Up @@ -1444,10 +1444,16 @@ def run_sp_job(self,
level_of_theory='ccsd/cc-pvdz',
job_type='sp')
return
mol = self.species_dict[label].mol
if mol is not None and len(mol.atoms) == 1 and mol.atoms[0].element.symbol == 'H' and 'DLPNO' in level.method:
# Run only CCSD for an H atom instead of DLPNO-CCSD(T) / etc.
level = Level(repr='ccsd/vtz', software=level.software, args=level.args)
if self.species_dict[label].is_monoatomic() and 'dlpno' in level.method:
species = self.species_dict[label]
if species.mol.atoms[0].element.symbol in ('H', 'D', 'T'):
logger.info(f'Using HF/{level.basis} for {label} (single electron, no correlation).')
level = Level(method='hf', basis=level.basis, software=level.software, args=level.args)
else:
canonical_method = level.method.replace('dlpno-', '')
logger.info(f'DLPNO methods are incompatible with monoatomic species {label}. '
f'Using {canonical_method}/{level.basis} instead.')
level = Level(method=canonical_method, basis=level.basis, software=level.software, args=level.args)
if self.job_types['sp']:
if self.species_dict[label].multi_species:
if self.output_multi_spc[self.species_dict[label].multi_species].get('sp', False):
Expand Down Expand Up @@ -3575,6 +3581,15 @@ def troubleshoot_ess(self,
if job.job_adapter == 'gaussian':
if self.species_dict[label].checkfile is None:
self.species_dict[label].checkfile = job.checkfile
# Guard against infinite troubleshooting loops.
trsh_attempts = job.ess_trsh_methods.count('trsh_attempt')
if trsh_attempts >= max_ess_trsh:
logger.info(f'Could not troubleshoot {job.job_type} for {label}. '
f'Reached max troubleshooting attempts ({max_ess_trsh}).')
self.output[label]['errors'] += f'Error: ESS troubleshooting attempts exhausted for {label} {job.job_type}; '
return
job.ess_trsh_methods.append('trsh_attempt')

# Determine if the species is a hydrogen atom (or its isotope).
is_h = self.species_dict[label].number_of_atoms == 1 and \
self.species_dict[label].mol.atoms[0].element.symbol in ['H', 'D', 'T']
Expand All @@ -3586,6 +3601,7 @@ def troubleshoot_ess(self,
server=job.server,
job_status=job.job_status[1],
is_h=is_h,
is_monoatomic=self.species_dict[label].is_monoatomic(),
job_type=job.job_type,
num_heavy_atoms=self.species_dict[label].number_of_heavy_atoms,
software=job.job_adapter,
Expand Down
38 changes: 38 additions & 0 deletions arc/scheduler_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,44 @@ def test_add_label_to_unique_species_labels(self):
self.assertEqual(unique_label, 'new_species_15_1')
self.assertEqual(self.sched2.unique_species_labels, ['methylamine', 'C2H6', 'CtripCO', 'new_species_15', 'new_species_15_0', 'new_species_15_1'])

def test_troubleshoot_ess_max_attempts(self):
    """Check that troubleshoot_ess gives up once the recorded attempts reach max_ess_trsh."""
    species_label = 'methylamine'

    # Start from a freshly initialized output dictionary with no errors recorded.
    self.sched1.output = {}
    self.sched1.initialize_output_dict()
    self.assertEqual(self.sched1.output[species_label]['errors'], '')

    opt_level = Level(repr={'method': 'wb97xd', 'basis': 'def2tzvp'})
    exhausted_job = job_factory(
        job_adapter='gaussian',
        project='project_test',
        ess_settings=self.ess_settings,
        species=[self.spc1],
        xyz=self.spc1.get_xyz(),
        job_type='opt',
        level=opt_level,
        project_directory=self.project_directory,
        job_num=200,
    )
    # Pretend this job was already troubleshot 25 times.
    exhausted_job.ess_trsh_methods = ['trsh_attempt' for _ in range(25)]

    self.sched1.troubleshoot_ess(
        label=species_label,
        job=exhausted_job,
        level_of_theory=Level(repr='wb97xd/def2tzvp'),
    )

    recorded_errors = self.sched1.output[species_label]['errors']
    self.assertIn('ESS troubleshooting attempts exhausted', recorded_errors)

def test_troubleshoot_ess_under_max_attempts(self):
    """
    Test that troubleshoot_ess does not block when under the max_ess_trsh limit.

    The job starts with 24 recorded attempts, one fewer than the 25 attempts at which
    the sibling test ``test_troubleshoot_ess_max_attempts`` expects the guard to fire.
    The actual troubleshooting routine is replaced by a sentinel exception, so the test
    proves the guard let the call through without running any real troubleshooting logic.
    """
    label = 'methylamine'
    self.sched1.output = dict()
    self.sched1.initialize_output_dict()

    class TrshReached(Exception):
        """Sentinel raised in place of the real ESS troubleshooting routine."""

    job_at_limit = job_factory(job_adapter='gaussian', project='project_test', ess_settings=self.ess_settings,
                               species=[self.spc1], xyz=self.spc1.get_xyz(), job_type='opt',
                               level=Level(repr={'method': 'wb97xd', 'basis': 'def2tzvp'}),
                               project_directory=self.project_directory, job_num=202)
    job_at_limit.ess_trsh_methods = ['trsh_attempt'] * 24

    # NOTE(review): assumes arc/scheduler.py imports ``trsh_ess_job`` by name, so that
    # 'arc.scheduler.trsh_ess_job' is the correct patch target - confirm against the module's imports.
    with patch('arc.scheduler.trsh_ess_job', side_effect=TrshReached):
        with self.assertRaises(TrshReached):
            self.sched1.troubleshoot_ess(label=label, job=job_at_limit,
                                         level_of_theory=Level(repr='wb97xd/def2tzvp'))

    # The guard must have recorded this attempt before handing over to the troubleshooting routine ...
    self.assertEqual(job_at_limit.ess_trsh_methods.count('trsh_attempt'), 25)
    # ... and must not have reported that the attempts were exhausted.
    self.assertNotIn('ESS troubleshooting attempts exhausted', self.sched1.output[label]['errors'])

@patch('arc.scheduler.Scheduler.run_opt_job')
def test_switch_ts_cleanup(self, mock_run_opt):
"""Test that switch_ts resets job_types, convergence, cleans up IRC species, and clears pending pipes."""
Expand Down
1 change: 1 addition & 0 deletions arc/settings/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -272,6 +272,7 @@
inconsistency_ab = 0.3 # maximum allowed inconsistency between consecutive points in the scan given as a fraction
# of the maximum scan energy. Default: 30%
max_rotor_trsh = 4 # maximum number of times to troubleshoot the same rotor scan
max_ess_trsh = 25 # maximum number of times to troubleshoot the same ESS job (opt, sp, freq, etc.)

# Thresholds for identifying significant changes in bond distance, bond angle,
# or torsion angle during a rotor scan. For a TS, only 'bond' and 'torsion' are considered.
Expand Down
Loading