This issue is part of a Codex global repository code scan.
parse_sqm_out() initializes forces as an empty list, converts it to np.array([forces]), and then checks len(forces) > 0. For files without a force block, this produces an array with shape (1, 0), so the parser stores data["forces"] even though no forces exist.
Affected code:
|
def parse_sqm_out(fname: FileType): |
|
"""Read atom symbols, charges and coordinates from ambertools sqm.out file.""" |
|
atom_symbols = [] |
|
coords = [] |
|
charges = [] |
|
forces = [] |
|
energies = [] |
|
|
|
with open_file(fname) as f: |
|
flag = START |
|
for line in f: |
|
if line.startswith(" Total SCF energy"): |
|
energy = float(line.strip().split()[-2]) |
|
energies = [energy] |
|
elif line.startswith(" Atom Element Mulliken Charge"): |
|
flag = READ_CHARGE |
|
charges = [] |
|
elif line.startswith(" Total Mulliken Charge"): |
|
flag = START |
|
elif line.startswith(" Final Structure"): |
|
flag = READ_COORDS_START |
|
coords = [] |
|
elif line.startswith("QMMM: Forces on QM atoms"): |
|
flag = READ_FORCES |
|
forces = [] |
|
elif flag == READ_CHARGE: |
|
ls = line.strip().split() |
|
atom_symbols.append(ls[-2]) |
|
charges.append(float(ls[-1])) |
|
elif READ_COORDS_START <= flag < READ_COORDS: |
|
flag += 1 |
|
elif flag == READ_COORDS: |
|
coords.append([float(x) for x in line.strip().split()[-3:]]) |
|
if len(coords) == len(charges): |
|
flag = START |
|
elif flag == READ_FORCES: |
|
ll = line.strip() |
|
if not ll.startswith("QMMM: Atm "): |
|
flag = START |
|
continue |
|
forces.append([float(ll[-60:-40]), float(ll[-40:-20]), float(ll[-20:])]) |
|
if len(forces) == len(charges): |
|
flag = START |
|
|
|
data = {} |
|
atom_names, data["atom_types"], atom_numbs = np.unique( |
|
atom_symbols, return_inverse=True, return_counts=True |
|
) |
|
data["charges"] = np.array(charges) |
|
data["atom_names"] = list(atom_names) |
|
data["atom_numbs"] = list(atom_numbs) |
|
data["orig"] = np.array([0, 0, 0]) |
|
data["cells"] = np.array( |
|
[[[100.0, 0.0, 0.0], [0.0, 100.0, 0.0], [0.0, 0.0, 100.0]]] |
|
) |
|
data["nopbc"] = True |
|
data["coords"] = np.array([coords]) |
|
|
|
energies = np.array(energies) |
|
forces = -np.array([forces], dtype=np.float64) * kcal2ev |
|
if len(forces) > 0: |
|
data["energies"] = energies |
|
data["forces"] = forces |
|
|
|
return data |
|
def from_labeled_system(self, fname, **kwargs): |
|
"""Read from ambertools sqm.out.""" |
|
data = dpdata.formats.amber.sqm.parse_sqm_out(fname) |
|
assert "forces" in list(data.keys()), f"No forces in {fname}" |
|
return data |
Minimal reproducer using the existing no-forces fixture:
import dpdata
for cls in [dpdata.System, dpdata.LabeledSystem]:
s = cls("tests/amber/sqm_no_forces.out", fmt="sqm/out")
print(cls.__name__, s.data.keys(), s.data["forces"].shape)
Current output:
System dict_keys([... 'energies', 'forces']) (1, 0)
LabeledSystem dict_keys([... 'energies', 'forces']) (1, 0)
Expected behavior: no-force SQM output should either omit forces or fail as a labeled system. It should not create a malformed (1, 0) forces array that passes the current from_labeled_system() key-existence check.
This issue is part of a Codex global repository code scan.
parse_sqm_out()initializesforcesas an empty list, converts it tonp.array([forces]), and then checkslen(forces) > 0. For files without a force block, this produces an array with shape(1, 0), so the parser storesdata["forces"]even though no forces exist.Affected code:
dpdata/dpdata/formats/amber/sqm.py
Lines 23 to 87 in a7a50bf
dpdata/dpdata/plugins/amber.py
Lines 69 to 73 in a7a50bf
Minimal reproducer using the existing no-forces fixture:
Current output:
Expected behavior: no-force SQM output should either omit
forcesor fail as a labeled system. It should not create a malformed(1, 0)forces array that passes the currentfrom_labeled_system()key-existence check.