Source code for gmso.formats.gro

"""Read and write Gromos87 (.GRO) file format."""

import datetime
import logging
from pathlib import Path
from typing import Union

import numpy as np
import unyt as u
from unyt.array import allclose_units

import gmso
from gmso.core.atom import Atom
from gmso.core.box import Box
from gmso.core.topology import Topology
from gmso.formats.formats_registry import loads_as, saves_as

logger = logging.getLogger(__name__)


[docs] @loads_as(".gro") def read_gro(filename: Union[str, Path]) -> Topology: """Read a Gromos87 (GRO) file and return a :class:`~gmso.Topology`. The GRO format is a plain-text structure file used by the GROMACS simulation engine. It encodes simulation-box parameters, atom count, residue/atom names, positions, and (optionally) velocities. Parameters ---------- filename : str or pathlib.Path Path to the ``.gro`` file to read. Returns ------- gmso.Topology Topology containing the sites and box parsed from *filename*. Notes ----- * GRO files do not specify bonds; the returned topology will have no connections. * Velocities are detected but not parsed. * Only single-frame GRO files are supported. * Residue/resid information is mapped to ``site.molecule`` and ``site.residue`` but the original resid integers are rebased to 0-indexed integers. """ top = Topology() with open(filename, "r") as gro_file: top.name = str(gro_file.readline().strip()) n_atoms = int(gro_file.readline()) coords = u.nm * np.zeros(shape=(n_atoms, 3)) for row, _ in enumerate(coords): line = gro_file.readline() if not line: msg = ( "Incorrect number of lines in .gro file. Based on the " "number in the second line of the file, {} rows of" "atoms were expected, but at least one fewer was found." ) raise ValueError(msg.format(n_atoms)) res_id = int(line[:5].strip()) - 1 # reformat from 1 to 0 index in gmso res_name = line[5:10].strip() atom_name = line[10:15].strip() positions = line[20:].split() coords[row] = u.nm * np.array( [ float(positions[0]), float(positions[1]), float(positions[2]), ] ) site = Atom(name=atom_name, position=coords[row]) site.molecule = (res_name, res_id) site.residue = (res_name, res_id) top.add_site(site, update_types=False) if len(positions) == 6: logger.info("Velocity information presents but will not be parsed.") top.update_topology() # Box information line = gro_file.readline().split() top.box = Box(u.nm * np.array([float(val) for val in line[:3]])) # Verify we have read the last line by ensuring the next line in blank line = gro_file.readline() if line: msg = ( "Incorrect number of lines in input file. Based on the " "number in the second line of the file, {} rows of atoms " "were expected, but at least one more was found." ) raise ValueError(msg.format(n_atoms)) return top
[docs] @saves_as(".gro") def write_gro( top: Topology, filename: Union[str, Path], n_decimals: int = 3, shift_coord: bool = False, ) -> None: """Write a :class:`~gmso.Topology` to a Gromos87 (GRO) file. Parameters ---------- top : gmso.Topology Topology to write. filename : str or pathlib.Path Destination file path. n_decimals : int, optional, default=3 Number of decimal places to use for coordinate values. shift_coord : bool, optional, default=False When ``True``, shift all coordinates so that the minimum position in each dimension is zero. Useful for visualisation; not required by the GRO format. Notes ----- * Velocities are not written. * Residue/resid indices cycle back at 99 999 to comply with the fixed-width GRO column format. """ pos_array = np.ndarray.copy(top.positions) if shift_coord: pos_array = _validate_positions(pos_array) with open(filename, "w") as out_file: out_file.write( "{} written by GMSO {} at {}\n".format( top.name if top.name is not None else "", gmso.__version__, str(datetime.datetime.now()), ) ) out_file.write("{:d}\n".format(top.n_sites)) out_file.write(_prepare_atoms(top, pos_array, n_decimals)) out_file.write(_prepare_box(top))
def _validate_positions(pos_array): """Modify coordinates, as necessary, to fit limitations of the GRO format.""" if np.min(pos_array) < 0: logger.info( "Topology contains some negative positions. Translating " "in order to ensure all coordinates are non-negative." ) min_xyz = np.min(pos_array, axis=0) min_xyz0 = np.where(min_xyz < 0 * min_xyz.units, min_xyz, 0 * min_xyz.units) pos_array -= min_xyz0 return pos_array def _prepare_atoms(top, updated_positions, n_decimals): out_str = str() logger.info( "Residue information is parsed from site.molecule," "or site.residue if site.molecule does not exist." "Note that the residue idx will be bumped by 1 since GROMACS utilize 1-index." ) # we need to sort through the sites to provide a unique number for each molecule/residue # we will store the unique id in dictionary where the key is the idx site_res_id = dict() seen = dict() for idx, site in enumerate(top.sites): if site.molecule: if site.molecule not in seen: seen[site.molecule] = len(seen) + 1 site_res_id[idx] = seen[site.molecule] elif site.residue: if site.residue not in seen: seen[site.residue] = len(seen) + 1 site_res_id[idx] = seen[site.residue] else: if "MOL" not in seen: seen["MOL"] = len(seen) + 1 site_res_id[idx] = seen["MOL"] for idx, (site, pos) in enumerate(zip(top.sites, updated_positions)): if site.molecule: res_id = site_res_id[idx] res_name = ( site.molecule.name if len(site.molecule.name) <= 5 else site.molecule.name[:5] ) site.label = f"res_id: {res_id}, " + site.label elif site.residue: res_id = site_res_id[idx] res_name = ( site.residue.name if len(site.residue.name) <= 5 else site.residue.name[:5] ) site.label = f"res_id: {res_id}, " + site.label else: res_id = site_res_id[idx] res_name = "MOL" site.label = f"res_id: {res_id}, " + site.label atom_name = site.name if len(site.name) <= 5 else site.name[:5] atom_id = idx + 1 # gromacs doesn't actually use the atom id in the .gro file # so we will just loop back to 1 once we exceed 99999 # as is suggested in the FAQ in the manual. max_val = 99999 atom_id = atom_id % max_val res_id = res_id % max_val varwidth = 5 + n_decimals crdfmt = f"{{:{varwidth}.{n_decimals}f}}" # preformat pos str crt_x = crdfmt.format(pos[0].in_units(u.nm).value)[:varwidth] crt_y = crdfmt.format(pos[1].in_units(u.nm).value)[:varwidth] crt_z = crdfmt.format(pos[2].in_units(u.nm).value)[:varwidth] out_str = out_str + "{0:5d}{1:5s}{2:>5s}{3:5d}{4}{5}{6}\n".format( res_id, res_name, atom_name, atom_id, crt_x, crt_y, crt_z, ) return out_str def _prepare_box(top): out_str = str() if allclose_units( top.box.angles, u.degree * [90, 90, 90], rtol=1e-5, atol=0.1 * u.degree, ): out_str = out_str + " {:0.5f} {:0.5f} {:0.5f}\n".format( top.box.lengths[0].in_units(u.nm).value.round(6), top.box.lengths[1].in_units(u.nm).value.round(6), top.box.lengths[2].in_units(u.nm).value.round(6), ) else: # TODO: Work around GROMACS's triclinic limitations #30 vectors = top.box.get_vectors() out_str = ( out_str + " {:0.5f} {:0.5f} {:0.5f} {:0.5f} {:0.5f} {:0.5f} {:0.5f} {:0.5f} {:0.5f} \n".format( vectors[0, 0].in_units(u.nm).value.round(6), vectors[1, 1].in_units(u.nm).value.round(6), vectors[2, 2].in_units(u.nm).value.round(6), vectors[0, 1].in_units(u.nm).value.round(6), vectors[0, 2].in_units(u.nm).value.round(6), vectors[1, 0].in_units(u.nm).value.round(6), vectors[1, 2].in_units(u.nm).value.round(6), vectors[2, 0].in_units(u.nm).value.round(6), vectors[2, 1].in_units(u.nm).value.round(6), ) ) return out_str