Source code for gmso.formats.gro

"""Read and write Gromos87 (.GRO) file format."""

import datetime
import logging
from pathlib import Path
from typing import Union

import numpy as np
import unyt as u
from unyt.array import allclose_units

import gmso
from gmso.core.atom import Atom
from gmso.core.box import Box
from gmso.core.topology import Topology
from gmso.formats.formats_registry import loads_as, saves_as

logger = logging.getLogger(__name__)



[docs]
@loads_as(".gro")
def read_gro(filename: Union[str, Path]) -> Topology:
    """Read a Gromos87 (GRO) file and return a :class:`~gmso.Topology`.

    The GRO format is a plain-text structure file used by the GROMACS
    simulation engine.  It encodes simulation-box parameters, atom count,
    residue/atom names, positions, and (optionally) velocities.

    Parameters
    ----------
    filename : str or pathlib.Path
        Path to the ``.gro`` file to read.

    Returns
    -------
    gmso.Topology
        Topology containing the sites and box parsed from *filename*.

    Notes
    -----
    * GRO files do not specify bonds; the returned topology will have no
      connections.
    * Velocities are detected but not parsed.
    * Only single-frame GRO files are supported.
    * Residue/resid information is mapped to ``site.molecule`` and
      ``site.residue`` but the original resid integers are rebased to
      0-indexed integers.
    """
    top = Topology()

    with open(filename, "r") as gro_file:
        top.name = str(gro_file.readline().strip())
        n_atoms = int(gro_file.readline())
        coords = u.nm * np.zeros(shape=(n_atoms, 3))
        for row, _ in enumerate(coords):
            line = gro_file.readline()
            if not line:
                msg = (
                    "Incorrect number of lines in .gro file. Based on the "
                    "number in the second line of the file, {} rows of"
                    "atoms were expected, but at least one fewer was found."
                )
                raise ValueError(msg.format(n_atoms))
            res_id = int(line[:5].strip()) - 1  # reformat from 1 to 0 index in gmso
            res_name = line[5:10].strip()
            atom_name = line[10:15].strip()

            positions = line[20:].split()
            coords[row] = u.nm * np.array(
                [
                    float(positions[0]),
                    float(positions[1]),
                    float(positions[2]),
                ]
            )
            site = Atom(name=atom_name, position=coords[row])

            site.molecule = (res_name, res_id)
            site.residue = (res_name, res_id)
            top.add_site(site, update_types=False)

        if len(positions) == 6:
            logger.info("Velocity information presents but will not be parsed.")
        top.update_topology()

        # Box information
        line = gro_file.readline().split()
        top.box = Box(u.nm * np.array([float(val) for val in line[:3]]))

        # Verify we have read the last line by ensuring the next line in blank
        line = gro_file.readline()
        if line:
            msg = (
                "Incorrect number of lines in input file. Based on the "
                "number in the second line of the file, {} rows of atoms "
                "were expected, but at least one more was found."
            )
            raise ValueError(msg.format(n_atoms))

    return top




[docs]
@saves_as(".gro")
def write_gro(
    top: Topology,
    filename: Union[str, Path],
    n_decimals: int = 3,
    shift_coord: bool = False,
) -> None:
    """Write a :class:`~gmso.Topology` to a Gromos87 (GRO) file.

    Parameters
    ----------
    top : gmso.Topology
        Topology to write.
    filename : str or pathlib.Path
        Destination file path.
    n_decimals : int, optional, default=3
        Number of decimal places to use for coordinate values.
    shift_coord : bool, optional, default=False
        When ``True``, shift all coordinates so that the minimum position in
        each dimension is zero.  Useful for visualisation; not required by the
        GRO format.

    Notes
    -----
    * Velocities are not written.
    * Residue/resid indices cycle back at 99 999 to comply with the fixed-width
      GRO column format.
    """
    pos_array = np.ndarray.copy(top.positions)
    if shift_coord:
        pos_array = _validate_positions(pos_array)

    with open(filename, "w") as out_file:
        out_file.write(
            "{} written by GMSO {} at {}\n".format(
                top.name if top.name is not None else "",
                gmso.__version__,
                str(datetime.datetime.now()),
            )
        )
        out_file.write("{:d}\n".format(top.n_sites))
        out_file.write(_prepare_atoms(top, pos_array, n_decimals))
        out_file.write(_prepare_box(top))



def _validate_positions(pos_array):
    """Modify coordinates, as necessary, to fit limitations of the GRO format."""
    if np.min(pos_array) < 0:
        logger.info(
            "Topology contains some negative positions. Translating "
            "in order to ensure all coordinates are non-negative."
        )
    min_xyz = np.min(pos_array, axis=0)
    min_xyz0 = np.where(min_xyz < 0 * min_xyz.units, min_xyz, 0 * min_xyz.units)

    pos_array -= min_xyz0

    return pos_array


def _prepare_atoms(top, updated_positions, n_decimals):
    out_str = str()
    logger.info(
        "Residue information is parsed from site.molecule,"
        "or site.residue if site.molecule does not exist."
        "Note that the residue idx will be bumped by 1 since GROMACS utilize 1-index."
    )
    # we need to sort through the sites to provide a unique number for each molecule/residue
    # we will store the unique id in dictionary where the key is the idx
    site_res_id = dict()
    seen = dict()
    for idx, site in enumerate(top.sites):
        if site.molecule:
            if site.molecule not in seen:
                seen[site.molecule] = len(seen) + 1
            site_res_id[idx] = seen[site.molecule]
        elif site.residue:
            if site.residue not in seen:
                seen[site.residue] = len(seen) + 1
            site_res_id[idx] = seen[site.residue]
        else:
            if "MOL" not in seen:
                seen["MOL"] = len(seen) + 1
            site_res_id[idx] = seen["MOL"]

    for idx, (site, pos) in enumerate(zip(top.sites, updated_positions)):
        if site.molecule:
            res_id = site_res_id[idx]
            res_name = (
                site.molecule.name
                if len(site.molecule.name) <= 5
                else site.molecule.name[:5]
            )

            site.label = f"res_id: {res_id}, " + site.label
        elif site.residue:
            res_id = site_res_id[idx]
            res_name = (
                site.residue.name
                if len(site.residue.name) <= 5
                else site.residue.name[:5]
            )
            site.label = f"res_id: {res_id}, " + site.label
        else:
            res_id = site_res_id[idx]
            res_name = "MOL"

            site.label = f"res_id: {res_id}, " + site.label

        atom_name = site.name if len(site.name) <= 5 else site.name[:5]
        atom_id = idx + 1

        # gromacs doesn't actually use the atom id in the .gro file
        # so we will just loop back to 1 once we exceed 99999
        # as is suggested in the FAQ in the manual.

        max_val = 99999
        atom_id = atom_id % max_val
        res_id = res_id % max_val

        varwidth = 5 + n_decimals
        crdfmt = f"{{:{varwidth}.{n_decimals}f}}"

        # preformat pos str
        crt_x = crdfmt.format(pos[0].in_units(u.nm).value)[:varwidth]
        crt_y = crdfmt.format(pos[1].in_units(u.nm).value)[:varwidth]
        crt_z = crdfmt.format(pos[2].in_units(u.nm).value)[:varwidth]
        out_str = out_str + "{0:5d}{1:5s}{2:>5s}{3:5d}{4}{5}{6}\n".format(
            res_id,
            res_name,
            atom_name,
            atom_id,
            crt_x,
            crt_y,
            crt_z,
        )

    return out_str


def _prepare_box(top):
    out_str = str()
    if allclose_units(
        top.box.angles,
        u.degree * [90, 90, 90],
        rtol=1e-5,
        atol=0.1 * u.degree,
    ):
        out_str = out_str + " {:0.5f} {:0.5f} {:0.5f}\n".format(
            top.box.lengths[0].in_units(u.nm).value.round(6),
            top.box.lengths[1].in_units(u.nm).value.round(6),
            top.box.lengths[2].in_units(u.nm).value.round(6),
        )
    else:
        # TODO: Work around GROMACS's triclinic limitations #30
        vectors = top.box.get_vectors()
        out_str = (
            out_str
            + " {:0.5f} {:0.5f} {:0.5f} {:0.5f} {:0.5f} {:0.5f} {:0.5f} {:0.5f} {:0.5f} \n".format(
                vectors[0, 0].in_units(u.nm).value.round(6),
                vectors[1, 1].in_units(u.nm).value.round(6),
                vectors[2, 2].in_units(u.nm).value.round(6),
                vectors[0, 1].in_units(u.nm).value.round(6),
                vectors[0, 2].in_units(u.nm).value.round(6),
                vectors[1, 0].in_units(u.nm).value.round(6),
                vectors[1, 2].in_units(u.nm).value.round(6),
                vectors[2, 0].in_units(u.nm).value.round(6),
                vectors[2, 1].in_units(u.nm).value.round(6),
            )
        )
    return out_str