Source code for gmso.formats.mol2

"""Convert to and from a TRIPOS mol2 file."""

import datetime
import itertools
import logging
from pathlib import Path
from typing import TYPE_CHECKING, Union

import unyt as u

if TYPE_CHECKING:
    from gmso.core.topology import Topology

from gmso import Atom, Bond, Box, Topology
from gmso import __version__ as gmso_version
from gmso.abc.abstract_site import Molecule, Residue
from gmso.core.element import element_by_name, element_by_symbol
from gmso.formats.formats_registry import loads_as, saves_as

logger = logging.getLogger(__name__)


[docs] @loads_as(".mol2") def read_mol2( filename: Union[str, Path], site_type: str = "atom", verbose: bool = False, ) -> "Topology": """Read a TRIPOS MOL2 file and return a :class:`~gmso.Topology`. Parses sites, bonds, and box information from a MOL2 file. Forcefield parameters present in the file are not converted. Parameters ---------- filename : str or pathlib.Path Path to the ``.mol2`` file. site_type : str, optional, default='atom' Controls element identification. Use ``'atom'`` to read element symbols from the file; use ``'lj'`` to skip element lookup and store the raw site name instead (useful for coarse-grained systems). verbose : bool, optional, default=False When ``True``, emit warnings for any assumptions made during parsing. Returns ------- gmso.Topology Topology containing the sites, bonds, and box from *filename*. Notes ----- Atom positions in MOL2 files are assumed to be in Angstroms. """ mol2path = Path(filename) if not mol2path.exists(): msg = "Provided path to file that does not exist" raise FileNotFoundError(msg) # Initialize topology topology = Topology(name=mol2path.stem) # save the name from the filename with open(filename, "r") as f: fcontents = f.readlines() sections = {"Meta": list()} section_key = "Meta" # Used to parse the meta info at top of the file for line in fcontents: if "@<TRIPOS>" in line: section_key = line.strip("\n") sections[section_key] = list() else: sections[section_key].append(line) _parse_site = _parse_lj if site_type == "lj" else _parse_atom supported_rti = { "@<TRIPOS>ATOM": _parse_site, "@<TRIPOS>BOND": _parse_bond, "@<TRIPOS>CRYSIN": _parse_box, "@<TRIPOS>FF_PBC": _parse_box, "@<TRIPOS>MOLECULE": _parse_molecule, } for section in sections: if section not in supported_rti: logger.info( f"The record type indicator {section} is not supported. " "Skipping current section and moving to the next RTI header." ) else: supported_rti[section](topology, sections[section], verbose) # TODO: read in parameters to correct attribute as well. This can be saved in various rti sections. return topology
[docs] @saves_as(".mol2") def write_mol2( top: "Topology", filename: Union[str, Path], n_decimals: int = 4, ) -> None: """Write a :class:`~gmso.Topology` to a TRIPOS MOL2 file. Parameters ---------- top : gmso.Topology Topology to write. filename : str or pathlib.Path Destination file path. n_decimals : int, optional, default=4 Number of decimal places written for each coordinate value. Returns ------- None Writes the MOL2 file to *filename* in place. """ with open(filename, "w") as out_file: out_file.write( "{} written by GMSO {} at {}\n".format( top.name if top.name is not None else "", gmso_version, str(datetime.datetime.now()), ) ) _write_molecule_info(top, out_file) _write_sites_info(top, out_file, n_decimals) _write_bonds_info(top, out_file) _write_box_info(top, out_file)
def _parse_lj(top, section, verbose): """Parse atom of lj style from mol2 file.""" for line in section: if line.strip(): content = line.split() position = [float(x) for x in content[2:5]] * u.Å try: charge = float(content[8]) except IndexError: if verbose: logger.info( f"No charge was detected for site {content[1]} with index {content[0]}" ) charge = None atom = Atom( name=content[1], position=position.to("nm"), charge=charge, residue=(content[7], int(content[6])), ) top.add_site(atom) def _parse_atom(top, section, verbose): """Parse atom information from the mol2 file.""" def parse_ele(*symbols): methods = [element_by_name, element_by_symbol] elem = None for symbol, method in itertools.product(symbols, methods): elem = method(symbol) if elem: break return elem for line in section: if line.strip(): content = line.split() position = [float(x) for x in content[2:5]] * u.Å element = parse_ele(content[5], content[1]) if not element and verbose: logger.info( f"No element detected for site {content[1]} with index {content[0]}, " "consider manually adding the element to the topology" ) try: charge = float(content[8]) except IndexError: if verbose: logger.info( f"No charge was detected for site {content[1]} with index {content[0]}" ) charge = None molecule = top.label if top.__dict__.get("label") else top.name atom = Atom( name=content[1], position=position.to("nm"), element=element, charge=charge, residue=Residue(name=content[7], number=int(content[6])), molecule=Molecule(name=molecule, number=0), ) top.add_site(atom) def _parse_bond(top, section, verbose): """Parse bond information from the mol2 file.""" for line in section: if line.strip(): content = line.split() bond = Bond( connection_members=tuple( sorted( [ top.sites[int(content[1]) - 1], top.sites[int(content[2]) - 1], ], key=lambda x: top.get_index(x), ) ) ) top.add_connection(bond) def _parse_box(top, section, verbose): """Parse box information from the mol2 file.""" if top.box: logger.info( f"This mol2 file has two boxes to be read in, only reading in one with dimensions {top.box}" ) for line in section: if line.strip(): content = line.split() top.box = Box( lengths=[float(x) for x in content[0:3]] * u.Å, angles=[float(x) for x in content[3:6]] * u.degree, ) def _parse_molecule(top, section, verbose): """Parse molecule information from the mol2 file.""" top.label = str(section[0].strip()) def _write_sites_info(top, out_file, n_decimals): """Write site information to ATOM section.""" # TODO: Create rules to make sure nothing is too long, so that it cuts off. # ATOM top.sites # @<TRIPOS>ATOM # 1 C -0.7600 1.1691 -0.0005 C.ar 1 BENZENE 0.000 out_file.write("@<TRIPOS>ATOM\n") for index, site in enumerate(top.sites): lineList = [ str(index + 1), site.element.symbol, *map(str, site.position.in_units("angstrom").value.round(n_decimals)), site.atom_type.name if site.atom_type else site.name, str(site.molecule.number), site.molecule.name, str(site.charge) if site.charge else "0.00", ] formattedStr = "\t".join(lineList) + "\n" out_file.writelines(formattedStr) def _write_bonds_info(top, out_file): """writes the bonds in a topology with assigned atom number.""" # TODO: need to add bond type info as well # TODO: double vs single bonds? out_file.write("@<TRIPOS>BOND\n") atom_id = {site: idx + 1 for idx, site in enumerate(top.sites)} for index, bond in enumerate(top.bonds): idx1 = atom_id[bond.connection_members[0]] idx2 = atom_id[bond.connection_members[1]] idx1, idx2 = min(idx1, idx2), max(idx1, idx2) bond_info_str = f"{index + 1} {idx1} {idx2} 1\n" out_file.write(bond_info_str) def _write_molecule_info(top, out_file): # TODO: This should check the last three integers to write them correctly # TODO: This should handle different molecule types out_file.write("@<TRIPOS>MOLECULE\n") name = top.name num_atoms = top.n_sites num_bonds = top.n_bonds out_file.write(f"{name}\n{num_atoms} {num_bonds} 1 0 0\n") def _write_box_info(top, out_file): # @<TRIPOS>SUBSTRUCTURE # 1 **** 1 TEMP 0 **** **** 0 ROOT pass