Source code for gaddlemaps.components._system

#    Gaddlemaps python module.
#    Copyright (C) 2019-2021 José Manuel Otero Mato, Hadrián Montes Campos, Luis Miguel Varela Cabo
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
"""
This submodule defines the System and SystemGro classes.
"""

from io import TextIOWrapper
import typing
from collections import Counter
from more_itertools import islice_extended, last
from typing import Dict, Generator, List, Mapping, Tuple, Union, overload

import numpy as np

from ..parsers import open_coordinate_file
from . import AtomGro, Molecule, MoleculeTop, Residue


[docs]class System:
    """
    Class to manage simulation systems.

    A System object is formed by Molecule objects. Only the molecules
    corresponding to the input ftops will be loaded. The input files can be
    paths to files or opened files.

    Parameters
    ----------
    fgro : string or TextIOWrapper
        Gromacs file with the system information.
    *ftops : string or TextIOWrapper
        Paths with the files with the bonds information to load molecules.

    Raises
    ------
    IOError
        If one of the topology files do not match with any molecule in the
        system.

    """
    def __init__(self, fgro: Union[str, TextIOWrapper],
                 *ftops: Union[str, TextIOWrapper]):
        self.system_gro = SystemGro(fgro)
        self.different_molecules: List[Molecule] = []
        # (ind_mol, ind_gro_start, ammount)
        self._molecules_ordered: List[List[int]] = []
        self._available_mgro_ordered = np.array(list(self.system_gro.molecules_info_ordered_all))
        for ftop in ftops:
            self.add_ftop(ftop)

    def __str__(self) -> str:
        if not self.composition:
            return 'Simulation system with no loaded molecules.'
        string = 'Simulation system with:\n\n'
        string += '\n'.join(["{:6}: {}".format(k, v)
                             for k, v in sorted(self.composition.items())])
        return string

    __repr__ = __str__

    def __iter__(self) -> Generator[Molecule, None, None]:
        for index, gro_start, gro_end in self._molecules_ordered_all_gen():
            residues = self.system_gro[gro_start:gro_end]  # type: ignore
            mol = self.different_molecules[index].copy(residues)
            yield mol

    @overload
    def __getitem__(self, index: int) -> Molecule:
        ...
    @overload
    def __getitem__(self, index: slice) -> List[Molecule]:
        ...
    def __getitem__(self, index):
        try:
            if isinstance(index, slice):
                molecules = []
                info = list(self._molecules_ordered_all_gen())
                for info in islice_extended(self._molecules_ordered_all_gen(),
                                            index.start, index.stop,
                                            index.step):
                    itp_index, gro_start, gro_end = info
                    residues = self.system_gro[gro_start:gro_end]
                    mol = self.different_molecules[itp_index].copy(residues)
                    molecules.append(mol)
                return molecules
            elif isinstance(index, int):
                if index == -1:
                    info = last(self._molecules_ordered_all_gen())
                else:
                    info = next(islice_extended(self._molecules_ordered_all_gen(),
                                                index, index+1))
                itp_index, gro_start, gro_end = info
                residues = self.system_gro[gro_start:gro_end]
                mol = self.different_molecules[itp_index].copy(residues)
                return mol
            else:
                raise TypeError(('System indices must be integers or slices,'
                                 f' not {type(index)}.'))
        except StopIteration:
            raise IndexError('Molecule index out of range')

    def __len__(self) -> int:
        return sum(elem[2] for elem in self._molecules_ordered)

    def _check_index_in_available_mgro(self, index_array: np.ndarray,
                                       mol_itp: MoleculeTop) -> int:
        start_index = None
        len_array = len(index_array)
        for mgro_index, mgro_pk in enumerate(self._available_mgro_ordered):
            if mgro_pk == index_array[0]:
                if (self._available_mgro_ordered[mgro_index:mgro_index+len_array] == index_array).all():
                    start_index = mgro_index
                    break
        if start_index is None:
            raise IOError(('The sequence of residues found for '
                           f'{mol_itp.name} is not found in the gro file, so '
                           'the molecule is not recognized in the system.'))
        return start_index

    def _find_all_molecules_and_replace(self, index_mol_gro: np.ndarray,
                                        mol_index: int,
                                        start_index: int = 0):
        av_gro = self._available_mgro_ordered
        l_index_mol = len(index_mol_gro)
        new_block = True
        while (start_index+l_index_mol) <= len(av_gro):
            if (av_gro[start_index:start_index+l_index_mol] == index_mol_gro).all():
                if new_block:
                    self._molecules_ordered.append([mol_index, start_index, 1])
                    new_block = False
                else:
                    self._molecules_ordered[-1][2] += 1
                av_gro[start_index:start_index+l_index_mol] = -1
                start_index += l_index_mol
            else:
                new_block = True
                start_index += 1

    def _molecules_ordered_all_gen(self) -> Generator[Tuple[int, int, int],
                                                      None, None]:
        for index, gro_start, ammount in self._molecules_ordered:
            len_mol = len(self.different_molecules[index].resnames)
            for i in range(ammount):
                yield (index, gro_start+i*len_mol, gro_start+(i+1)*len_mol)

[docs]    def add_ftop(self, ftop: Union[str, TextIOWrapper]):
        """
        Adds and identifies the molecule from the ftop to the system.
        
        Parameters
        ----------
        ftop : str or TextIOWrapper
            Path to the file (or opened file) with the topology of the molecule.
        """
        self.add_molecule_top(MoleculeTop(ftop))

[docs]    def add_molecule_top(self, mol_top: MoleculeTop):
        """
        Adds a molecule to the system and find it in the gro file.
        """
        # Check if all the residues in the itp are in the gro file
        index_mol_gro = []
        gro_mols_resnames = self.system_gro.molecules_resname_len_index
        for resname_len_top in mol_top.resname_len_list:
            if resname_len_top not in gro_mols_resnames:
                raise IOError(f'The molecule {mol_top.name} is not in the gro file')
            index_mol_gro.append(gro_mols_resnames[resname_len_top])
        # Check if the resnames appear in the same order in the gro file.
        index_mol_gro = np.array(index_mol_gro)
        start_index = self._check_index_in_available_mgro(index_mol_gro,
                                                          mol_top)
        # Try to init the molecule
        residues = self.system_gro[start_index:start_index + len(index_mol_gro)]
        molecule = Molecule(mol_top, residues)
        mol_index = len(self.different_molecules)
        self.different_molecules.append(molecule)
        self._find_all_molecules_and_replace(index_mol_gro, mol_index,
                                             start_index)
        # Sort the molecules in ordered_molecules
        self._molecules_ordered.sort(key=lambda x: x[1])

    @property
    def composition(self) -> typing.Counter[str]:
        """
        Counter of str: int : For each molecule name (key), how many
            molecules there are (value).
        """
        composition: typing.Counter[str] = Counter()
        for index, _, ammount in self._molecules_ordered:
            composition[self.different_molecules[index].name] += ammount
        return composition

    @property
    def fgro(self) -> str:
        return self.system_gro.fgro


[docs]class SystemGro:
    """
    Class to work with the information in gro files.

    Basically this class acts like a list of Residue objects. Only one Residue
    instance of each type is loaded to afford memory for large systems. The
    positions of the rest of the residues are storage and they are generated
    when requested.

    Parameters
    ----------
    fgro : string or TextIOWrapper
        Gromacs file path or open file with the system information.

    """
    def __init__(self, fgro: Union[str, TextIOWrapper]):
        self.fgro = fgro if isinstance(fgro, str) else fgro.name
        self._open_fgro = open_coordinate_file(fgro)
        self.different_molecules: List[Residue] = []
        self._molecules_pk: Dict[Tuple[str, int], int] = {}   # Dictionary with {(resname, len(mol)): index}
        self._molecules_ordered: List[int] = []
        self._parse_gro()

    def __str__(self) -> str:
        string = 'Simulation system with:\n\n'
        string += '\n'.join(["{:6}: {}".format(k, v)
                             for k, v in sorted(self.composition.items())])
        return string

    __repr__ = __str__

    def __iter__(self) -> Generator[Residue, None, None]:
        for _, start, len_mol in self._molecules_ordered_all_gen():
            self._open_fgro.seek_atom(start)
            yield Residue([AtomGro(next(self._open_fgro))
                           for _ in range(len_mol)])

    def __del__(self):
        self._open_fgro.close()

    @overload
    def __getitem__(self, index: int) -> Residue:
        ...
    @overload
    def __getitem__(self, index: slice) -> List[Residue]:
        ...
    def __getitem__(self, index):
        try:
            if isinstance(index, slice):
                molecules = []
                for _, start, len_mol in islice_extended(self._molecules_ordered_all_gen(),
                                                        index.start, index.stop,
                                                        index.step):
                    self._open_fgro.seek_atom(start)
                    molecules.append(Residue([AtomGro(next(self._open_fgro))
                                              for _ in range(len_mol)]))
                return molecules
            if isinstance(index, int):
                if index == -1:
                    info = last(self._molecules_ordered_all_gen())
                else:
                    info = next(islice_extended(self._molecules_ordered_all_gen(),
                                                index, index+1))
                _, start, len_mol = info
                self._open_fgro.seek_atom(start)
                return Residue([AtomGro(next(self._open_fgro))
                                for _ in range(len_mol)])
            else:
                raise TypeError(('SystemGro indices must be integers or slices,'
                                 f' not {type(index)}.'))
        except StopIteration:
            raise IndexError('Residue index out of range')

    def __len__(self) -> int:
        return sum(elem[1] for elem in self._pk_ammount_ordered_gen())

    def _parse_gro(self):
        current_residue = [AtomGro(next(self._open_fgro))]
        prev_atom_residname = current_residue[0].residname
        for line in self._open_fgro:
            atom = AtomGro(line)
            if atom.residname == prev_atom_residname:
                current_residue.append(atom)
            else:
                self._add_residue_init(Residue(current_residue))
                current_residue = [atom]
                prev_atom_residname = current_residue[0].residname
        self._add_residue_init(Residue(current_residue))

    def _add_residue_init(self, residue: Residue):
        key = (residue.resname, len(residue))
        if residue not in self.different_molecules:
            self.different_molecules.append(residue)
            index = len(self.different_molecules) - 1
            self._molecules_pk[key] = index
        index = self._molecules_pk[key]

        if not self._molecules_ordered or self._molecules_ordered[-2] != index:
            self._molecules_ordered += [index, 1]
        else:
            self._molecules_ordered[-1] += 1

    def _pk_ammount_ordered_gen(self) -> Generator[Tuple[int, int], None, None]:
        for i in range(0, len(self._molecules_ordered), 2):
            yield (self._molecules_ordered[i], self._molecules_ordered[i+1])

    def _molecules_ordered_all_gen(self) -> Generator[Tuple[int, int, int], None, None]:
        start_atom = 0
        for index, ammount in self._pk_ammount_ordered_gen():
            len_mol = len(self.different_molecules[index])
            for _ in range(ammount):
                yield (index, start_atom, len_mol)
                start_atom += len_mol

    @property
    def n_atoms(self) -> int:
        """
        int : The number of atoms in the system.
        """
        return self._open_fgro.natoms

    @property
    def box_matrix(self) -> np.ndarray:
        """
        numpy.ndarray (3,3) : the 3 lattice vectors of the box.
        """
        return self._open_fgro.box_matrix

    @property
    def comment_line(self) -> str:
        """
        str: The comment line in the gro file.
        """
        return self._open_fgro.comment

    @property
    def molecules_info_ordered_all(self) -> Generator[int, None, None]:
        """
        generator of int: Returns the index of the molecules in the
            different_molecules attribute in order of appearance.
        """
        for index, ammount in self._pk_ammount_ordered_gen():
            for _ in range(ammount):
                yield index

    @property
    def molecules_resname_len_index(self) -> Dict[Tuple[str, int], int]:
        """
        dict of tuple (string, int) to int: The keys of the dictionary are
            tuples with the residue name and the number of atoms of the
            different residues in the system and the values are its index in
            the list of different molecules.
        """
        return self._molecules_pk

    @property
    def composition(self) -> Mapping[str, int]:
        """
        Counter of str: int : For each resname (key), how many molecules
            there are (value).
        """
        composition: Mapping[str, int] = Counter()
        for index, ammount in self._pk_ammount_ordered_gen():
            composition[self.different_molecules[index].resname] += ammount  # type: ignore
        return composition