# Gaddlemaps python module.
# Copyright (C) 2019-2021 José Manuel Otero Mato, Hadrián Montes Campos, Luis Miguel Varela Cabo
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
"""
This submodule defines the System and SystemGro classes.
"""
from io import TextIOWrapper
import typing
from collections import Counter
from more_itertools import islice_extended, last
from typing import Dict, Generator, List, Mapping, Tuple, Union, overload
import numpy as np
from ..parsers import open_coordinate_file
from . import AtomGro, Molecule, MoleculeTop, Residue
[docs]class System:
"""
Class to manage simulation systems.
A System object is formed by Molecule objects. Only the molecules
corresponding to the input ftops will be loaded. The input files can be
paths to files or opened files.
Parameters
----------
fgro : string or TextIOWrapper
Gromacs file with the system information.
*ftops : string or TextIOWrapper
Paths with the files with the bonds information to load molecules.
Raises
------
IOError
If one of the topology files do not match with any molecule in the
system.
"""
def __init__(self, fgro: Union[str, TextIOWrapper],
*ftops: Union[str, TextIOWrapper]):
self.system_gro = SystemGro(fgro)
self.different_molecules: List[Molecule] = []
# (ind_mol, ind_gro_start, ammount)
self._molecules_ordered: List[List[int]] = []
self._available_mgro_ordered = np.array(list(self.system_gro.molecules_info_ordered_all))
for ftop in ftops:
self.add_ftop(ftop)
def __str__(self) -> str:
if not self.composition:
return 'Simulation system with no loaded molecules.'
string = 'Simulation system with:\n\n'
string += '\n'.join(["{:6}: {}".format(k, v)
for k, v in sorted(self.composition.items())])
return string
__repr__ = __str__
def __iter__(self) -> Generator[Molecule, None, None]:
for index, gro_start, gro_end in self._molecules_ordered_all_gen():
residues = self.system_gro[gro_start:gro_end] # type: ignore
mol = self.different_molecules[index].copy(residues)
yield mol
@overload
def __getitem__(self, index: int) -> Molecule:
...
@overload
def __getitem__(self, index: slice) -> List[Molecule]:
...
def __getitem__(self, index):
try:
if isinstance(index, slice):
molecules = []
info = list(self._molecules_ordered_all_gen())
for info in islice_extended(self._molecules_ordered_all_gen(),
index.start, index.stop,
index.step):
itp_index, gro_start, gro_end = info
residues = self.system_gro[gro_start:gro_end]
mol = self.different_molecules[itp_index].copy(residues)
molecules.append(mol)
return molecules
elif isinstance(index, int):
if index == -1:
info = last(self._molecules_ordered_all_gen())
else:
info = next(islice_extended(self._molecules_ordered_all_gen(),
index, index+1))
itp_index, gro_start, gro_end = info
residues = self.system_gro[gro_start:gro_end]
mol = self.different_molecules[itp_index].copy(residues)
return mol
else:
raise TypeError(('System indices must be integers or slices,'
f' not {type(index)}.'))
except StopIteration:
raise IndexError('Molecule index out of range')
def __len__(self) -> int:
return sum(elem[2] for elem in self._molecules_ordered)
def _check_index_in_available_mgro(self, index_array: np.ndarray,
mol_itp: MoleculeTop) -> int:
start_index = None
len_array = len(index_array)
for mgro_index, mgro_pk in enumerate(self._available_mgro_ordered):
if mgro_pk == index_array[0]:
if (self._available_mgro_ordered[mgro_index:mgro_index+len_array] == index_array).all():
start_index = mgro_index
break
if start_index is None:
raise IOError(('The sequence of residues found for '
f'{mol_itp.name} is not found in the gro file, so '
'the molecule is not recognized in the system.'))
return start_index
def _find_all_molecules_and_replace(self, index_mol_gro: np.ndarray,
mol_index: int,
start_index: int = 0):
av_gro = self._available_mgro_ordered
l_index_mol = len(index_mol_gro)
new_block = True
while (start_index+l_index_mol) <= len(av_gro):
if (av_gro[start_index:start_index+l_index_mol] == index_mol_gro).all():
if new_block:
self._molecules_ordered.append([mol_index, start_index, 1])
new_block = False
else:
self._molecules_ordered[-1][2] += 1
av_gro[start_index:start_index+l_index_mol] = -1
start_index += l_index_mol
else:
new_block = True
start_index += 1
def _molecules_ordered_all_gen(self) -> Generator[Tuple[int, int, int],
None, None]:
for index, gro_start, ammount in self._molecules_ordered:
len_mol = len(self.different_molecules[index].resnames)
for i in range(ammount):
yield (index, gro_start+i*len_mol, gro_start+(i+1)*len_mol)
[docs] def add_ftop(self, ftop: Union[str, TextIOWrapper]):
"""
Adds and identifies the molecule from the ftop to the system.
Parameters
----------
ftop : str or TextIOWrapper
Path to the file (or opened file) with the topology of the molecule.
"""
self.add_molecule_top(MoleculeTop(ftop))
[docs] def add_molecule_top(self, mol_top: MoleculeTop):
"""
Adds a molecule to the system and find it in the gro file.
"""
# Check if all the residues in the itp are in the gro file
index_mol_gro = []
gro_mols_resnames = self.system_gro.molecules_resname_len_index
for resname_len_top in mol_top.resname_len_list:
if resname_len_top not in gro_mols_resnames:
raise IOError(f'The molecule {mol_top.name} is not in the gro file')
index_mol_gro.append(gro_mols_resnames[resname_len_top])
# Check if the resnames appear in the same order in the gro file.
index_mol_gro = np.array(index_mol_gro)
start_index = self._check_index_in_available_mgro(index_mol_gro,
mol_top)
# Try to init the molecule
residues = self.system_gro[start_index:start_index + len(index_mol_gro)]
molecule = Molecule(mol_top, residues)
mol_index = len(self.different_molecules)
self.different_molecules.append(molecule)
self._find_all_molecules_and_replace(index_mol_gro, mol_index,
start_index)
# Sort the molecules in ordered_molecules
self._molecules_ordered.sort(key=lambda x: x[1])
@property
def composition(self) -> typing.Counter[str]:
"""
Counter of str: int : For each molecule name (key), how many
molecules there are (value).
"""
composition: typing.Counter[str] = Counter()
for index, _, ammount in self._molecules_ordered:
composition[self.different_molecules[index].name] += ammount
return composition
@property
def fgro(self) -> str:
return self.system_gro.fgro
[docs]class SystemGro:
"""
Class to work with the information in gro files.
Basically this class acts like a list of Residue objects. Only one Residue
instance of each type is loaded to afford memory for large systems. The
positions of the rest of the residues are storage and they are generated
when requested.
Parameters
----------
fgro : string or TextIOWrapper
Gromacs file path or open file with the system information.
"""
def __init__(self, fgro: Union[str, TextIOWrapper]):
self.fgro = fgro if isinstance(fgro, str) else fgro.name
self._open_fgro = open_coordinate_file(fgro)
self.different_molecules: List[Residue] = []
self._molecules_pk: Dict[Tuple[str, int], int] = {} # Dictionary with {(resname, len(mol)): index}
self._molecules_ordered: List[int] = []
self._parse_gro()
def __str__(self) -> str:
string = 'Simulation system with:\n\n'
string += '\n'.join(["{:6}: {}".format(k, v)
for k, v in sorted(self.composition.items())])
return string
__repr__ = __str__
def __iter__(self) -> Generator[Residue, None, None]:
for _, start, len_mol in self._molecules_ordered_all_gen():
self._open_fgro.seek_atom(start)
yield Residue([AtomGro(next(self._open_fgro))
for _ in range(len_mol)])
def __del__(self):
self._open_fgro.close()
@overload
def __getitem__(self, index: int) -> Residue:
...
@overload
def __getitem__(self, index: slice) -> List[Residue]:
...
def __getitem__(self, index):
try:
if isinstance(index, slice):
molecules = []
for _, start, len_mol in islice_extended(self._molecules_ordered_all_gen(),
index.start, index.stop,
index.step):
self._open_fgro.seek_atom(start)
molecules.append(Residue([AtomGro(next(self._open_fgro))
for _ in range(len_mol)]))
return molecules
if isinstance(index, int):
if index == -1:
info = last(self._molecules_ordered_all_gen())
else:
info = next(islice_extended(self._molecules_ordered_all_gen(),
index, index+1))
_, start, len_mol = info
self._open_fgro.seek_atom(start)
return Residue([AtomGro(next(self._open_fgro))
for _ in range(len_mol)])
else:
raise TypeError(('SystemGro indices must be integers or slices,'
f' not {type(index)}.'))
except StopIteration:
raise IndexError('Residue index out of range')
def __len__(self) -> int:
return sum(elem[1] for elem in self._pk_ammount_ordered_gen())
def _parse_gro(self):
current_residue = [AtomGro(next(self._open_fgro))]
prev_atom_residname = current_residue[0].residname
for line in self._open_fgro:
atom = AtomGro(line)
if atom.residname == prev_atom_residname:
current_residue.append(atom)
else:
self._add_residue_init(Residue(current_residue))
current_residue = [atom]
prev_atom_residname = current_residue[0].residname
self._add_residue_init(Residue(current_residue))
def _add_residue_init(self, residue: Residue):
key = (residue.resname, len(residue))
if residue not in self.different_molecules:
self.different_molecules.append(residue)
index = len(self.different_molecules) - 1
self._molecules_pk[key] = index
index = self._molecules_pk[key]
if not self._molecules_ordered or self._molecules_ordered[-2] != index:
self._molecules_ordered += [index, 1]
else:
self._molecules_ordered[-1] += 1
def _pk_ammount_ordered_gen(self) -> Generator[Tuple[int, int], None, None]:
for i in range(0, len(self._molecules_ordered), 2):
yield (self._molecules_ordered[i], self._molecules_ordered[i+1])
def _molecules_ordered_all_gen(self) -> Generator[Tuple[int, int, int], None, None]:
start_atom = 0
for index, ammount in self._pk_ammount_ordered_gen():
len_mol = len(self.different_molecules[index])
for _ in range(ammount):
yield (index, start_atom, len_mol)
start_atom += len_mol
@property
def n_atoms(self) -> int:
"""
int : The number of atoms in the system.
"""
return self._open_fgro.natoms
@property
def box_matrix(self) -> np.ndarray:
"""
numpy.ndarray (3,3) : the 3 lattice vectors of the box.
"""
return self._open_fgro.box_matrix
@property
def comment_line(self) -> str:
"""
str: The comment line in the gro file.
"""
return self._open_fgro.comment
@property
def molecules_info_ordered_all(self) -> Generator[int, None, None]:
"""
generator of int: Returns the index of the molecules in the
different_molecules attribute in order of appearance.
"""
for index, ammount in self._pk_ammount_ordered_gen():
for _ in range(ammount):
yield index
@property
def molecules_resname_len_index(self) -> Dict[Tuple[str, int], int]:
"""
dict of tuple (string, int) to int: The keys of the dictionary are
tuples with the residue name and the number of atoms of the
different residues in the system and the values are its index in
the list of different molecules.
"""
return self._molecules_pk
@property
def composition(self) -> Mapping[str, int]:
"""
Counter of str: int : For each resname (key), how many molecules
there are (value).
"""
composition: Mapping[str, int] = Counter()
for index, ammount in self._pk_ammount_ordered_gen():
composition[self.different_molecules[index].resname] += ammount # type: ignore
return composition