# Gaddlemaps python module.
# Copyright (C) 2019-2021 José Manuel Otero Mato, Hadrián Montes Campos, Luis Miguel Varela Cabo
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see <https://www.gnu.org/licenses/>.
'''
This module contains Atom and Molecule objects which connect information from
both gro and itp files.
'''
from collections import defaultdict
from typing import (Any, DefaultDict, Dict, Iterator, List, Optional, Tuple,
Union)
import numpy as np
from scipy.spatial.distance import euclidean
from . import AtomGro, AtomTop, MoleculeTop, Residue
[docs]class Atom:
"""
An atom class that wraps the AtomTop and AtomGro classes.
You can access to the methods and attributes that both AtomGro and
AtomItp have. To create the atom object, both input atoms should have the
same resname and name attributes. On the other hand, only the attributes
from the AtomGro can be changed (e.g. positions, velocities, ...) excluding
the resname and name.
Parameters
----------
atom_top : AtomTop
The AtomTop object.
atom_gro : AtomGro
The AtomGro object.
Raises
------
IOError
If the atom_gro and atom_top do not correspond to the same atom.
TypeError
If the inputs are not instances of the corresponding classes.
"""
def __init__(self, atom_top: AtomTop, atom_gro: AtomGro):
if not isinstance(atom_gro, AtomGro):
raise TypeError('atom_gro input have to be an AtomGro instance.')
if not isinstance(atom_top, AtomTop):
raise TypeError('atom_top input have to be an AtomTop instance.')
if atom_gro.resname != atom_top.resname or atom_gro.name != atom_top.name:
raise IOError((f'Input atoms do not match:\n-{atom_gro}'
f'\n-{atom_top}'))
self._atom_gro = atom_gro
self._atom_top = atom_top
@property
def atom_gro(self) -> AtomGro:
"""
AtomGro : The input AtomGro object
"""
return self._atom_gro
@property
def atom_top(self) -> AtomTop:
"""
AtomTop : The input AtomTop object
"""
return self._atom_top
@property
def top_resid(self) -> int:
"""
Residue number for the part of the atom with the topology (atom_top).
"""
return self._atom_top.resid
@top_resid.setter
def top_resid(self, new_resid: int):
self._atom_top.resid = new_resid
@property
def gro_resid(self) -> int:
"""
Residue number for the gro part of the atom (atom_gro).
"""
return self._atom_gro.resid
@gro_resid.setter
def gro_resid(self, new_resid: int):
self._atom_gro.resid = new_resid
def __getattr__(self, attr: str) -> Any:
if attr == 'resid':
raise AttributeError(('To access resid use gro_resid or'
' top_resid properties.'))
if hasattr(self._atom_gro, attr):
return getattr(self._atom_gro, attr)
if hasattr(self._atom_top, attr):
return getattr(self._atom_top, attr)
raise AttributeError(f'Atom object has no attribute {attr}')
def __setattr__(self, attr: str, value: Any):
if attr in ['_atom_top', '_atom_gro']:
super(Atom, self).__setattr__(attr, value)
# Special cases that are present in both atoms
elif attr == 'resid':
raise AttributeError(('To set resid use gro_resid or'
' top_resid properties.'))
elif attr in ('resname', 'name'):
setattr(self._atom_top, attr, value)
setattr(self._atom_gro, attr, value)
elif attr in super(Atom, self).__dir__():
super(Atom, self).__setattr__(attr, value)
elif hasattr(self._atom_top, attr):
setattr(self._atom_top, attr, value)
elif hasattr(self._atom_gro, attr):
setattr(self._atom_gro, attr, value)
else:
super(Atom, self).__setattr__(attr, value)
def __str__(self) -> str:
string = (f'Atom {self.name} of molecule {self.resname}'
f' with gro residue number {self.gro_resid}.')
return string
__repr__ = __str__
def __hash__(self) -> int:
return hash(self._atom_top)
def __dir__(self) -> List[str]:
"""
For Ipython autocompletion.
"""
_dir = set(super(Atom, self).__dir__())
_dir.update(dir(self._atom_gro))
_dir.update(dir(self._atom_top))
return list(_dir)
[docs] def copy(self) -> 'Atom':
"""
Returns a copy of self.
Only the gro atom is copied. The atom_top remains the same.
Returns
-------
new_atom : Atom
The copied atom.
"""
return Atom(self._atom_top, self._atom_gro.copy())
def __eq__(self, atom: Any) -> bool:
"""
Two atoms are the same if they have the same name, resname, index and top_resid.
"""
if isinstance(atom, Atom):
condition = (self.resname == atom.resname and
self.name == atom.name and
self.index == atom.index and
self.top_resid == atom.top_resid)
return condition
return False
[docs]class Molecule(Residue):
"""
Loads a molecule combining a MoleculeTop and a list of Residue.
This class wraps all the features of both MoleculeTop and Residues which
conform the molecule. When an object is initialized a copy of the input
residues is stored (to avoid undesired attribute changes). This class
inherits from Residue so they have the same methods and properties
(although most of them are reimplemented).
Parameters
----------
molecule_top : MoleculeTop
The object with the bonds information of the molecule.
residues : List of Residue
An list with the residues that constitute the molecule.
Raises
------
TypeError
If the input are instances of wrong type.
IOError
If residues do not constitute the molecule_top.
"""
__excluded__ = ['resname', 'resid', 'residname', 'remove_atom']
def __init__(self, molecule_top: MoleculeTop, residues: List[Residue]):
if not _molecule_top_and_residues_match(molecule_top, residues):
raise IOError(('The molecule can not be initialized. '
'The input residues have not the same atoms that '
'molecule_top has.'))
self._molecule_top = molecule_top
self._residues: List[Residue] = []
# Save the residue number of each atom
self._each_atom_resid: List[int] = []
# Initialize attributes
for res_index, res in enumerate(residues):
self._residues.append(res.copy())
self._each_atom_resid += [res_index] * len(res)
def __getitem__(self, index: int) -> 'Atom': # type: ignore
residue_index = self._each_atom_resid[index]
atom_index = sum(i == residue_index for i in self._each_atom_resid[:index])
return Atom(self._molecule_top[index], self._residues[residue_index][atom_index])
def __iter__(self) -> Iterator['Atom']: # type: ignore
index = 0
for res in self._residues:
for atom in res:
yield Atom(self.molecule_top[index], atom)
index += 1
def __len__(self) -> int:
return len(self._each_atom_resid)
def __str__(self) -> str:
return f'Molecule of {self.name}.'
__repr__ = __str__
def __eq__(self, molecule: Any) -> bool:
if (isinstance(molecule, Molecule) and
molecule.name == self.name and
len(molecule) == len(self)):
for at1, at2 in zip(self, molecule):
if at1 != at2:
return False
return True
return False
def __ne__(self, molecule: Any) -> bool:
return not self == molecule
def __add__(self, other: Union['Residue', 'AtomGro']) -> 'Residue':
raise NotImplementedError
def __radd__(self, other: Union['Residue', 'AtomGro']) -> 'Residue':
raise NotImplementedError
def __getattribute__(self, attr: str) -> Any:
"""
Special methods that are implemented for Residue but not for
Molecule.
"""
if attr in ['resname', 'resid', 'residname', 'remove_atom']:
raise AttributeError(attr)
else:
return super(Molecule, self).__getattribute__(attr)
def __getattr__(self, attr: str) -> Any:
if hasattr(self._molecule_top, attr):
return getattr(self._molecule_top, attr)
raise AttributeError(f'Molecule object has no attribute {attr}')
def __setattr__(self, attr: str, value: Any):
if attr in ['resname', 'resid', 'residname', 'remove_atom']:
raise AttributeError(attr)
if attr in ['_molecule_top', '_residues']:
super(Molecule, self).__setattr__(attr, value)
elif attr in super(Molecule, self).__dir__():
super(Molecule, self).__setattr__(attr, value)
elif hasattr(self._molecule_top, attr):
setattr(self._molecule_top, attr, value)
else:
super(Molecule, self).__setattr__(attr, value)
def __dir__(self) -> List[str]:
"""
For Ipython autocompletion.
"""
_dir = set(super(Molecule, self).__dir__())
_dir.update(dir(self._molecule_top))
return list(_dir)
@property
def molecule_top(self) -> MoleculeTop:
"""
MoleculeTop : The object with the topology information of the molecule.
"""
return self._molecule_top
@property
def residues(self) -> List[Residue]:
"""
List of Residue : a list with the Residue objects that constitute the
molecule.
"""
return self._residues
@property
def atoms(self) -> List['Atom']: # type: ignore
"""
list of Atom: List with the copies of the atoms of the molecule.
"""
return [atom.copy() for atom in self]
@property
def atoms_positions(self) -> np.ndarray:
"""
numpy.ndarray((N, 3)) : An array with the atoms positions.
"""
return np.concatenate([res.atoms_positions for res in self._residues])
@atoms_positions.setter
def atoms_positions(self, new_positions: np.ndarray):
super(Molecule, type(self)).atoms_positions.fset(self, new_positions)
@property
def atoms_velocities(self) -> Optional[np.ndarray]:
"""
numpy.ndarray((N, 3)) or None : An array with the atoms velocities.
If one of the atoms has no velocity this returns None.
"""
vels = []
for res in self._residues:
res_vel = res.atoms_velocities
if res_vel is None:
return None
vels.append(res_vel)
return np.concatenate(vels)
@atoms_velocities.setter
def atoms_velocities(self, new_velocities: Optional[np.ndarray]):
super(Molecule, type(self)).atoms_velocities.fset(self, new_velocities) # type: ignore
@property
def atoms_ids(self) -> List[int]:
"""
list of int: A list with the ids of the atoms in the residues.
"""
return sum((res.atoms_ids for res in self._residues), [])
@atoms_ids.setter
def atoms_ids(self, new_ids: List[int]):
super(Molecule, type(self)).atoms_ids.fset(self, new_ids) # type: ignore
@property
def resnames(self) -> List[str]:
"""
List of string: A list with the names of the residues constituting the
molecule.
To set this property, a list of strings with the same length as the
original must be passed. This will change each residue name. You can
also pass just a string and this will set all the residue names to the
same value.
"""
return [res.resname for res in self._residues]
@resnames.setter
def resnames(self, new_resnames: Union[str, List[str]]):
if isinstance(new_resnames, list) and isinstance(new_resnames[0], str):
if len(new_resnames) != len(self.resnames):
raise ValueError(('You should provide a list with'
f' {len(self.resnames)} residues instead of'
f' {len(new_resnames)}.'))
for atom, res_index in zip(self, self._each_atom_resid):
atom.resname = new_resnames[res_index]
elif isinstance(new_resnames, str):
for atom in self:
atom.resname = new_resnames
else:
raise TypeError('Resnames must be a string or a list of string.')
@property
def resids(self) -> List[int]:
"""
List of int: A list with the residues ids of the residues
constituting the molecule.
To set this property, a list of int with the same length as the
original must be passed. This will change each residue id. You can
also pass just an int and this will set all the residue ids to the
same value.
"""
return [res.resid for res in self._residues]
@resids.setter
def resids(self, new_resids: Union[int, List[int]]):
if isinstance(new_resids, list) and isinstance(new_resids[0], int):
if len(new_resids) != len(self.resids):
raise ValueError(('You should provide a list with'
f' {len(self.resids)} residue ids instead'
f' of {len(new_resids)}.'))
for atom, res_index in zip(self, self._each_atom_resid):
atom.top_resid = new_resids[res_index]
atom.gro_resid = new_resids[res_index]
elif isinstance(new_resids, int):
for atom in self:
atom.top_resid = new_resids
atom.gro_resid = new_resids
else:
raise TypeError('Resids must be an int or a list of int.')
[docs] def copy(self, new_residues: List[Residue] = None) -> 'Molecule':
"""
Returns a copy of the molecule.
If new_molecule_gro is passed, the old residues will be replaced
to update the positions. This is used in the extrapolation step.
NOTE: With this method, the molecule_top used for the Molecule
initialization remains the same. This means that future changes in
copied molecules may affect other parts of you code. If you want a
completely independent new molecule use "deep_copy" method.
Parameters
----------
new_residues
List of residues to replace the original positions.
Returns
-------
molecule : Molecule
The copy of the molecule.
"""
# Maybe this should be optimized
if new_residues is None:
new_residues = self._residues
return Molecule(self._molecule_top, new_residues)
[docs] def deep_copy(self, new_residues: List[Residue] = None) -> 'Molecule':
"""
Returns a deep copy of the molecule.
If new_molecule_gro is passed, the old residues will be replaced
to update the positions. This is used in the extrapolation step. This
method generates a new molecule that is not linked to any attribute of
the original one.
Parameters
----------
new_residues
List of residues to replace the original positions.
Returns
-------
molecule : Molecule
The deep copy of the molecule.
"""
if new_residues is None:
new_residues = self._residues
return Molecule(self._molecule_top.copy(), new_residues)
[docs] def index(self, atom: 'Atom') -> int:
"""
Returns the index of the atom in the molecule.
Parameters
----------
atom : Atom
The atom to find the index.
Returns
-------
index : int
The index of the atom in the molecule.
"""
for index, self_atom in enumerate(self):
if self_atom == atom:
return index
raise ValueError(f'{atom} is not in molecule.')
@property
def bonds_distance(self) -> Dict[int, List[Tuple[int, float]]]:
"""
dict of int to list of tuple(int, float): A complex data structure
that collect the information of the bond distances. The key of the
property corresponds to the atom index in the molecule. The value
is a list with tuples. For each tuple, the first value corresponds
with the index of the bonded atom and the second is the length of
the bond. This property is used in the alignment process in gaddle
maps.
"""
bond_info: DefaultDict[int, List[Tuple[int, float]]] = defaultdict(list)
for index, atom in enumerate(self):
for index_to in atom.bonds:
atom_to = self[index_to]
distance = euclidean(atom.position, atom_to.position)
bond_info[index].append((index_to, distance))
return dict(bond_info)
[docs] @classmethod
def from_files(cls, fgro: str, ftop: str) -> 'Molecule':
"""
Loads the molecule from gro and a compatible topology file.
Parameters
----------
fgro : str
The file name with the gro.
ftop : str
The file name with the top.
Returns
-------
molecule : Molecule
The initialized molecule.
"""
from . import System
syst = System(fgro, ftop)
if len(syst) == 1:
return syst[0]
raise IOError('The input gro file has more than one molecule.')
def _molecule_top_and_residues_match(molecule_top: MoleculeTop,
residues: List[Residue]) -> bool:
if len(molecule_top) != sum(len(res) for res in residues):
return False
index = 0
for res in residues:
for atom in res:
at_top = molecule_top[index]
if atom.resname != at_top.resname or atom.name != at_top.name:
return False
index += 1
return True