Source code for gaddlemaps.components._components

#    Gaddlemaps python module.
#    Copyright (C) 2019-2021 José Manuel Otero Mato, Hadrián Montes Campos, Luis Miguel Varela Cabo
#
#    This program is free software: you can redistribute it and/or modify
#    it under the terms of the GNU Affero General Public License as published by
#    the Free Software Foundation, either version 3 of the License, or
#    (at your option) any later version.
#
#    This program is distributed in the hope that it will be useful,
#    but WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
#    GNU Affero General Public License for more details.
#
#    You should have received a copy of the GNU Affero General Public License
#    along with this program.  If not, see <https://www.gnu.org/licenses/>.
'''
This module contains Atom and Molecule objects which connect information from
both gro and itp files.
'''

from collections import defaultdict
from typing import (Any, DefaultDict, Dict, Iterator, List, Optional, Tuple,
                    Union)

import numpy as np
from scipy.spatial.distance import euclidean

from . import AtomGro, AtomTop, MoleculeTop, Residue


[docs]class Atom:
    """
    An atom class that wraps the AtomTop and AtomGro classes.

    You can access to the methods and attributes that both AtomGro and
    AtomItp have. To create the atom object, both input atoms should have the
    same resname and name attributes. On the other hand, only the attributes
    from the AtomGro can be changed (e.g. positions, velocities, ...) excluding
    the resname and name.

    Parameters
    ----------
    atom_top : AtomTop
        The AtomTop object.
    atom_gro : AtomGro
        The AtomGro object.

    Raises
    ------
    IOError
        If the atom_gro and atom_top do not correspond to the same atom.
    TypeError
        If the inputs are not instances of the corresponding classes.

    """

    def __init__(self, atom_top: AtomTop, atom_gro: AtomGro):
        if not isinstance(atom_gro, AtomGro):
            raise TypeError('atom_gro input have to be an AtomGro instance.')
        if not isinstance(atom_top, AtomTop):
            raise TypeError('atom_top input have to be an AtomTop instance.')
        if atom_gro.resname != atom_top.resname or atom_gro.name != atom_top.name:
            raise IOError((f'Input atoms do not match:\n-{atom_gro}'
                           f'\n-{atom_top}'))
        self._atom_gro = atom_gro
        self._atom_top = atom_top

    @property
    def atom_gro(self) -> AtomGro:
        """
        AtomGro : The input AtomGro object
        """
        return self._atom_gro

    @property
    def atom_top(self) -> AtomTop:
        """
        AtomTop : The input AtomTop object
        """
        return self._atom_top

    @property
    def top_resid(self) -> int:
        """
        Residue number for the part of the atom with the topology (atom_top).
        """
        return self._atom_top.resid

    @top_resid.setter
    def top_resid(self, new_resid: int):
        self._atom_top.resid = new_resid

    @property
    def gro_resid(self) -> int:
        """
        Residue number for the gro part of the atom (atom_gro).
        """
        return self._atom_gro.resid

    @gro_resid.setter
    def gro_resid(self, new_resid: int):
        self._atom_gro.resid = new_resid

    def __getattr__(self, attr: str) -> Any:
        if attr == 'resid':
            raise AttributeError(('To access resid use gro_resid or'
                                  ' top_resid properties.'))
        if hasattr(self._atom_gro, attr):
            return getattr(self._atom_gro, attr)
        if hasattr(self._atom_top, attr):
            return getattr(self._atom_top, attr)
        raise AttributeError(f'Atom object has no attribute {attr}')

    def __setattr__(self, attr: str, value: Any):
        if attr in ['_atom_top', '_atom_gro']:
            super(Atom, self).__setattr__(attr, value)
        # Special cases that are present in both atoms
        elif attr == 'resid':
            raise AttributeError(('To set resid use gro_resid or'
                                  ' top_resid properties.'))
        elif attr in ('resname', 'name'):
            setattr(self._atom_top, attr, value)
            setattr(self._atom_gro, attr, value)
        elif attr in super(Atom, self).__dir__():
            super(Atom, self).__setattr__(attr, value)
        elif hasattr(self._atom_top, attr):
            setattr(self._atom_top, attr, value)
        elif hasattr(self._atom_gro, attr):
            setattr(self._atom_gro, attr, value)
        else:
            super(Atom, self).__setattr__(attr, value)

    def __str__(self) -> str:
        string = (f'Atom {self.name} of molecule {self.resname}'
                  f' with gro residue number {self.gro_resid}.')
        return string

    __repr__ = __str__

    def __hash__(self) -> int:
        return hash(self._atom_top)

    def __dir__(self) -> List[str]:
        """
        For Ipython autocompletion.
        """
        _dir = set(super(Atom, self).__dir__())
        _dir.update(dir(self._atom_gro))
        _dir.update(dir(self._atom_top))
        return list(_dir)

[docs]    def copy(self) -> 'Atom':
        """
        Returns a copy of self.

        Only the gro atom is copied. The atom_top remains the same.

        Returns
        -------
        new_atom : Atom
            The copied atom.

        """
        return Atom(self._atom_top, self._atom_gro.copy())

    def __eq__(self, atom: Any) -> bool:
        """
        Two atoms are the same if they have the same name, resname, index and top_resid.
        """

        if isinstance(atom, Atom):
            condition = (self.resname == atom.resname and
                         self.name == atom.name and
                         self.index == atom.index and
                         self.top_resid == atom.top_resid)
            return condition
        return False


[docs]class Molecule(Residue):
    """
    Loads a molecule combining a MoleculeTop and a list of Residue.

    This class wraps all the features of both MoleculeTop and Residues which
    conform the molecule. When an object is initialized a copy of the input
    residues is stored (to avoid undesired attribute changes). This class
    inherits from Residue so they have the same methods and properties
    (although most of them are reimplemented).

    Parameters
    ----------
    molecule_top : MoleculeTop
        The object with the bonds information of the molecule.
    residues : List of Residue
        An list with the residues that constitute the molecule.

    Raises
    ------
    TypeError
        If the input are instances of wrong type.
    IOError
        If residues do not constitute the molecule_top.

    """
    __excluded__ = ['resname', 'resid', 'residname', 'remove_atom']

    def __init__(self, molecule_top: MoleculeTop, residues: List[Residue]):
        if not _molecule_top_and_residues_match(molecule_top, residues):
            raise IOError(('The molecule can not be initialized. '
                           'The input residues have not the same atoms that '
                           'molecule_top has.'))

        self._molecule_top = molecule_top
        self._residues: List[Residue] = []
        # Save the residue number of each atom
        self._each_atom_resid: List[int] = []

        # Initialize attributes
        for res_index, res in enumerate(residues):
            self._residues.append(res.copy())
            self._each_atom_resid += [res_index] * len(res)

    def __getitem__(self, index: int) -> 'Atom':  # type: ignore
        residue_index = self._each_atom_resid[index]
        atom_index = sum(i == residue_index for i in self._each_atom_resid[:index])
        return Atom(self._molecule_top[index], self._residues[residue_index][atom_index])

    def __iter__(self) -> Iterator['Atom']:  # type: ignore
        index = 0
        for res in self._residues:
            for atom in res:
                yield Atom(self.molecule_top[index], atom)
                index += 1

    def __len__(self) -> int:
        return len(self._each_atom_resid)

    def __str__(self) -> str:
        return f'Molecule of {self.name}.'

    __repr__ = __str__

    def __eq__(self, molecule: Any) -> bool:
        if (isinstance(molecule, Molecule) and
            molecule.name == self.name and
            len(molecule) == len(self)):

            for at1, at2 in zip(self, molecule):
                if at1 != at2:
                    return False
            return True
        return False

    def __ne__(self, molecule: Any) -> bool:
        return not self == molecule

    def __add__(self, other: Union['Residue', 'AtomGro']) -> 'Residue':
        raise NotImplementedError

    def __radd__(self, other: Union['Residue', 'AtomGro']) -> 'Residue':
        raise NotImplementedError

    def __getattribute__(self, attr: str) -> Any:
        """
        Special methods that are implemented for Residue but not for
        Molecule.
        """
        if attr in ['resname', 'resid', 'residname', 'remove_atom']:
            raise AttributeError(attr)
        else:
            return super(Molecule, self).__getattribute__(attr)

    def __getattr__(self, attr: str) -> Any:
        if hasattr(self._molecule_top, attr):
            return getattr(self._molecule_top, attr)
        raise AttributeError(f'Molecule object has no attribute {attr}')

    def __setattr__(self, attr: str, value: Any):
        if attr in ['resname', 'resid', 'residname', 'remove_atom']:
            raise AttributeError(attr)
        if attr in ['_molecule_top', '_residues']:
            super(Molecule, self).__setattr__(attr, value)
        elif attr in super(Molecule, self).__dir__():
            super(Molecule, self).__setattr__(attr, value)
        elif hasattr(self._molecule_top, attr):
            setattr(self._molecule_top, attr, value)
        else:
            super(Molecule, self).__setattr__(attr, value)

    def __dir__(self) -> List[str]:
        """
        For Ipython autocompletion.
        """
        _dir = set(super(Molecule, self).__dir__())
        _dir.update(dir(self._molecule_top))
        return list(_dir)

    @property
    def molecule_top(self) -> MoleculeTop:
        """
        MoleculeTop : The object with the topology information of the molecule.
        """
        return self._molecule_top

    @property
    def residues(self) -> List[Residue]:
        """
        List of Residue : a list with the Residue objects that constitute the
        molecule.
        """
        return self._residues

    @property
    def atoms(self) -> List['Atom']:  # type: ignore
        """
        list of Atom: List with the copies of the atoms of the molecule.
        """
        return [atom.copy() for atom in self]

    @property
    def atoms_positions(self) -> np.ndarray:
        """
        numpy.ndarray((N, 3)) : An array with the atoms positions.
        """
        return np.concatenate([res.atoms_positions for res in self._residues])

    @atoms_positions.setter
    def atoms_positions(self, new_positions: np.ndarray):
        super(Molecule, type(self)).atoms_positions.fset(self, new_positions)

    @property
    def atoms_velocities(self) -> Optional[np.ndarray]:
        """
        numpy.ndarray((N, 3)) or None : An array with the atoms velocities.
            If one of the atoms has no velocity this returns None.
        """
        vels = []
        for res in self._residues:
            res_vel = res.atoms_velocities
            if res_vel is None:
                return None
            vels.append(res_vel)
        return np.concatenate(vels)

    @atoms_velocities.setter
    def atoms_velocities(self, new_velocities: Optional[np.ndarray]):
        super(Molecule, type(self)).atoms_velocities.fset(self, new_velocities) # type: ignore

    @property
    def atoms_ids(self) -> List[int]:
        """
        list of int: A list with the ids of the atoms in the residues.
        """
        return sum((res.atoms_ids for res in self._residues), [])

    @atoms_ids.setter
    def atoms_ids(self, new_ids: List[int]):
        super(Molecule, type(self)).atoms_ids.fset(self, new_ids) # type: ignore

    @property
    def resnames(self) -> List[str]:
        """
        List of string: A list with the names of the residues constituting the
            molecule.

        To set this property, a list of strings with the same length as the
        original must be passed. This will change each residue name. You can
        also pass just a string and this will set all the residue names to the
        same value.
        """
        return [res.resname for res in self._residues]

    @resnames.setter
    def resnames(self, new_resnames: Union[str, List[str]]):
        if isinstance(new_resnames, list) and isinstance(new_resnames[0], str):
            if len(new_resnames) != len(self.resnames):
                raise ValueError(('You should provide a list with'
                                  f' {len(self.resnames)} residues instead of'
                                  f' {len(new_resnames)}.'))

            for atom, res_index in zip(self, self._each_atom_resid):
                atom.resname = new_resnames[res_index]
        elif isinstance(new_resnames, str):
            for atom in self:
                atom.resname = new_resnames
        else:
            raise TypeError('Resnames must be a string or a list of string.')

    @property
    def resids(self) -> List[int]:
        """
        List of int: A list with the residues ids of the residues
            constituting the molecule.

        To set this property, a list of int with the same length as the
        original must be passed. This will change each residue id. You can
        also pass just an int and this will set all the residue ids to the
        same value.
        """
        return [res.resid for res in self._residues]

    @resids.setter
    def resids(self, new_resids: Union[int, List[int]]):
        if isinstance(new_resids, list) and isinstance(new_resids[0], int):
            if len(new_resids) != len(self.resids):
                raise ValueError(('You should provide a list with'
                                  f' {len(self.resids)} residue ids instead'
                                  f' of {len(new_resids)}.'))

            for atom, res_index in zip(self, self._each_atom_resid):
                atom.top_resid = new_resids[res_index]
                atom.gro_resid = new_resids[res_index]
        elif isinstance(new_resids, int):
            for atom in self:
                atom.top_resid = new_resids
                atom.gro_resid = new_resids
        else:
            raise TypeError('Resids must be an int or a list of int.')

[docs]    def copy(self, new_residues: List[Residue] = None) -> 'Molecule':
        """
        Returns a copy of the molecule.

        If new_molecule_gro is passed, the old residues will be replaced
        to update the positions. This is used in the extrapolation step.

        NOTE: With this method, the molecule_top used for the Molecule
        initialization remains the same. This means that future changes in
        copied molecules may affect other parts of you code. If you want a
        completely independent new molecule use "deep_copy" method.

        Parameters
        ----------
        new_residues
            List of residues to replace the original positions.

        Returns
        -------
        molecule : Molecule
            The copy of the molecule.

        """
        # Maybe this should be optimized
        if new_residues is None:
            new_residues = self._residues
        return Molecule(self._molecule_top, new_residues)

[docs]    def deep_copy(self, new_residues: List[Residue] = None) -> 'Molecule':
        """
        Returns a deep copy of the molecule.

        If new_molecule_gro is passed, the old residues will be replaced
        to update the positions. This is used in the extrapolation step. This
        method generates a new molecule that is not linked to any attribute of
        the original one.

        Parameters
        ----------
        new_residues
            List of residues to replace the original positions.

        Returns
        -------
        molecule : Molecule
            The deep copy of the molecule.

        """
        if new_residues is None:
            new_residues = self._residues
        return Molecule(self._molecule_top.copy(), new_residues)

[docs]    def index(self, atom: 'Atom') -> int:
        """
        Returns the index of the atom in the molecule.

        Parameters
        ----------
        atom : Atom
            The atom to find the index.

        Returns
        -------
        index : int
            The index of the atom in the molecule.

        """
        for index, self_atom in enumerate(self):
            if self_atom == atom:
                return index
        raise ValueError(f'{atom} is not in molecule.')

    @property
    def bonds_distance(self) -> Dict[int, List[Tuple[int, float]]]:
        """
        dict of int to list of tuple(int, float): A complex data structure
            that collect the information of the bond distances. The key of the
            property corresponds to the atom index in the molecule. The value
            is a list with tuples. For each tuple, the first value corresponds
            with the index of the bonded atom and the second is the length of
            the bond. This property is used in the alignment process in gaddle
            maps.
        """
        bond_info: DefaultDict[int, List[Tuple[int, float]]] = defaultdict(list)
        for index, atom in enumerate(self):
            for index_to in atom.bonds:
                atom_to = self[index_to]
                distance = euclidean(atom.position, atom_to.position)
                bond_info[index].append((index_to, distance))
        return dict(bond_info)

[docs]    @classmethod
    def from_files(cls, fgro: str, ftop: str) -> 'Molecule':
        """
        Loads the molecule from gro and a compatible topology file.

        Parameters
        ----------
        fgro : str
            The file name with the gro.
        ftop : str
            The file name with the top.

        Returns
        -------
        molecule : Molecule
            The initialized molecule.

        """
        from . import System
        syst = System(fgro, ftop)
        if len(syst) == 1:
            return syst[0]
        raise IOError('The input gro file has more than one molecule.')


def _molecule_top_and_residues_match(molecule_top: MoleculeTop,
                                     residues: List[Residue]) -> bool:
    if len(molecule_top) != sum(len(res) for res in residues):
        return False
    index = 0
    for res in residues:
        for atom in res:
            at_top = molecule_top[index]
            if atom.resname != at_top.resname or atom.name != at_top.name:
                return False
            index += 1
    return True