Source code for moldrug.fitness

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
import os
from copy import deepcopy
from typing import Dict, List, Union

import numpy as np
# from warnings import import warn
from meeko import (MoleculePreparation, PDBQTMolecule, PDBQTWriterLegacy,
                   RDKitMolCreate)
from rdkit import Chem
from rdkit.Chem import QED, Descriptors

from moldrug import constraintconf, utils, verbose


def __get_default_desirability(multireceptor: bool = False) -> dict:
    """Storage default desirability functions values to work with the
    cost functions defined in :mod:`moldrug.fitness`

    Parameters
    ----------
    multireceptor : bool, optional
        True:  :meth:`CostMultiReceptors` and :meth:`CostMultiReceptorsOnlyVina`
        False: :meth:`Cost`, by default False

    Returns
    -------
    dict
        The default desirability
    """

    desirability = {
        'qed': {
            'w': 1,
            'LargerTheBest': {
                'LowerLimit': 0.1,
                'Target': 0.75,
                'r': 1
            }
        },
        'sa_score': {
            'w': 1,
            'SmallerTheBest': {
                'Target': 3,
                'UpperLimit': 7,
                'r': 1
            }
        },
    }
    if multireceptor:
        # Usewd for multireceptor cost functions
        desirability['vina_scores'] = {
            'ensemble': {
                'w': 1,
                'SmallerTheBest': {
                    'Target': -12,
                    'UpperLimit': -6,
                    'r': 1
                }
            },
            'min': {
                'w': 1,
                'SmallerTheBest': {
                    'Target': -12,
                    'UpperLimit': -6,
                    'r': 1
                }
            },
            'max': {
                'w': 1,
                'LargerTheBest': {
                    'LowerLimit': -4,
                    'Target': 0,
                    'r': 1
                }
            }
        }
    else:
        desirability['vina_score'] = {
            'w': 1,
            'SmallerTheBest': {
                'Target': -12,
                'UpperLimit': -6,
                'r': 1
            }
        }
    return desirability


def __get_mol_cost(mol: Chem.rdchem.Mol,
                   wd: str = '.vina_jobs',
                   vina_executable: str = 'vina',
                   receptor_pdbqt_path: str = None,
                   boxcenter: List[float] = None,
                   boxsize: List[float] = None,
                   docking_type: str = 'score_only',
                   desirability: Dict = None):
    """
    This function is for developing. It calculate the cost of a single molecule.
    I use it for testing.

    Parameters
    ----------
    mol : Chem.rdchem.Mol
        The molecule to calculate the cost
    wd : str, optional
        The working directory to execute the docking jobs, by default '.vina_jobs'
    vina_executable : str, optional
         This is the name of the vina executable, could be a path to the binary object (absolute path is recommended),
         which must have  execution permits (chmod a+x <your binary file>), by default 'vina'
    receptor_pdbqt_path : str, optional
        Where the receptor pdbqt file is located, by default None
    boxcenter : List[float], optional
        A list of three floats with the definition of the center of the box
        in angstrom for docking (x, y, z), by default None
    boxsize : List[float], optional
        A list of three floats with the definition of the box size
        in angstrom of the docking box (x, y, z), by default None
    docking_type : str, optional
        any valid string: score_only, local_only, free, by default True
    desirability : Dict, optional
        Desirability definition to update the internal default values. The update use :meth:`moldrug.utils.deep_update`
        Each variable only will accept
        the keys [w, and the name of the desirability function of :meth:`moldrug.utils.DerringerSuichDesirability`],
        by default None which means that it will use:

        .. ipython:: python

            from moldrug.fitness import __get_default_desirability
            import json
            json.dumps(__get_default_desirability(multireceptor=False), indent = 4)
    Returns
    -------
    dict
        A dict with the keywords:qed, vina_score, sa_score and cost.

    Raises
    ------
    RuntimeError
        Inconsistences on user definition of the desirability function parameters.
    """

    if desirability is None:
        desirability = __get_default_desirability(multireceptor=False)
    else:
        desirability = utils.deep_update(
            target_dict=__get_default_desirability(multireceptor=False),
            update_dict=desirability
        )

    if not os.path.exists(wd):
        os.makedirs(wd)
    # Initializing result dict
    results = dict()
    # Importing sascorer
    sascorer = utils.import_sascorer()
    # multicriteria optimization,Optimization of Several Response Variables

    # Getting estimate of drug-likness
    results['qed'] = QED.weights_mean(Chem.RemoveHs(mol))

    # Getting synthetic accessibility score
    results['sa_score'] = sascorer.calculateScore(Chem.RemoveHs(mol))

    # Getting vina_score and update pdbqt
    # Making the ligand pdbqt
    preparator = MoleculePreparation()
    mol_setups = preparator.prepare(Chem.AddHs(mol, addCoords=True))
    with open(os.path.join(os.path.join(wd, 'ligand.pdbqt')), 'w') as f:
        f.write(PDBQTWriterLegacy.write_string(mol_setups[0])[0])

    # If the vina_executable is a path
    if os.path.isfile(vina_executable):
        vina_executable = os.path.abspath(vina_executable)

    # Creating the command line for vina
    cmd_vina_str = f"{vina_executable} --receptor {receptor_pdbqt_path}"\
        f" --center_x {boxcenter[0]} --center_y {boxcenter[1]} --center_z {boxcenter[2]}"\
        f" --size_x {boxsize[0]} --size_y {boxsize[1]} --size_z {boxsize[2]}"\
        f" --ligand {os.path.join(wd, 'ligand.pdbqt')}"
    if docking_type == 'score_only':
        cmd_vina_str += " --score_only"
    elif docking_type == 'local_only':
        cmd_vina_str += f" --local_only --out {os.path.join(wd, 'ligand_out.pdbqt')}"
    elif docking_type == 'free':
        cmd_vina_str += f" --out {os.path.join(wd, 'ligand_out.pdbqt')}"
    else:
        raise ValueError(f"docking_type must be one of: score_only, local_only or free. {docking_type} was given.")
    cmd_vina_result = utils.run(cmd_vina_str)
    if docking_type in ['score_only', 'local_only']:
        for line in cmd_vina_result.stdout.split('\n'):
            # Check over different vina versions
            if line.startswith('Affinity'):
                results['vina_score'] = float(line.split()[1])
                break
            elif 'Estimated Free Energy of Binding' in line:
                results['vina_score'] = float(line.split(':')[1].split()[0])
                break
    else:
        best_energy = utils.VINA_OUT(os.path.join(wd, 'ligand_out.pdbqt')).BestEnergy()
        results['vina_score'] = best_energy.freeEnergy
        pdbqt_mol = PDBQTMolecule.from_file(os.path.join(wd, 'ligand_out.pdbqt'), skip_typing=True)
        with Chem.SDWriter(os.path.join(wd, 'ligand_out.sdf')) as w:
            w.write(RDKitMolCreate.from_pdbqt_mol(pdbqt_mol)[0])
    # Getting the desirability
    base = 1
    exponent = 0
    for variable in desirability:
        for key in desirability[variable]:
            if key == 'w':
                w = desirability[variable][key]
            elif key in utils.DerringerSuichDesirability():
                d = utils.DerringerSuichDesirability()[key](results[variable], **desirability[variable][key])
            else:
                raise RuntimeError(f"Inside the desirability dictionary you provided for the variable = {variable}"
                                   f"a non implemented key = {key}. Only are possible: 'w' (standing for weight)"
                                   "and any possible Derringer-Suich desirability function: "
                                   f"{utils.DerringerSuichDesirability().keys()}")
        base *= d**w
        exponent += w

    # We are using a geometric mean. And because we are minimizing we have to return
    results['desirability'] = 1 - base**(1 / exponent)
    return results


def _vinadock(
        Individual: utils.Individual,
        wd: str = '.vina_jobs',
        vina_executable: str = 'vina',
        vina_seed: Union[int, None] = None,
        receptor_pdbqt_path: str = None,
        boxcenter: List[float] = None,
        boxsize: List[float] = None,
        exhaustiveness: int = 8,
        ad4map: str = None,
        ncores: int = 1,
        num_modes: int = 1,
        constraint: bool = False,
        constraint_type: str = 'score_only',  # score_only, local_only
        constraint_ref: Chem.rdchem.Mol = None,
        constraint_receptor_pdb_path: str = None,
        constraint_num_conf: int = 100,
        constraint_minimum_conf_rms: int = 0.01):
    """
    This function is intend to be used to perform docking
    for all the cost functions implemented on :mod:`moldrug.fitness`
    If ad4map (available from moldrug-3.0.0) is set,
    the last version of vina (`releases <https://github.com/ccsb-scripps/AutoDock-Vina/releases>`_)
    must be installed. To see how to use AutoDock4 force fields in the new version of vina, follow
    `this tutorial <https://autodock-vina.readthedocs.io/en/latest/docking_zinc.html>_`

    Parameters
    ----------
    Individual : utils.Individual
        An Individual with the pdbqt attribute (only needed for non constraint docking).
    wd : str, optional
        The working directory to execute the docking jobs, by default '.vina_jobs'
    vina_executable : str, optional
        This is the name of the vina executable, could be a path to the binary object (absolute path is recommended),
        which must have  execution permits (chmod a+x <your binary file>), by default 'vina'
    vina_seed : Union[int, None], optional
        Explicit random seed used by vina or the constraint docking routine, by default None
    receptor_pdbqt_path : str, optional
        Where the receptor pdbqt file is located, by default None
    boxcenter : List[float], optional
        A list of three floats with the definition of the center of the box
        in angstrom for docking (x, y, z), by default None
    boxsize : List[float], optional
        A list of three floats with the definition of the box size
        in angstrom of the docking box (x, y, z), by default None
    exhaustiveness : int, optional
        Parameter of vina that controls the accuracy of the docking searching, by default 8
    ad4map : str, optional
        Affinity maps for the autodock4.2 (ad4) or vina scoring function, by default None
    ncores : int, optional
        Number of cpus to use in Vina, by default 1
    num_modes : int, optional
        How many modes should Vina export, by default 1
    constraint : bool, optional
        Controls if constraint docking will be perform, by default False
    constraint_type : str, optional
        This is the type of constraint docking. Could be local_only
        (vina will perform local optimization and score the resulted pose)
        or score_only (in this case the provided pose by the internal conformer
        generator will only be scored), by default 'score_only'
    constraint_ref : Chem.rdchem.Mol, optional
        The part of the molecule that we would like to constraint, by default None
    constraint_receptor_pdb_path : str, optional
        The same as constraint_receptor_pdbqt_path but in pdb format, by default None
    constraint_num_conf : int, optional
        Maximum number of conformer to be generated internally by moldrug , by default 100
    constraint_minimum_conf_rms : int, optional
        RMS to filter duplicate conformers, by default 0.01

    Returns
    -------
    tuple
        A tuple with two elements:
        (vina score, pdbqt string)

    Raises
    ------
    Exception
        Inappropriate constraint_type. must be local_only or score_only.
        Only will be checked if constraint is set to True.
    """

    constraint_type = constraint_type.lower()

    # Creating the error directory if needed
    if not os.path.isdir('error'):
        os.makedirs('error')
    # Creating the working directory if needed
    if not os.path.exists(wd):
        os.makedirs(wd)

    # Converting to absolute path in case that vina_executable points to a file
    if os.path.isfile(vina_executable):
        vina_executable = os.path.abspath(vina_executable)
    # TODO Add Check on the vina executable
    # Creating the command line for vina
    cmd_vina_str = f"{vina_executable}"\
        f" --cpu {ncores} --exhaustiveness {exhaustiveness} --num_modes {num_modes}"

    if ad4map:
        cmd_vina_str += f" --scoring ad4 --maps {os.path.abspath(ad4map)}"
    else:
        cmd_vina_str += f" --receptor {os.path.abspath(receptor_pdbqt_path)}"\
            f" --center_x {boxcenter[0]} --center_y {boxcenter[1]} --center_z {boxcenter[2]}"\
            f" --size_x {boxsize[0]} --size_y {boxsize[1]} --size_z {boxsize[2]}"\

    if vina_seed is not None:
        cmd_vina_str += f" --seed {vina_seed}"

    if constraint:
        # Check for the correct type of docking
        if constraint_type in ['score_only', 'local_only']:
            cmd_vina_str += f" --{constraint_type}"
        else:
            raise Exception("constraint_type only admit two possible values: score_only, local_only.")

        # Generate constrained conformer
        try:
            out_mol = constraintconf.generate_conformers(
                mol=Chem.RemoveHs(Individual.mol),
                ref_mol=Chem.RemoveHs(constraint_ref),
                num_conf=constraint_num_conf,
                # ref_smi=Chem.MolToSmiles(constraint_ref),
                minimum_conf_rms=constraint_minimum_conf_rms,
                randomseed=vina_seed)
        except Exception as e:
            if verbose:
                print(f"constraintconf.generate_conformers fails inside moldrug.fitness._vinadock with {e}")
            vina_score_pdbqt = (np.inf, "NonValidConformer")
            return vina_score_pdbqt
        # Remove conformers that clash with the protein in case of score_only,
        # for local_only vina will handle the clash.
        if constraint_type == 'score_only':
            clash_filter = constraintconf.ProteinLigandClashFilter(protein_pdbpath=constraint_receptor_pdb_path,
                                                                   distance=1.5)  # TODO is this a good threshold? 
            clashIds = [conf.GetId() for conf in out_mol.GetConformers() if clash_filter(conf)]
            _ = [out_mol.RemoveConformer(clashId) for clashId in clashIds]

        # Check first if some valid conformer exist
        if len(out_mol.GetConformers()):
            vina_score_pdbqt = (np.inf, None)
            for conf in out_mol.GetConformers():
                temp_mol = deepcopy(out_mol)
                temp_mol.RemoveAllConformers()
                temp_mol.AddConformer(out_mol.GetConformer(conf.GetId()), assignId=True)

                preparator = MoleculePreparation()
                mol_setups = preparator.prepare(Chem.AddHs(temp_mol, addCoords=True))
                with open(os.path.join(wd, f'{Individual.idx}_conf_{conf.GetId()}.pdbqt'), 'w') as f:
                    f.write(PDBQTWriterLegacy.write_string(mol_setups[0])[0])

                # Make a copy to the vina command string and add the out (is needed) and ligand options
                cmd_vina_str_tmp = cmd_vina_str[:]
                cmd_vina_str_tmp += f" --ligand {os.path.join(wd, f'{Individual.idx}_conf_{conf.GetId()}.pdbqt')}"

                if constraint_type == 'local_only':
                    cmd_vina_str_tmp += f" --out {os.path.join(wd, f'{Individual.idx}_conf_{conf.GetId()}_out.pdbqt')}"
                try:
                    cmd_vina_result = utils.run(cmd_vina_str_tmp)
                except Exception as e:
                    if os.path.isfile(receptor_pdbqt_path):
                        with open(receptor_pdbqt_path, 'r') as f:
                            receptor_str = f.read()
                    else:
                        receptor_str = None

                    error = {
                        'Exception': e,
                        'Individual': Individual,
                        f'used_mol_conf_{conf.GetId()}': temp_mol,
                        f'used_ligand_pdbqt_conf_{conf.GetId()}': PDBQTWriterLegacy.write_string(mol_setups[0])[0],
                        'receptor_str': receptor_str,
                        'boxcenter': boxcenter,
                        'boxsize': boxsize,
                    }
                    utils.compressed_pickle(f'error/idx_{Individual.idx}_conf_{conf.GetId()}_error', error)
                    vina_score_pdbqt = (np.inf, PDBQTWriterLegacy.write_string(mol_setups[0])[0])
                    return vina_score_pdbqt

                vina_score = np.inf
                for line in cmd_vina_result.stdout.split('\n'):
                    # Check over different vina versions
                    if line.startswith('Affinity'):
                        vina_score = float(line.split()[1])
                        break
                    elif 'Estimated Free Energy of Binding' in line:
                        vina_score = float(line.split(':')[1].split()[0])
                        break
                if vina_score < vina_score_pdbqt[0]:
                    if constraint_type == 'local_only':
                        if os.path.isfile(os.path.join(wd, f'{Individual.idx}_conf_{conf.GetId()}_out.pdbqt')):
                            with open(os.path.join(wd, f'{Individual.idx}_conf_{conf.GetId()}_out.pdbqt'), 'r') as f:
                                pdbqt = f.read()
                        else:
                            pdbqt = "NonExistedFileToRead"
                        vina_score_pdbqt = (vina_score, pdbqt)
                    else:
                        vina_score_pdbqt = (vina_score, PDBQTWriterLegacy.write_string(mol_setups[0])[0])
        else:
            vina_score_pdbqt = (np.inf, "NonGenConformer")
    # "Normal" docking
    else:
        cmd_vina_str += f" --ligand {os.path.join(wd, f'{Individual.idx}.pdbqt')} "\
            f"--out {os.path.join(wd, f'{Individual.idx}_out.pdbqt')}"
        with open(os.path.join(wd, f'{Individual.idx}.pdbqt'), 'w') as lig_pdbqt:
            lig_pdbqt.write(Individual.pdbqt)
        try:
            utils.run(cmd_vina_str)
        except Exception as e:
            receptor_str = None
            if receptor_pdbqt_path:
                if os.path.isfile(receptor_pdbqt_path):
                    with open(receptor_pdbqt_path, 'r') as f:
                        receptor_str = f.read()

            error = {
                'Exception': e,
                'Individual': Individual,
                'receptor_str': receptor_str,
                'boxcenter': boxcenter,
                'boxsize': boxsize,
            }
            utils.compressed_pickle(f'error/{Individual.idx}_error', error)
            # warn(f"\nVina failed! Check: {Individual.idx}_error.pbz2 file in error.\n")
            if verbose:
                for key in error:
                    print(f"{key}: {error[key]}")
            vina_score_pdbqt = (np.inf, 'VinaFailed')
            return vina_score_pdbqt

        # Getting the information
        best_energy = utils.VINA_OUT(os.path.join(wd, f'{Individual.idx}_out.pdbqt')).BestEnergy()
        vina_score_pdbqt = (best_energy.freeEnergy, ''.join(best_energy.chunk))

    return vina_score_pdbqt


[docs] def Cost( Individual: utils.Individual, wd: str = '.vina_jobs', vina_executable: str = 'vina', vina_seed: Union[int, None] = None, receptor_pdbqt_path: str = None, boxcenter: List[float] = None, boxsize: List[float] = None, exhaustiveness: int = 8, ad4map: str = None, ncores: int = 1, num_modes: int = 1, constraint: bool = False, constraint_type: str = 'score_only', # score_only, local_only constraint_ref: Chem.rdchem.Mol = None, constraint_receptor_pdb_path: str = None, constraint_num_conf: int = 100, constraint_minimum_conf_rms: int = 0.01, desirability: Dict = None): """ This is the main Cost function of the module. It use the concept of desirability functions. The response variables are: #. `Vina score. <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041641/>`_ #. `Quantitative Estimation of Drug-likeness (QED). <https://www.nature.com/articles/nchem.1243>`_ #. `Synthetic accessibility score. <https://jcheminf.biomedcentral.com/articles/10.1186/1758-2946-1-8)>`_ If ad4map is set, the last version of vina (`releases <https://github.com/ccsb-scripps/AutoDock-Vina/releases>`_) must be installed. To see how to use AutoDock4 force fields in the new version of vina, follow `this tutorial <https://autodock-vina.readthedocs.io/en/latest/docking_zinc.html>_` Parameters ---------- Individual : utils.Individual A Individual with the pdbqt attribute wd : str, optional The working directory to execute the docking jobs, by default '.vina_jobs' vina_executable : str, optional This is the name of the vina executable, could be a path to the binary object (absolute path is recommended), which must have execution permits (chmod a+x <your binary file>), by default 'vina' vina_seed : Union[int, None], optional Explicit random seed used by vina, by default None receptor_pdbqt_path : str, optional Where the receptor pdbqt file is located, by default None boxcenter : list[float], optional A list of three floats with the definition of the center of the box in angstrom for docking (x, y, z), by default None boxsize : list[float], optional A list of three floats with the definition of the box size in angstrom of the docking box (x, y, z), by default None exhaustiveness : int, optional Parameter of vina that controls the accuracy of the docking searching, by default 8 ad4map : str, optional Affinity maps for the autodock4.2 (ad4) or vina scoring function, by default None ncores : int, optional Number of cpus to use in Vina, by default 1 num_modes : int, optional How many modes should Vina export, by default 1 constraint : bool, optional Controls if constraint docking will be perform, by default False constraint_type : str, optional This is the type of constraint docking. Could be local_only (vina will perform local optimization and score the resulted pose) or score_only (in this case the provided pose by the internal conformer generator will only be scored), by default 'score_only' constraint_ref : Chem.rdchem.Mol, optional The part of the molecule that we would like to constraint, by default None constraint_receptor_pdb_path : str, optional The same as constraint_receptor_pdbqt_path but in pdb format, by default None constraint_num_conf : int, optional Maximum number of conformer to be generated internally by moldrug , by default 100 constraint_minimum_conf_rms : int, optional RMS to filter duplicate conformers, by default 0.01 desirability : dict, optional Desirability definition to update the internal default values. The update use :meth:`moldrug.utils.deep_update` Each variable only will accept the keys [w, and the name of the desirability function of :meth:`moldrug.utils.DerringerSuichDesirability`], by default None which means that it will use: .. ipython:: python from moldrug.fitness import __get_default_desirability import json print(json.dumps(__get_default_desirability(multireceptor=False), indent = 4)) Returns ------- utils.Individual A new instance of the original Individual with the the new attributes: pdbqt, qed, vina_score, sa_score and cost. cost attribute will be a number between 0 and 1, been 0 the optimal value. Example ------- .. ipython:: python from moldrug import utils, fitness from rdkit import Chem import tempfile, os from moldrug.data import get_data tmp_path = tempfile.TemporaryDirectory() data_x0161 = get_data('x0161') ligand_mol = Chem.MolFromSmiles(data_x0161['smiles']) I = utils.Individual(ligand_mol) box = data_x0161['box'] # Using the default desirability NewI = fitness.Cost(Individual=I, wd=tmp_path.name, receptor_pdbqt_path=data_x0161['protein']['pdbqt'], \ boxcenter=box['boxcenter'], boxsize=box['boxsize'], exhaustiveness=4, ncores=4) print(NewI.cost, NewI.vina_score, NewI.qed, NewI.sa_score) """ if desirability is None: desirability = __get_default_desirability(multireceptor=False) else: desirability = utils.deep_update( target_dict=__get_default_desirability(multireceptor=False), update_dict=desirability ) sascorer = utils.import_sascorer() # multicriteria optimization,Optimization of Several Response Variables # Getting estimate of drug-likness Individual.qed = QED.weights_mean(Chem.RemoveHs(Individual.mol)) # Getting synthetic accessibility score Individual.sa_score = sascorer.calculateScore(Chem.RemoveHs(Individual.mol)) # Getting vina_score and update pdbqt Individual.vina_score, Individual.pdbqt = _vinadock( Individual=Individual, wd=wd, vina_executable=vina_executable, vina_seed=vina_seed, receptor_pdbqt_path=receptor_pdbqt_path, boxcenter=boxcenter, boxsize=boxsize, exhaustiveness=exhaustiveness, ad4map=ad4map, ncores=ncores, num_modes=num_modes, constraint=constraint, constraint_type=constraint_type, constraint_ref=constraint_ref, constraint_receptor_pdb_path=constraint_receptor_pdb_path, constraint_num_conf=constraint_num_conf, constraint_minimum_conf_rms=constraint_minimum_conf_rms) # Adding the cost using all the information of qed, sas and vina_cost # Construct the desirability # Quantitative estimation of drug-likeness (ranges from 0 to 1). We could use just the value perse, # but using LargerTheBest we are more permissible. base = 1 exponent = 0 for variable in desirability: for key in desirability[variable]: if key == 'w': w = desirability[variable][key] elif key in utils.DerringerSuichDesirability(): d = utils.DerringerSuichDesirability()[key]( getattr(Individual, variable), **desirability[variable][key]) else: raise RuntimeError(f"Inside the desirability dictionary you provided for the variable = {variable} " f"a non implemented key = {key}. Only are possible:" "'w' (standing for weight) and any " "possible Derringer-Suich desirability " f"function: {utils.DerringerSuichDesirability().keys()}") base *= d**w exponent += w # We are using a geometric mean. And because we are minimizing we have to return Individual.cost = 1 - base**(1 / exponent) return Individual
[docs] def CostOnlyVina( Individual: utils.Individual, wd: str = '.vina_jobs', vina_executable: str = 'vina', vina_seed: Union[int, None] = None, receptor_pdbqt_path: str = None, boxcenter: List[float] = None, boxsize: List[float] = None, exhaustiveness: int = 8, ad4map: str = None, ncores: int = 1, num_modes: int = 1, constraint: bool = False, constraint_type: str = 'score_only', # score_only, local_only constraint_ref: Chem.rdchem.Mol = None, constraint_receptor_pdb_path: str = None, constraint_num_conf: int = 100, constraint_minimum_conf_rms: int = 0.01, wt_cutoff: Union[None, float] = None): """ This Cost function performs Docking and return the vina_score as Cost. Parameters ---------- Individual : utils.Individual A Individual with the pdbqt attribute wd : str, optional The working directory to execute the docking jobs, by default '.vina_jobs' vina_executable : str, optional This is the name of the vina executable, could be a path to the binary object (absolute path is recommended), which must have execution permits (chmod a+x <your binary file>), by default 'vina' vina_seed : Union[int, None], optional Explicit random seed used by vina, by default None receptor_path : str, optional Where the receptor pdbqt file is located, by default None boxcenter : list[float], optional A list of three floats with the definition of the center of the box in angstrom for docking (x, y, z), by default None boxsize : list[float], optional A list of three floats with the definition of the box size in angstrom of the docking box (x, y, z), by default None exhaustiveness : int, optional Parameter of vina that controls the accuracy of the docking searching, by default 8 ad4map : str, optional Affinity maps for the autodock4.2 (ad4) or vina scoring function, by default None ncores : int, optional Number of cpus to use in Vina, by default 1 num_modes : int, optional How many modes should Vina export, by default 1 constraint : bool, optional Controls if constraint docking will be perform, by default False constraint_type : str, optional This is the type of constraint docking. Could be local_only (vina will perform local optimization and score the resulted pose) or score_only (in this case the provided pose by the internal conformer generator will only be scored), by default 'score_only' constraint_ref : Chem.rdchem.Mol, optional The part of the molecule that we would like to constraint, by default None constraint_receptor_pdb_path : str, optional The same as constraint_receptor_pdbqt_path but in pdb format, by default None constraint_num_conf : int, optional Maximum number of conformer to be generated internally by moldrug , by default 100 constraint_minimum_conf_rms : int, optional RMS to filter duplicate conformers, by default 0.01 wt_cutoff : Union[None, float], optional If some number is provided the molecules with a molecular weight higher than wt_cutoff will get as vina_score = cost = np.inf. Vina will not be invoked, by default None Returns ------- utils.Individual A new instance of the original Individual with the the new attributes: pdbqt, vina_score and cost. In this case cost = vina_score, the lowest the values the best individual. Example ------- .. ipython:: python from moldrug import utils, fitness from rdkit import Chem import tempfile, os from moldrug.data import get_data tmp_path = tempfile.TemporaryDirectory() data_x0161 = get_data('x0161') ligand_mol = Chem.MolFromSmiles(data_x0161['smiles']) I = utils.Individual(ligand_mol) box = data_x0161['box'] NewI = fitness.CostOnlyVina(Individual=I, wd=tmp_path.name, receptor_pdbqt_path=data_x0161['protein']['pdbqt'],\ boxcenter=box['boxcenter'], boxsize=box['boxsize'], exhaustiveness=4,ncores=4) print(NewI.cost, NewI.vina_score) """ # If the molecule is heavy, don't perform docking and assign infinite to the cost attribute. # Add the pdbqt to pdbqts and np.inf to vina_scores if wt_cutoff: if Descriptors.MolWt(Individual.mol) > wt_cutoff: Individual.vina_score = np.inf Individual.cost = np.inf Individual.pdbqt = 'TooHeavy' return Individual # Getting vina_score and update pdbqt Individual.vina_score, Individual.pdbqt = _vinadock( Individual=Individual, wd=wd, vina_executable=vina_executable, vina_seed=vina_seed, receptor_pdbqt_path=receptor_pdbqt_path, boxcenter=boxcenter, boxsize=boxsize, exhaustiveness=exhaustiveness, ad4map=ad4map, ncores=ncores, num_modes=num_modes, constraint=constraint, constraint_type=constraint_type, constraint_ref=constraint_ref, constraint_receptor_pdb_path=constraint_receptor_pdb_path, constraint_num_conf=constraint_num_conf, constraint_minimum_conf_rms=constraint_minimum_conf_rms) Individual.cost = Individual.vina_score return Individual
[docs] def CostMultiReceptors( Individual: utils.Individual, wd: str = '.vina_jobs', vina_executable: str = 'vina', vina_seed: Union[int, None] = None, receptor_pdbqt_path: List[str] = None, vina_score_type: Union[None, str, List[str]] = None, boxcenter: List[List[float]] = None, boxsize: List[List[float]] = None, exhaustiveness: int = 8, ad4map: List[str] = None, ncores: int = 1, num_modes: int = 1, constraint: bool = False, constraint_type: str = 'score_only', # score_only, local_only constraint_ref: Chem.rdchem.Mol = None, constraint_receptor_pdb_path: List[str] = None, constraint_num_conf: int = 100, constraint_minimum_conf_rms: int = 0.01, desirability: Dict = None): """ This function is similar to :meth:`moldrug.fitness.Cost` but it will add the possibility to work with more than one receptor. It also use the concept of desirability and the response variables are: #. `Vina scores. <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3041641/>`_ #. `Quantitative Estimation of Drug-likeness (QED). <https://www.nature.com/articles/nchem.1243>`_ #. `Synthetic accessibility score. <https://jcheminf.biomedcentral.com/articles/10.1186/1758-2946-1-8)>`_ In this case every vina score (for all the provided receptors) will be used for the construction of the desirability. If ad4map is set, the last version of vina (`releases <https://github.com/ccsb-scripps/AutoDock-Vina/releases>`_) must be installed. To see how to use AutoDock4 force fields in the new version of vina, follow `this tutorial <https://autodock-vina.readthedocs.io/en/latest/docking_zinc.html>_` Parameters ---------- Individual : utils.Individual A Individual with the pdbqt attribute wd : str, optional The working directory to execute the docking jobs, by default '.vina_jobs' vina_executable : str, optional This is the name of the vina executable, could be a path to the binary object (absolute path is recommended), which must have execution permits (chmod a+x <your binary file>), by default 'vina' vina_seed : Union[int, None], optional Explicit random seed used by vina, by default None receptor_pdbqt_path : list[str], optional A list of location of the receptors pdbqt files, by default None vina_score_type : Union[None, str, List[str]], optional This is a list with the keywords 'min' and/or 'max' or 'ensemble. E.g. If two receptor were provided and for the first one we would like to find a minimum in the vina scoring function and for the other one a maximum (selectivity for the first receptor); we must provided the list: ['min', 'max']. In the other hand, if we have several conformations of the same receptor (flexible receptor) we could use 'ensemble'. In this case the vina value used for the optimization will be the lowest (if SmallerTheBest is selected) or highest (LargerTheBest) of the vina scores from all conformations, by default None boxcenter : list[list[float]], optional A list of three floats with the definition of the center of the box in angstrom for docking (x, y, z), by default None boxsize : list[list[float]], optional A list of three floats with the definition of the box size in angstrom of the docking box (x, y, z), by default None exhaustiveness : int, optional Parameter of vina that controls the accuracy of the docking searching, by default 8 ad4map : list[str], optional A list of affinity maps for the autodock4.2 (ad4) or vina scoring function. For every receptor you should have a separate directory with all the maps, by default None ncores : int, optional Number of cpus to use in Vina, by default 1 num_modes : int, optional How many modes should Vina export, by default 1 constraint : bool, optional Controls if constraint docking will be perform, by default False constraint_type : str, optional This is the type of constraint docking. Could be local_only (vina will perform local optimization and score the resulted pose) or score_only (in this case the provided pose by the internal conformer generator will only be scored), by default 'score_only' constraint_ref : Chem.rdchem.Mol, optional The part of the molecule that we would like to constraint, by default None constraint_receptor_pdb_path : list[str], optional The same as constraint_receptor_pdbqt_path but in pdb format, by default None constraint_num_conf : int, optional Maximum number of conformer to be generated internally by moldrug , by default 100 constraint_minimum_conf_rms : int, optional RMS to filter duplicate conformers, by default 0.01 desirability : dict, optional Desirability definition to update the internal default values. The update use :meth:`moldrug.utils.deep_update` Each variable only will accept the keys [w, and the name of the desirability function of :meth:`moldrug.utils.DerringerSuichDesirability`]. In the case of vina_scores there is another layer for the vina_score_type = [min, max], by default is None which means that it will use: .. ipython:: python from moldrug.fitness import __get_default_desirability import json print(json.dumps(__get_default_desirability(multireceptor=True), indent = 4)) Returns ------- utils.Individual A new instance of the original Individual with the the new attributes: pdbqts [a list of pdbqt], qed, vina_scores[a list of vina_score], sa_score and cost. cost attribute will be a number between 0 and 1, been 0 the optimal value. Example ------- .. ipython:: python from moldrug import utils, fitness from rdkit import Chem import tempfile, os from moldrug.data import get_data data_x0161 = get_data('x0161') data_6lu7 = get_data('6lu7') tmp_path = tempfile.TemporaryDirectory() ligand_mol = Chem.MolFromSmiles(data_x0161['smiles']) I = utils.Individual(ligand_mol) receptor_paths = [data_x0161['protein']['pdbqt'], data_6lu7['protein']['pdbqt']] boxcenter = [data_x0161['box']['boxcenter'], data_6lu7['box']['boxcenter']] boxsize = [data_x0161['box']['boxsize'], data_6lu7['box']['boxsize']] vina_score_type = ['min', 'max'] # Using the default desirability NewI = fitness.CostMultiReceptors( Individual = I, wd = tmp_path.name,receptor_pdbqt_path = receptor_paths, vina_score_type = vina_score_type, boxcenter = boxcenter,boxsize = boxsize,exhaustiveness = 4,ncores = 4) print(NewI.cost, NewI.vina_score, NewI.qed, NewI.sa_score) """ if desirability is None: desirability = __get_default_desirability(multireceptor=True) else: desirability = utils.deep_update( target_dict=__get_default_desirability(multireceptor=True), update_dict=desirability ) if not ad4map: ad4map = [None] * len(receptor_pdbqt_path) sascorer = utils.import_sascorer() Individual.qed = QED.weights_mean(Chem.RemoveHs(Individual.mol)) # Getting synthetic accessibility score Individual.sa_score = sascorer.calculateScore(Chem.RemoveHs(Individual.mol)) # Getting Vina score pdbqt_list = [] Individual.vina_score = [] for (i, _) in enumerate(receptor_pdbqt_path): # Getting vina_score and update pdbqt if constraint: vina_score, pdbqt = _vinadock( Individual=Individual, wd=wd, vina_executable=vina_executable, vina_seed=vina_seed, receptor_pdbqt_path=receptor_pdbqt_path[i], boxcenter=boxcenter[i], boxsize=boxsize[i], exhaustiveness=exhaustiveness, ad4map=ad4map[i], ncores=ncores, num_modes=num_modes, constraint=constraint, constraint_type=constraint_type, constraint_ref=constraint_ref, constraint_receptor_pdb_path=constraint_receptor_pdb_path[i], constraint_num_conf=constraint_num_conf, constraint_minimum_conf_rms=constraint_minimum_conf_rms) else: vina_score, pdbqt = _vinadock( Individual=Individual, wd=wd, vina_executable=vina_executable, vina_seed=vina_seed, receptor_pdbqt_path=receptor_pdbqt_path[i], boxcenter=boxcenter[i], boxsize=boxsize[i], exhaustiveness=exhaustiveness, ad4map=ad4map[i], ncores=ncores, num_modes=num_modes) Individual.vina_score.append(vina_score) pdbqt_list.append(pdbqt) # Update the pdbqt attribute Individual.pdbqt = pdbqt_list # make a copy of the default values of desirability # pops the region of vina_scores desirability_to_work_with = desirability.copy() vina_desirability_section = desirability_to_work_with.pop('vina_scores') # Initialize base and exponent base = 1 exponent = 0 # Runs for all properties different to vina_scores for variable in desirability_to_work_with: for key in desirability_to_work_with[variable]: if key == 'w': w = desirability_to_work_with[variable][key] elif key in utils.DerringerSuichDesirability(): d = utils.DerringerSuichDesirability()[key]( getattr(Individual, variable), **desirability_to_work_with[variable][key]) else: raise RuntimeError(f"Inside the desirability dictionary you provided for the variable = {variable} " f"a non implemented key = {key}. Only are possible: 'w' (standing for weight) and " "any possible Derringer-Suich " f"desirability function: {utils.DerringerSuichDesirability().keys()}. " "Only in the case of vina_scores [min and max] keys") base *= d**w exponent += w # Check how to build the desirability if vina_score_type == 'ensemble': # In this case the user is looking for a minimum (potent binder) if 'SmallerTheBest' in vina_desirability_section['ensemble']: vina_score_to_use = min(Individual.vina_score) elif 'LargerTheBest' in vina_desirability_section['ensemble']: vina_score_to_use = max(Individual.vina_score) else: raise RuntimeError(f"For {vina_score_type=} only Derringer-Suich desirability functions:" "SmallerTheBest and LargerTheBest are possible") for key in vina_desirability_section['ensemble']: if key == 'w': w = vina_desirability_section['ensemble'][key] elif key in utils.DerringerSuichDesirability(): d = utils.DerringerSuichDesirability()[key](vina_score_to_use, **vina_desirability_section['ensemble'][key]) else: raise RuntimeError("Inside the desirability dictionary " f"you provided for the variable = vina_scores['ensemble'] " f"a non implemented key = {key}. Only are possible: 'w' (standing for weight) and " "any possible Derringer-Suich " f"desirability function: SmallerTheBest and LargerTheBest.") base *= d**w exponent += w else: # Run only for vina_scores for vs, vst in zip(Individual.vina_score, vina_score_type): for key in vina_desirability_section[vst]: if key == 'w': w = vina_desirability_section[vst][key] elif key in utils.DerringerSuichDesirability(): d = utils.DerringerSuichDesirability()[key](vs, **vina_desirability_section[vst][key]) else: raise RuntimeError("Inside the desirability dictionary " f"you provided for the variable = vina_scores[{vst}] " f"a non implemented key = {key}. Only are possible: 'w' (standing for weight) and " "any possible Derringer-Suich " f"desirability function: {utils.DerringerSuichDesirability().keys()}.") base *= d**w exponent += w # We are using a geometric mean. And because we are minimizing we have to return Individual.cost = 1 - base**(1 / exponent) return Individual
[docs] def CostMultiReceptorsOnlyVina( Individual: utils.Individual, wd: str = '.vina_jobs', vina_executable: str = 'vina', vina_seed: Union[int, None] = None, receptor_pdbqt_path: List[str] = None, vina_score_type: Union[None, str, List[str]] = None, boxcenter: List[List[float]] = None, boxsize: List[List[float]] = None, exhaustiveness: int = 8, ad4map: List[str] = None, ncores: int = 1, num_modes: int = 1, constraint: bool = False, constraint_type: str = 'score_only', # score_only, local_only constraint_ref: Chem.rdchem.Mol = None, constraint_receptor_pdb_path: List[str] = None, constraint_num_conf: int = 100, constraint_minimum_conf_rms: int = 0.01, desirability: Dict = None, wt_cutoff: Union[None, float] = None): """ This function is similar to :meth:`moldrug.fitness. CostOnlyVina` but it will add the possibility to work with more than one receptor. It also use the concept of desirability. The response variables are the vina scores on each receptor. If ad4map is set, the last version of vina (`releases <https://github.com/ccsb-scripps/AutoDock-Vina/releases>`_) must be installed. To see how to use AutoDock4 force fields in the new version of vina, follow `this tutorial <https://autodock-vina.readthedocs.io/en/latest/docking_zinc.html>_` Parameters ---------- Individual : utils.Individual A Individual with the pdbqt attribute wd : str, optional The working directory to execute the docking jobs, by default '.vina_jobs' vina_executable : str, optional This is the name of the vina executable, could be a path to the binary object (absolute path is recommended), which must have execution permits (chmod a+x <your binary file>), by default 'vina' vina_seed : Union[int, None], optional Explicit random seed used by vina, by default None receptor_pdbqt_path : list[str], optional A list of location of the receptors pdbqt files, by default None vina_score_type : Union[None, str, List[str]], optional This is a list with the keywords 'min' and/or 'max' or 'ensemble. E.g. If two receptor were provided and for the first one we would like to find a minimum in the vina scoring function and for the other one a maximum (selectivity for the first receptor); we must provided the list: ['min', 'max']. In the other hand, if we have several conformations of the same receptor (flexible receptor) we could use 'ensemble'. In this case the vina value used for the optimization will be the lowest (if SmallerTheBest is selected) or highest (LargerTheBest) of the vina scores from all conformations, by default None boxcenter : list[float], optional A list of three floats with the definition of the center of the box in angstrom for docking (x, y, z), by default None boxsize : list[float], optional A list of three floats with the definition of the box size in angstrom of the docking box (x, y, z), by default None exhaustiveness : int, optional Parameter of vina that controls the accuracy of the docking searching, by default 8 ad4map : list[str], optional A list of affinity maps for the autodock4.2 (ad4) or vina scoring function. For every receptor you should have a separate directory with all the maps, by default None ncores : int, optional Number of cpus to use in Vina, by default 1 num_modes : int, optional How many modes should Vina export, by default 1 constraint : bool, optional Controls if constraint docking will be perform, by default False constraint_type : str, optional This is the type of constraint docking. Could be local_only (vina will perform local optimization and score the resulted pose) or score_only (in this case the provided pose by the internal conformer generator will only be scored), by default 'score_only' constraint_ref : Chem.rdchem.Mol, optional The part of the molecule that we would like to constraint, by default None constraint_receptor_pdb_path : list[str], optional The same as constraint_receptor_pdbqt_path but in pdb format, by default None constraint_num_conf : int, optional Maximum number of conformer to be generated internally by moldrug , by default 100 constraint_minimum_conf_rms : int, optional RMS to filter duplicate conformers, by default 0.01 desirability : dict, optional Desirability definition to update the internal default values. The update use :meth:`moldrug.utils.deep_update` Each variable only will accept the keys [w, and the name of the desirability function of :meth:`moldrug.utils.DerringerSuichDesirability`]. In the case of vina_scores there is another layer for the vina_score_type = [min, max], by default is None which means that it will use: .. ipython:: python from moldrug.fitness import __get_default_desirability import json print(json.dumps(__get_default_desirability(multireceptor=True)['vina_scores'], indent = 4)) If `vina_score_type = ensemble`, the only parameter that it will be used is the name of the desirability in order to look for a minimum (SmallerTheBest) or a maximum (LargerTheBest). The following desaribility is enough in case minimum is desired: .. ipython:: python desirability = { 'ensemble': 'SmallerTheBest' # (LargerTheBest if a maximum is desired) } print(json.dumps(desirability, indent = 4)) wt_cutoff : Union[None, float], optional If some number is provided, the molecules with a molecular weight higher than wt_cutoff will get as vina_score = cost = np.inf. Vina will not be invoked, by default None Returns ------- utils.Individual A new instance of the original Individual with the the new attributes: pdbqts [a list of pdbqt], vina_scores [a list of vina_score], and cost. cost attribute will be a number between 0 and 1, been 0 the optimal value. Example ------- .. ipython:: python from moldrug import utils, fitness from rdkit import Chem import tempfile, os from moldrug.data import get_data data_x0161 = get_data('x0161') data_6lu7 = get_data('6lu7') tmp_path = tempfile.TemporaryDirectory() ligand_mol = Chem.MolFromSmiles(data_x0161['smiles']) I = utils.Individual(ligand_mol) receptor_paths = [data_x0161['protein']['pdbqt'], data_6lu7['protein']['pdbqt']] boxcenter = [data_x0161['box']['boxcenter'], data_6lu7['box']['boxcenter']] boxsize = [data_x0161['box']['boxsize'], data_6lu7['box']['boxsize']] vina_score_type = ['min', 'max'] # Using the default desirability NewI = fitness.CostMultiReceptorsOnlyVina( Individual = I,wd = tmp_path.name,receptor_pdbqt_path = receptor_paths, vina_score_type = vina_score_type, boxcenter = boxcenter,boxsize = boxsize, exhaustiveness = 4,ncores = 4) print(NewI.cost, NewI.vina_score) """ if desirability is None: desirability = __get_default_desirability(multireceptor=True)['vina_scores'] else: desirability = utils.deep_update( target_dict=__get_default_desirability(multireceptor=True)['vina_scores'], update_dict=desirability ) pdbqt_list = [] Individual.vina_score = [] if not ad4map: ad4map = [None] * len(receptor_pdbqt_path) # If the molecule is heavy, don't perform docking and assign infinite to the cost attribute. # Add "TooHeavy" string to pdbqts and np.inf to vina_scores if wt_cutoff: if Descriptors.MolWt(Individual.mol) > wt_cutoff: for _ in receptor_pdbqt_path: pdbqt_list.append('TooHeavy') Individual.vina_score.append(np.inf) Individual.cost = np.inf # Update the pdbqt attribute Individual.pdbqt = pdbqt_list return Individual # Getting Vina score pdbqt_list = [] Individual.vina_score = [] for (i, _) in enumerate(receptor_pdbqt_path): # Getting vina_score and update pdbqt if constraint: vina_score, pdbqt = _vinadock( Individual=Individual, wd=wd, vina_executable=vina_executable, vina_seed=vina_seed, receptor_pdbqt_path=receptor_pdbqt_path[i], boxcenter=boxcenter[i], boxsize=boxsize[i], exhaustiveness=exhaustiveness, ad4map=ad4map[i], ncores=ncores, num_modes=num_modes, constraint=constraint, constraint_type=constraint_type, constraint_ref=constraint_ref, constraint_receptor_pdb_path=constraint_receptor_pdb_path[i], constraint_num_conf=constraint_num_conf, constraint_minimum_conf_rms=constraint_minimum_conf_rms) else: vina_score, pdbqt = _vinadock( Individual=Individual, wd=wd, vina_executable=vina_executable, vina_seed=vina_seed, receptor_pdbqt_path=receptor_pdbqt_path[i], boxcenter=boxcenter[i], boxsize=boxsize[i], exhaustiveness=exhaustiveness, ad4map=ad4map[i], ncores=ncores, num_modes=num_modes) Individual.vina_score.append(vina_score) pdbqt_list.append(pdbqt) # Update the pdbqt attribute Individual.pdbqt = pdbqt_list if vina_score_type == 'ensemble': # In this case the user is looking for a minimum (potent binder) if 'SmallerTheBest' in desirability['ensemble']: vina_score_to_use = min(Individual.vina_score) elif 'LargerTheBest' in desirability['ensemble']: vina_score_to_use = max(Individual.vina_score) else: raise RuntimeError(f"For {vina_score_type=} only Derringer-Suich desirability functions:" "SmallerTheBest and LargerTheBest are possible") Individual.cost = vina_score_to_use else: # Initialize base and exponent base = 1 exponent = 0 # Run for vina_scores for vs, vst in zip(Individual.vina_score, vina_score_type): for key in desirability[vst]: if key == 'w': w = desirability[vst][key] elif key in utils.DerringerSuichDesirability(): d = utils.DerringerSuichDesirability()[key](vs, **desirability[vst][key]) else: raise RuntimeError("Inside the desirability dictionary you provided " f"for the variable = vina_scores[{vst}] " f"a non implemented key = {key}. " "Only are possible: 'w' (standing for weight) " "and any possible Derringer-Suich " f"desirability function: {utils.DerringerSuichDesirability().keys()}.") base *= d**w exponent += w # We are using a geometric mean. And because we are minimizing we have to return Individual.cost = 1 - base**(1 / exponent) return Individual
if __name__ == '__main__': pass