De-lint ligand.py.

This commit is contained in:
Nathan Baker
2020-05-24 18:29:38 -07:00
parent 3a5bae5b41
commit 786a4c9292

View File

@@ -1,16 +1,9 @@
#!/usr/bin/python """Ligand classes and functions."""
from propka.calculations import squared_distance
from __future__ import division from propka.vector_algebra import vector
from __future__ import print_function
import sys
import propka.calculations
from propka.vector_algebra import *
from propka.lib import info, warning
all_sybyl_types = [ ALL_SYBYL_TYPES = [
'C.3', # carbon sp3 'C.3', # carbon sp3
'H', # hydrogen 'H', # hydrogen
'C.2', # carbon sp2 'C.2', # carbon sp2
@@ -66,183 +59,109 @@ all_sybyl_types = [
'Sn'] # tin 'Sn'] # tin
#propka_input_types = ['1P','1N','2P','2N'] PROPKA_INPUT_TYPES = ['1P', '1N', '2P', '2N', 'C3', 'H', 'C2', 'Hsp', 'C1',
#for type in all_sybyl_types: 'Ht3', 'Car', 'LP', 'Cca', 'Du', 'N3', 'DuC', 'N2',
# temp = type.replace('.','') 'Any', 'N1', 'Hal', 'Nar', 'Het', 'Nam', 'Hev', 'Npl',
# if len(temp)>3: 'Li', 'N4', 'Na', 'O3', 'Mg', 'O2', 'Al', 'Oco', 'Si',
# temp = temp[0:3] 'Osp', 'K', 'Ot3', 'Ca', 'S3', 'Crt', 'S2', 'Cro', 'SO',
# propka_input_types.append(temp) 'Mn', 'SO2', 'Fe', 'P3', 'Coo', 'F', 'Cu', 'Cl', 'Zn',
# 'Br', 'Se', 'I', 'Mo', 'Sn']
#for t in propka_input_types:
# print (t)
propka_input_types = [
'1P',
'1N',
'2P',
'2N',
'C3',
'H',
'C2',
'Hsp',
'C1',
'Ht3',
'Car',
'LP',
'Cca',
'Du',
'N3',
'DuC',
'N2',
'Any',
'N1',
'Hal',
'Nar',
'Het',
'Nam',
'Hev',
'Npl',
'Li',
'N4',
'Na',
'O3',
'Mg',
'O2',
'Al',
'Oco',
'Si',
'Osp',
'K',
'Ot3',
'Ca',
'S3',
'Crt',
'S2',
'Cro',
'SO',
'Mn',
'SO2',
'Fe',
'P3',
'Coo',
'F',
'Cu',
'Cl',
'Zn',
'Br',
'Se',
'I',
'Mo',
'Sn']
max_C_double_bond = 1.3
max_C_triple_bond = 1.2
max_C_double_bond_squared = max_C_double_bond*max_C_double_bond
max_C_triple_bond_squared = max_C_triple_bond*max_C_triple_bond
MAX_C_DOUBLE_BOND = 1.3
MAX_C_TRIPLE_BOND = 1.2
MAX_C_DOUBLE_BOND_SQUARED = MAX_C_DOUBLE_BOND*MAX_C_DOUBLE_BOND
MAX_C_TRIPLE_BOND_SQUARED = MAX_C_TRIPLE_BOND*MAX_C_TRIPLE_BOND
PLANARITY_MARGIN = 0.20
def assign_sybyl_type(atom): def assign_sybyl_type(atom):
"""Assign Sybyl type to atom.
Args:
atom: atom to assign
"""
# check if we already have assigned a name to this atom # check if we already have assigned a name to this atom
if atom.sybyl_assigned: if atom.sybyl_assigned:
#info(atom.name,'already assigned')
return return
# find some properties of the atom # find some properties of the atom
ring_atoms = is_ring_member(atom) ring_atoms = is_ring_member(atom)
aromatic = is_aromatic_ring(ring_atoms) aromatic = is_aromatic_ring(ring_atoms)
planar = is_planar(atom) planar = is_planar(atom)
bonded_elements = {} bonded_elements = {}
for i in range(len(atom.bonded_atoms)): for i, bonded_atom in enumerate(atom.bonded_atoms):
bonded_elements[i]=atom.bonded_atoms[i].element bonded_elements[i] = bonded_atom.element
# Aromatic carbon/nitrogen # Aromatic carbon/nitrogen
if aromatic: if aromatic:
for ra in ring_atoms: for ring_atom in ring_atoms:
if ra.element in ['C','N']: if ring_atom.element in ['C', 'N']:
set_type(ra, ra.element+'.ar') set_type(ring_atom, ring_atom.element+'.ar')
return return
# check for amide # check for amide
if atom.element in ['O', 'N', 'C']: if atom.element in ['O', 'N', 'C']:
O=None o_atom = None
N=None n_atom = None
C=None c_atom = None
# oxygen, nitrogen # oxygen, nitrogen
if atom.element in ['O', 'N']: if atom.element in ['O', 'N']:
for b in atom.get_bonded_elements('C'): for bonded_elem in atom.get_bonded_elements('C'):
for bb in b.bonded_atoms: for bonded_atom in bonded_elem.bonded_atoms:
if (bb.element =='N' and atom.element == 'O'): if (bonded_atom.element == 'N' and atom.element == 'O'):
O=atom o_atom = atom
C=b c_atom = bonded_elem
N=bb n_atom = bonded_atom
elif (bb.element =='O' and atom.element == 'N'): elif (bonded_atom.element == 'O' and atom.element == 'N'):
N=atom n_atom = atom
C=b c_atom = bonded_elem
O=bb o_atom = bonded_atom
# carbon # carbon
if atom.element == 'C': if atom.element == 'C':
nitrogens = atom.get_bonded_elements('N') nitrogens = atom.get_bonded_elements('N')
oxygens = atom.get_bonded_elements('O') oxygens = atom.get_bonded_elements('O')
if len(nitrogens) == 1 and len(oxygens) == 1: if len(nitrogens) == 1 and len(oxygens) == 1:
C = atom c_atom = atom
N = nitrogens[0] n_atom = nitrogens[0]
O = oxygens[0] o_atom = oxygens[0]
if c_atom and n_atom and o_atom:
if C and N and O:
# make sure that the Nitrogen is not aromatic and that it has two heavy atom bonds # make sure that the Nitrogen is not aromatic and that it has two heavy atom bonds
if not is_aromatic_ring(is_ring_member(N)) and len(N.get_bonded_heavy_atoms())==2: if not is_aromatic_ring(is_ring_member(n_atom)) \
set_type(N,'N.am') and len(n_atom.get_bonded_heavy_atoms()) == 2:
set_type(C,'C.2') set_type(n_atom, 'N.am')
set_type(O,'O.2') set_type(c_atom, 'C.2')
set_type(o_atom, 'O.2')
return return
if atom.element == 'C': if atom.element == 'C':
# check for carboxyl # check for carboxyl
if len(atom.bonded_atoms) == 3 and list(bonded_elements.values()).count('O') == 2: if len(atom.bonded_atoms) == 3 and list(bonded_elements.values()).count('O') == 2:
i1 = list(bonded_elements.values()).index('O') index1 = list(bonded_elements.values()).index('O')
i2 = list(bonded_elements.values()).index('O',i1+1) index2 = list(bonded_elements.values()).index('O', index1+1)
if len(atom.bonded_atoms[i1].bonded_atoms)==1 and len(atom.bonded_atoms[i2].bonded_atoms)==1: if len(atom.bonded_atoms[index1].bonded_atoms) == 1 \
set_type(atom.bonded_atoms[i1],'O.co2-') and len(atom.bonded_atoms[index2].bonded_atoms) == 1:
set_type(atom.bonded_atoms[i2],'O.co2') set_type(atom.bonded_atoms[index1], 'O.co2-')
set_type(atom.bonded_atoms[index2], 'O.co2')
set_type(atom, 'C.2') set_type(atom, 'C.2')
return return
# sp carbon # sp carbon
if len(atom.bonded_atoms) <= 2: if len(atom.bonded_atoms) <= 2:
for b in atom.bonded_atoms: for bonded_atom in atom.bonded_atoms:
if propka.calculations.squared_distance(atom, b) < max_C_triple_bond_squared: if squared_distance(atom, bonded_atom) < MAX_C_TRIPLE_BOND_SQUARED:
set_type(atom, 'C.1') set_type(atom, 'C.1')
set_type(b,b.element+'.1') set_type(bonded_atom, bonded_atom.element + '.1')
if atom.sybyl_assigned: if atom.sybyl_assigned:
return return
# sp2 carbon # sp2 carbon
if planar: if planar:
set_type(atom, 'C.2') set_type(atom, 'C.2')
# check for N.pl3 # check for N.pl3
for b in atom.bonded_atoms: for bonded_atom in atom.bonded_atoms:
if b.element=='N': if bonded_atom.element == 'N':
if len(b.bonded_atoms)<3 or is_planar(b): if len(bonded_atom.bonded_atoms) < 3 \
set_type(b,'N.pl3') or is_planar(bonded_atom):
set_type(bonded_atom, 'N.pl3')
return return
# sp3 carbon # sp3 carbon
set_type(atom, 'C.3') set_type(atom, 'C.3')
return return
# Nitrogen # Nitrogen
if atom.element == 'N': if atom.element == 'N':
# check for planar N # check for planar N
@@ -250,160 +169,162 @@ def assign_sybyl_type(atom):
if is_planar(atom.bonded_atoms[0]): if is_planar(atom.bonded_atoms[0]):
set_type(atom, 'N.pl3') set_type(atom, 'N.pl3')
return return
if planar: if planar:
set_type(atom, 'N.pl3') set_type(atom, 'N.pl3')
return return
set_type(atom, 'N.3') set_type(atom, 'N.3')
return return
# Oxygen # Oxygen
if atom.element == 'O': if atom.element == 'O':
set_type(atom, 'O.3') set_type(atom, 'O.3')
if len(atom.bonded_atoms) == 1: if len(atom.bonded_atoms) == 1:
# check for carboxyl # check for carboxyl
if atom.bonded_atoms[0].element == 'C': if atom.bonded_atoms[0].element == 'C':
the_carbon = atom.bonded_atoms[0] the_carbon = atom.bonded_atoms[0]
if len(the_carbon.bonded_atoms)==3 and the_carbon.count_bonded_elements('O')==2: if len(the_carbon.bonded_atoms) == 3 \
[O1,O2] = the_carbon.get_bonded_elements('O') and the_carbon.count_bonded_elements('O') == 2:
[oxy1, oxy2] = the_carbon.get_bonded_elements('O')
if len(O1.bonded_atoms)==1 and len(O2.bonded_atoms)==1: if len(oxy1.bonded_atoms) == 1 and len(oxy2.bonded_atoms) == 1:
set_type(O1,'O.co2-') set_type(oxy1, 'O.co2-')
set_type(O2,'O.co2') set_type(oxy2, 'O.co2')
set_type(the_carbon, 'C.2') set_type(the_carbon, 'C.2')
return return
# check for X=O # check for X=O
if propka.calculations.squared_distance(atom, atom.bonded_atoms[0]) < max_C_double_bond_squared: if squared_distance(atom, atom.bonded_atoms[0]) < MAX_C_DOUBLE_BOND_SQUARED:
set_type(atom, 'O.2') set_type(atom, 'O.2')
if atom.bonded_atoms[0].element == 'C': if atom.bonded_atoms[0].element == 'C':
set_type(atom.bonded_atoms[0], 'C.2') set_type(atom.bonded_atoms[0], 'C.2')
return return
# Sulphur # Sulphur
if atom.element == 'S': if atom.element == 'S':
# check for SO2 # check for SO2
if list(bonded_elements.values()).count('O') == 2: if list(bonded_elements.values()).count('O') == 2:
i1 = list(bonded_elements.values()).index('O') index1 = list(bonded_elements.values()).index('O')
i2 = list(bonded_elements.values()).index('O',i1+1) index2 = list(bonded_elements.values()).index('O', index1+1)
set_type(atom.bonded_atoms[i1],'O.2') set_type(atom.bonded_atoms[index1], 'O.2')
set_type(atom.bonded_atoms[i2],'O.2') set_type(atom.bonded_atoms[index2], 'O.2')
set_type(atom, 'S.o2') set_type(atom, 'S.o2')
return return
# check for SO4 # check for SO4
if list(bonded_elements.values()).count('O') == 4: if list(bonded_elements.values()).count('O') == 4:
no_O2 = 0 no_o2 = 0
for i in range(len(atom.bonded_atoms)): for i in range(len(atom.bonded_atoms)):
if len(atom.bonded_atoms[i].bonded_atoms)==1 and no_O2<2: if len(atom.bonded_atoms[i].bonded_atoms) == 1 and no_o2 < 2:
set_type(atom.bonded_atoms[i], 'O.2') set_type(atom.bonded_atoms[i], 'O.2')
no_O2+=1 no_o2 += 1
else: else:
set_type(atom.bonded_atoms[i], 'O.3') set_type(atom.bonded_atoms[i], 'O.3')
set_type(atom, 'S.3') set_type(atom, 'S.3')
return return
# Phosphorus # Phosphorus
if atom.element == 'P': if atom.element == 'P':
set_type(atom, 'P.3') set_type(atom, 'P.3')
# check for phosphate group # check for phosphate group
bonded_oxygens = atom.get_bonded_elements('O') bonded_oxygens = atom.get_bonded_elements('O')
for o in bonded_oxygens: set_type(o,'O.3') for o_atom in bonded_oxygens:
# if len(bonded_oxygens)>=3: set_type(o_atom, 'O.3')
# # find oxygens only bonded to current phosphorus
# bonded_oxygens_1 = [o for o in bonded_oxygens if len(o.get_bonded_heavy_atoms())==1]
# # find the closest oxygen ...
# closest_oxygen = min(bonded_oxygens_1,
# key= lambda o:propka.calculations.squared_distance(atom,o))
# # ... and set it to O.2
# set_type(closest_oxygen,'O.2')
return return
element = atom.element.capitalize() element = atom.element.capitalize()
set_type(atom, element) set_type(atom, element)
# info('Using element as type for %s'%atom.element)
return
def is_ring_member(atom): def is_ring_member(atom):
"""Determine if atom is a member of a ring.
Args:
atom: atom to test
Returns:
list of atoms
"""
return identify_ring(atom, atom, 0, []) return identify_ring(atom, atom, 0, [])
def identify_ring(this_atom, original_atom, number, past_atoms): def identify_ring(this_atom, original_atom, number, past_atoms):
"""Identify the atoms in a ring
Args:
this_atom: atom to test
original_atom: some other atom
number: number of atoms
past_atoms: atoms that have already been found
Returns:
list of atoms
"""
number += 1 number += 1
past_atoms = past_atoms + [this_atom] past_atoms = past_atoms + [this_atom]
return_atoms = [] return_atoms = []
if number > 10: if number > 10:
return return_atoms return return_atoms
for atom in this_atom.get_bonded_heavy_atoms(): for atom in this_atom.get_bonded_heavy_atoms():
if atom == original_atom and number > 2: if atom == original_atom and number > 2:
return past_atoms return past_atoms
if atom not in past_atoms: if atom not in past_atoms:
these_return_atoms = identify_ring(atom, original_atom, number, past_atoms) these_return_atoms = identify_ring(atom, original_atom, number,
past_atoms)
if len(these_return_atoms) > 0: if len(these_return_atoms) > 0:
if len(return_atoms)>len(these_return_atoms) or len(return_atoms)==0: if len(return_atoms) > len(these_return_atoms) \
or len(return_atoms) == 0:
return_atoms = these_return_atoms return_atoms = these_return_atoms
return return_atoms return return_atoms
def set_type(atom, type_):
"""Set atom type..
Args:
def set_type(atom,type): atom: atom to set
#info(atom, '->',type) type_: type value to set
atom.sybyl_type = type """
atom.sybyl_type = type_
atom.sybyl_assigned = True atom.sybyl_assigned = True
return
def is_planar(atom): def is_planar(atom):
""" Finds out if atom forms a plane together with its bonded atoms""" """Finds out if atom forms a plane together with its bonded atoms.
Args:
atom: atom to test
Returns:
Boolean
"""
atoms = [atom] + atom.bonded_atoms atoms = [atom] + atom.bonded_atoms
return are_atoms_planar(atoms) return are_atoms_planar(atoms)
def are_atoms_planar(atoms): def are_atoms_planar(atoms):
"""Test whether a group of atoms are planar.
Args:
atoms: list of atoms
Returns:
Boolean
"""
if len(atoms) == 0: if len(atoms) == 0:
return False return False
if len(atoms) < 4: if len(atoms) < 4:
return False return False
v1 = vector(atom1=atoms[0], atom2=atoms[1]) vec1 = vector(atom1=atoms[0], atom2=atoms[1])
v2 = vector(atom1=atoms[0], atom2=atoms[2]) vec2 = vector(atom1=atoms[0], atom2=atoms[2])
n = (v1**v2).rescale(1.0) norm = (vec1**vec2).rescale(1.0)
margin = PLANARITY_MARGIN
margin = 0.20 for atom in atoms[3:]:
for b in atoms[3:]: vec = vector(atom1=atoms[0], atom2=atom).rescale(1.0)
v = vector(atom1=atoms[0], atom2=b).rescale(1.0) if abs(vec*norm) > margin:
#info(atoms[0],abs(v*n) )
if abs(v*n)>margin:
return False return False
return True return True
def is_aromatic_ring(atoms): def is_aromatic_ring(atoms):
"""Determine whether group of atoms form aromatic ring.
Args:
atoms: list of atoms to test
Returns:
Boolean
"""
if len(atoms) < 5: if len(atoms) < 5:
return False return False
for i in range(len(atoms)): for i in range(len(atoms)):
if not are_atoms_planar(atoms[i:]+atoms[:i]): if not are_atoms_planar(atoms[i:]+atoms[:i]):
return False return False
return True return True