De-lint bonds.py

This commit is contained in:
Nathan Baker
2020-05-21 19:29:54 -07:00
parent 51a59f9979
commit 013167026f
2 changed files with 185 additions and 233 deletions

View File

@@ -19,9 +19,11 @@ DISULFIDE_DISTANCE = 2.5
FLUORIDE_DISTANCE = 1.7
HYDROGEN_DISTANCE = 1.5
DEFAULT_DISTANCE = 2.0
BOX_SIZE = 2.5
POS_MAX = 1e6
class bondmaker:
class BondMaker:
"""Makes bonds?
TODO - the documentation for this class needs to be improved.
@@ -44,54 +46,32 @@ class bondmaker:
self.protein_bonds = json.load(json_file)
self.intra_residue_backbone_bonds = {'N': ['CA'], 'CA': ['N', 'C'],
'C': ['CA', 'O'], 'O': ['C']}
self.number_of_pi_electrons_in_bonds_in_backbone = {'C': 1, 'O': 1}
self.number_of_pi_electrons_in_conjugate_bonds_in_backbone = {'N': 1}
self.number_of_pi_electrons_in_bonds_in_sidechains = {'ARG-CZ' : 1,
'ARG-NH1': 1,
'ASN-OD1': 1,
'ASN-CG' : 1,
'ASP-OD1': 1,
'ASP-CG' : 1,
'GLU-OE1': 1,
'GLU-CD' : 1,
'GLN-OE1': 1,
'GLN-CD' : 1,
'HIS-CG' : 1,
'HIS-CD2': 1,
'HIS-ND1': 1,
'HIS-CE1': 1,
'PHE-CG' : 1,
'PHE-CD1': 1,
'PHE-CE1': 1,
'PHE-CZ' : 1,
'PHE-CE2': 1,
'PHE-CD2': 1,
'TRP-CG' : 1,
'TRP-CD1': 1,
'TRP-CE2': 1,
'TRP-CD2': 1,
'TRP-CE3': 1,
'TRP-CZ3': 1,
'TRP-CH2': 1,
'TRP-CZ2': 1,
'TYR-CG' : 1,
'TYR-CD1': 1,
'TYR-CE1': 1,
'TYR-CZ' : 1,
'TYR-CE2': 1,
'TYR-CD2': 1}
self.number_of_pi_electrons_in_conjugate_bonds_in_sidechains = {'ARG-NE': 1,
'ARG-NH2': 1,
'ASN-ND2': 1,
'GLN-NE2': 1,
'HIS-NE2': 1,
'TRP-NE1': 1}
self.number_of_pi_electrons_in_bonds_ligands = {'C.ar': 1, 'N.pl3': 0,
'C.2': 1, 'O.2': 1,
'O.co2': 1, 'N.ar': 1,
self.num_pi_elec_bonds_backbone = {'C': 1, 'O': 1}
self.num_pi_elec_conj_bonds_backbone = {'N': 1}
self.num_pi_elec_bonds_sidechains = {'ARG-CZ' : 1, 'ARG-NH1': 1,
'ASN-OD1': 1, 'ASN-CG' : 1,
'ASP-OD1': 1, 'ASP-CG' : 1,
'GLU-OE1': 1, 'GLU-CD' : 1,
'GLN-OE1': 1, 'GLN-CD' : 1,
'HIS-CG' : 1, 'HIS-CD2': 1,
'HIS-ND1': 1, 'HIS-CE1': 1,
'PHE-CG' : 1, 'PHE-CD1': 1,
'PHE-CE1': 1, 'PHE-CZ' : 1,
'PHE-CE2': 1, 'PHE-CD2': 1,
'TRP-CG' : 1, 'TRP-CD1': 1,
'TRP-CE2': 1, 'TRP-CD2': 1,
'TRP-CE3': 1, 'TRP-CZ3': 1,
'TRP-CH2': 1, 'TRP-CZ2': 1,
'TYR-CG' : 1, 'TYR-CD1': 1,
'TYR-CE1': 1, 'TYR-CZ' : 1,
'TYR-CE2': 1, 'TYR-CD2': 1}
self.num_pi_elec_conj_bonds_sidechains = {'ARG-NE': 1, 'ARG-NH2': 1,
'ASN-ND2': 1, 'GLN-NE2': 1,
'HIS-NE2': 1, 'TRP-NE1': 1}
self.num_pi_elec_bonds_ligands = {'C.ar': 1, 'N.pl3': 0, 'C.2': 1,
'O.2': 1, 'O.co2': 1, 'N.ar': 1,
'C.1': 2, 'N.1': 2}
self.number_of_pi_electrons_in_conjugate_bonds_in_ligands = {'N.am': 1,
'N.pl3': 1}
self.num_pi_elec_conj_bonds_ligands = {'N.am': 1, 'N.pl3': 1}
self.backbone_atoms = list(self.intra_residue_backbone_bonds.keys())
self.terminal_oxygen_names = ['OXT', 'O\'\'']
@@ -160,47 +140,57 @@ class bondmaker:
# TODO - stopped here.
def connect_backbone(self, residue1, residue2):
""" Sets up bonds in the backbone """
# residue 1
"""Sets up bonds in the backbone
Args:
residue1: first residue to connect
residue2: second residue to connect
"""
self.find_bonds_for_residue_backbone(residue1)
# residue 2
self.find_bonds_for_residue_backbone(residue2)
# inter-residue bond
for atom1 in residue1.atoms:
if atom1.name == 'C':
for atom2 in residue2.atoms:
if atom2.name == 'N':
if propka.calculations.squared_distance(atom1,atom2) < self.default_dist_squared:
if propka.calculations.squared_distance(atom1, atom2) \
< self.default_dist_squared:
self.make_bond(atom1, atom2)
return
def find_bonds_for_residue_backbone(self, residue):
"""Find bonds for this residue's backbone.
Args:
residue: reside to search for backbone bonds.
"""
for atom1 in residue.atoms:
if atom1.name in list(self.number_of_pi_electrons_in_bonds_in_backbone.keys()):
atom1.num_pi_elec_2_3_bonds = self.number_of_pi_electrons_in_bonds_in_backbone[atom1.name]
if atom1.name in list(self.number_of_pi_electrons_in_conjugate_bonds_in_backbone.keys()) and len(atom1.bonded_atoms)>1: # last part to avoid including N-term
atom1.num_pi_elec_conj_2_3_bonds = self.number_of_pi_electrons_in_conjugate_bonds_in_backbone[atom1.name]
if atom1.name in list(self.num_pi_elec_bonds_backbone.keys()):
atom1.num_pi_elec_2_3_bonds \
= self.num_pi_elec_bonds_backbone[atom1.name]
if atom1.name in \
list(self.num_pi_elec_conj_bonds_backbone.keys()) \
and len(atom1.bonded_atoms) > 1: # last part to avoid including N-term
atom1.num_pi_elec_conj_2_3_bonds \
= self.num_pi_elec_conj_bonds_backbone[atom1.name]
if atom1.name in self.backbone_atoms:
for atom2 in residue.atoms:
if atom2.name in self.intra_residue_backbone_bonds[atom1.name]:
self.make_bond(atom1, atom2)
return
def find_bonds_for_side_chain(self, atoms):
""" Finds bonds for a side chain """
for atom1 in atoms:
"""Finds bonds for a side chain.
Args:
atoms: list of atoms to check for bonds
"""
for atom1 in atoms:
key = '%s-%s' % (atom1.res_name, atom1.name)
if key in list(self.number_of_pi_electrons_in_bonds_in_sidechains.keys()):
atom1.num_pi_elec_2_3_bonds = self.number_of_pi_electrons_in_bonds_in_sidechains[key]
if key in list(self.number_of_pi_electrons_in_conjugate_bonds_in_sidechains.keys()):
atom1.num_pi_elec_conj_2_3_bonds = self.number_of_pi_electrons_in_conjugate_bonds_in_sidechains[key]
if key in list(self.num_pi_elec_bonds_sidechains.keys()):
atom1.num_pi_elec_2_3_bonds \
= self.num_pi_elec_bonds_sidechains[key]
if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()):
atom1.num_pi_elec_conj_2_3_bonds \
= self.num_pi_elec_conj_bonds_sidechains[key]
if not atom1.name in self.backbone_atoms:
if not atom1.name in self.terminal_oxygen_names:
@@ -208,45 +198,55 @@ class bondmaker:
if atom2.name in self.protein_bonds[atom1.res_name][atom1.name]:
self.make_bond(atom1, atom2)
return
def find_bonds_for_ligand(self, ligand):
""" Finds bonds for all atoms in the molecule """
"""Finds bonds for all atoms in the ligand molecule
Args:
ligand: ligand molecule to search for bonds
"""
# identify bonding atoms
self.find_bonds_for_atoms(ligand.atoms)
self.add_pi_electron_table_info(ligand.atoms)
return
def add_pi_electron_table_info(self, atoms):
"""Add table information on pi electrons
Args:
atoms: list of atoms for pi electron table information checking
"""
# apply table information on pi-electrons
for atom in atoms:
# for ligands
if atom.type == 'hetatm':
if atom.sybyl_type in self.number_of_pi_electrons_in_bonds_ligands.keys():
atom.num_pi_elec_2_3_bonds = self.number_of_pi_electrons_in_bonds_ligands[atom.sybyl_type]
if atom.sybyl_type in self.number_of_pi_electrons_in_conjugate_bonds_in_ligands.keys():
atom.num_pi_elec_conj_2_3_bonds = self.number_of_pi_electrons_in_conjugate_bonds_in_ligands[atom.sybyl_type]
if atom.sybyl_type in self.num_pi_elec_bonds_ligands.keys():
atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_ligands[atom.sybyl_type]
if atom.sybyl_type in self.num_pi_elec_conj_bonds_ligands.keys():
atom.num_pi_elec_conj_2_3_bonds \
= self.num_pi_elec_conj_bonds_ligands[atom.sybyl_type]
# for protein
if atom.type == 'atom':
key = '%s-%s' % (atom.res_name, atom.name)
if key in list(self.number_of_pi_electrons_in_bonds_in_sidechains.keys()):
atom.num_pi_elec_2_3_bonds = self.number_of_pi_electrons_in_bonds_in_sidechains[key]
if key in list(self.number_of_pi_electrons_in_conjugate_bonds_in_sidechains.keys()):
atom.num_pi_elec_conj_2_3_bonds = self.number_of_pi_electrons_in_conjugate_bonds_in_sidechains[key]
if atom.name in list(self.number_of_pi_electrons_in_bonds_in_backbone.keys()):
atom.num_pi_elec_2_3_bonds = self.number_of_pi_electrons_in_bonds_in_backbone[atom.name]
if atom.name in list(self.number_of_pi_electrons_in_conjugate_bonds_in_backbone.keys()) and len(atom.bonded_atoms)>1: # last part to avoid including N-term
atom.num_pi_elec_conj_2_3_bonds = self.number_of_pi_electrons_in_conjugate_bonds_in_backbone[atom.name]
return
if key in list(self.num_pi_elec_bonds_sidechains.keys()):
atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_sidechains[key]
if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()):
atom.num_pi_elec_conj_2_3_bonds = self.num_pi_elec_conj_bonds_sidechains[key]
if atom.name in list(self.num_pi_elec_bonds_backbone.keys()):
atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_backbone[atom.name]
if atom.name in list(self.num_pi_elec_conj_bonds_backbone.keys()) \
and len(atom.bonded_atoms) > 1:
# last part to avoid including N-term
atom.num_pi_elec_conj_2_3_bonds \
= self.num_pi_elec_conj_bonds_backbone[atom.name]
def find_bonds_for_protein_by_distance(self, molecule):
""" Finds bonds for all atoms in the molecule """
"""Finds bonds for all atoms in the molecule.
Args:
molecule: molecule in which to find bonds.
Returns:
list of atoms
"""
#self.find_bonds_for_protein(molecule)
atoms = []
for chain in molecule.chains:
@@ -254,21 +254,20 @@ class bondmaker:
if residue.res_name.replace(' ', '') not in ['N+', 'C-']:
for atom in residue.atoms:
atoms.append(atom)
self.find_bonds_for_atoms_using_boxes(atoms) #####
#self.find_bonds_for_atoms(atoms) #####
self.find_bonds_for_atoms_using_boxes(atoms)
return atoms
def find_bonds_for_atoms(self, atoms):
""" Finds all bonds for a list of atoms"""
no_atoms = len(atoms)
"""Finds all bonds for a list of atoms
Args:
atoms: list of atoms in which to find bonds.
"""
no_atoms = len(atoms)
for i in range(no_atoms):
for j in range(i+1, no_atoms):
if atoms[i] in atoms[j].bonded_atoms:
continue
if self.check_distance(atoms[i], atoms[j]):
self.make_bond(atoms[i], atoms[j])
# di-sulphide bonds
@@ -276,19 +275,20 @@ class bondmaker:
atoms[i].cysteine_bridge = True
atoms[j].cysteine_bridge = True
return
def check_distance(self, atom1, atom2):
sq_dist = propka.calculations.squared_distance(atom1, atom2)
"""Check distance between two atoms
Args:
atom1: first atom for distance check
atom2: second atom for distance check
Returns:
True if within distance, False otherwise
"""
sq_dist = propka.calculations.squared_distance(atom1, atom2)
if sq_dist > self.max_sq_distance:
return False
key = '%s-%s' % (atom1.element, atom2.element)
h_count = key.count('H')
if sq_dist < self.H_dist_squared and h_count == 1:
return True
if sq_dist < self.default_dist_squared and h_count == 0:
@@ -296,35 +296,39 @@ class bondmaker:
if key in self.distances_squared.keys():
if sq_dist < self.distances_squared[key]:
return True
return False
def find_bonds_for_molecules_using_boxes(self, molecules):
""" Finds all bonds for a molecular container"""
""" Finds all bonds for a molecular container.
Args:
molecules: list of molecules for finding bonds.
"""
for name in molecules.conformation_names:
self.find_bonds_for_atoms_using_boxes(molecules.conformations[name].atoms)
#for chain in molecules.conformations[name].chains:
# self.find_bonds_for_atoms_using_boxes(molecules.conformations[name].get_chain(chain))
return
def add_pi_electron_information(self, molecules):
"""Add pi electron information to a molecule.
Args:
molecules: list of molecules for adding pi electron information.
"""
for name in molecules.conformation_names:
self.add_pi_electron_table_info(molecules.conformations[name].atoms)
return
def find_bonds_for_atoms_using_boxes(self, atoms):
""" Finds all bonds for a list of atoms"""
"""Finds all bonds for a list of atoms.
box_size = 2.5
# find min and max coordinates
xmin = 1e6; xmax = -1e6
ymin = 1e6; ymax = -1e6
zmin = 1e6; zmax = -1e6
Args:
atoms: list of atoms for finding bonds
"""
box_size = BOX_SIZE
xmin = POS_MAX
xmax = -1.0 * POS_MAX
ymin = POS_MAX
ymax = -1.0 * POS_MAX
zmin = POS_MAX
zmax = -1.0 * POS_MAX
for atom in atoms:
if atom.x > xmax:
xmax = atom.x
@@ -338,52 +342,34 @@ class bondmaker:
ymin = atom.y
if atom.z < zmin:
zmin = atom.z
xlen = xmax - xmin
ylen = ymax - ymin
zlen = zmax - zmin
#info('x range: [%6.2f;%6.2f] %6.2f'%(xmin,xmax,xlen))
#info('y range: [%6.2f;%6.2f] %6.2f'%(ymin,ymax,ylen))
#info('z range: [%6.2f;%6.2f] %6.2f'%(zmin,zmax,zlen))
# how many boxes do we need in each dimension?
# NOTE: math.ceil() returns an int in python3 and a float in
# python2, so we need to convert it to an int for range() to work in
# both versions. See PEP 3141.
self.no_box_x = max(1, int(math.ceil(xlen/box_size)))
self.no_box_y = max(1, int(math.ceil(ylen/box_size)))
self.no_box_z = max(1, int(math.ceil(zlen/box_size)))
#info('No. box x: %6.2f'%self.no_box_x)
#info('No. box y: %6.2f'%self.no_box_y)
#info('No. box z: %6.2f'%self.no_box_z)
# initialize boxes
self.num_box_x = max(1, int(math.ceil(xlen/box_size)))
self.num_box_y = max(1, int(math.ceil(ylen/box_size)))
self.num_box_z = max(1, int(math.ceil(zlen/box_size)))
self.boxes = {}
for x in range(self.no_box_x):
for y in range(self.no_box_y):
for z in range(self.no_box_z):
for x in range(self.num_box_x):
for y in range(self.num_box_y):
for z in range(self.num_box_z):
self.boxes[(x, y, z)] = []
# put atoms into boxes
for atom in atoms:
x = math.floor((atom.x - xmin)/box_size)
y = math.floor((atom.y - ymin)/box_size)
z = math.floor((atom.z - zmin)/box_size)
self.put_atom_in_box(x, y, z, atom)
# assign bonds
for key, value in self.boxes.items():
for value in self.boxes.values():
self.find_bonds_for_atoms(value)
return
def put_atom_in_box(self, x, y, z, atom):
# atom in the x,y,z box and the up to 7 neighboring boxes on
# one side of the x,y,z box in each dimension
"""Put an atom in a box.
Args:
x: box x-coordinates
y: box y-coordinates
z: box z-coordinates
atom: the atom to place in a box
"""
for bx in [x, x+1]:
for by in [y, y+1]:
for bz in [z, z+1]:
@@ -391,94 +377,60 @@ class bondmaker:
try:
self.boxes[key].append(atom)
except KeyError:
# No box exists for this coordinate
pass
#info(atom,'->',key,':',len(self.boxes[key]))
return
def has_bond(self, atom1, atom2):
"""Look for bond between two atoms.
Args:
atom1: first atom to check
atom2: second atom to check
Returns:
True if there is a bond between atoms
"""
if atom1 in atom2.bonded_atoms or atom2 in atom1.bonded_atoms:
return True
return False
def make_bond(self, atom1, atom2):
""" Makes a bond between atom1 and atom2 """
"""Makes a bond between atom1 and atom2
Args:
atom1: first atom to bond
atom2: second atom to bond
"""
if atom1 == atom2:
return
#info('making bond for',atom1,atom2)
if not atom1 in atom2.bonded_atoms:
atom2.bonded_atoms.append(atom1)
if not atom2 in atom1.bonded_atoms:
atom1.bonded_atoms.append(atom2)
return
def generate_protein_bond_dictionary(self, atoms):
"""Generate dictionary of protein bonds.
Args:
atoms: list of atoms for bonding
"""
for atom in atoms:
for bonded_atom in atom.bonded_atoms:
resi_i = atom.res_name
name_i = atom.name
resi_j = bonded_atom.res_name
name_j = bonded_atom.name
if not name_i in self.backbone_atoms or\
not name_j in self.backbone_atoms:
if not name_i in self.terminal_oxygen_names and\
not name_j in self.terminal_oxygen_names:
if not resi_i in list(self.protein_bonds.keys()):
self.protein_bonds[resi_i] = {}
if not name_i in self.protein_bonds[resi_i]:
self.protein_bonds[resi_i][name_i] = []
if not name_j in self.protein_bonds[resi_i][name_i]:
self.protein_bonds[resi_i][name_i].append(name_j)
if not resi_j in list(self.protein_bonds.keys()):
self.protein_bonds[resi_j] = {}
if not name_j in self.protein_bonds[resi_j]:
self.protein_bonds[resi_j][name_j] = []
if not name_i in self.protein_bonds[resi_j][name_j]:
self.protein_bonds[resi_j][name_j].append(name_i)
return
if __name__ == '__main__':
# If called directly, set up protein bond dictionary
import protein, pdb, sys,os
arguments = sys.argv
if len(arguments) != 2:
info('Usage: bonds.py <pdb_file>')
sys.exit(0)
filename = arguments[1]
if not os.path.isfile(filename):
info('Error: Could not find \"%s\"' % filename)
sys.exit(1)
pdblist = pdb.readPDB(filename)
my_protein = protein.Protein(pdblist,'test.pdb')
for chain in my_protein.chains:
for residue in chain.residues:
residue.atoms = [atom for atom in residue.atoms if atom.element != 'H']
b = bondmaker()
#b.protein_bonds = {}
atoms = b.find_bonds_for_protein_by_distance(my_protein)
# b.generate_protein_bond_dictionary(atoms)
#file = open(b.data_file_name,'wb')
#pickle.dump(b.protein_bonds, file)
#file.close()

View File

@@ -32,7 +32,7 @@ def setup_bonding_and_protonation(parameters, molecular_container):
def setup_bonding(parameters, molecular_container):
# make bonds
my_bond_maker = propka.bonds.bondmaker()
my_bond_maker = propka.bonds.BondMaker()
my_bond_maker.find_bonds_for_molecules_using_boxes(molecular_container)
return my_bond_maker
@@ -53,7 +53,7 @@ def setup_bonding_and_protonation_30_style(parameters, molecular_container):
protonate_30_style(molecular_container)
# make bonds
my_bond_maker = propka.bonds.bondmaker()
my_bond_maker = propka.bonds.BondMaker()
my_bond_maker.find_bonds_for_molecules_using_boxes(molecular_container)
return