Merge pull request #40 from Electrostatics/nathan/delint

De-lint PROPKA
This commit is contained in:
Nathan Baker
2020-05-30 08:14:45 -07:00
committed by GitHub
26 changed files with 5238 additions and 4327 deletions

2
.pylintrc Normal file
View File

@@ -0,0 +1,2 @@
[MESSAGES CONTROL]
disable = no-else-return

View File

@@ -5,15 +5,35 @@ import propka.group
from . import hybrid36
# Format strings that get used in multiple places (or are very complex)
PKA_FMT = "{:6.2f}"
INPUT_LINE_FMT = (
"{type:6s}{r.numb:>5d} {atom_label} {r.res_name}{r.chain_id:>2s}"
"{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{group:>6s}{pka:>6s} \n")
PDB_LINE_FMT1 = (
"{type:6s}{r.numb:>5d} {atom_label} {r.res_name}{r.chain_id:>2s}"
"{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{r.occ:>6s}"
"{r.beta:>6s}\n")
MOL2_LINE_FMT = (
"{id:<4d} {atom_label:4s} "
"{r.x:>10.4f} {r.y:>10.4f} {r.z:>10.4f} "
"{r.sybyl_type:>6s} {r.res_num:>6d} {r.res_name:>10s} 0.0000\n")
PDB_LINE_FMT2 = (
"ATOM {numb:>6d} {atom_label} {res_name}{chain_id:>2s}{res_num:>4d}"
"{x:>12.3f}{y:>8.3f}{z:>8.3f}{occ:>6.2f}{beta:>6.2f}\n")
STR_FMT = (
"{r.numb:>5d}-{r.name:>4s} {r.res_num:>5d}-{r.res_name:>3s} "
"({r.chain_id:1s}) [{r.x:>8.3f} {r.y:>8.3f} {r.z:>8.3f}] {r.element:s}")
class Atom(object):
"""Atom class - contains all atom information found in the PDB file"""
def __init__(self, line=None, verbose=False):
def __init__(self, line=None):
"""Initialize Atom object.
Args:
line: Line from a PDB file to set properties of atom.
verbose: TODO - this does not appear to be used. Can we remove it?
"""
self.occ = None
self.numb = None
@@ -48,7 +68,8 @@ class Atom(object):
self.num_pi_elec_conj_2_3_bonds = 0
self.groups_extracted = 0
self.set_properties(line)
self.residue_label = "%-3s%4d%2s" % (self.name, self.res_num, self.chain_id)
fmt = "{r.name:3s}{r.res_num:>4d}{r.chain_id:>2s}"
self.residue_label = fmt.format(r=self)
# ligand atom types
self.sybyl_type = ''
@@ -82,13 +103,14 @@ class Atom(object):
self.y = float(line[38:46].strip())
self.z = float(line[46:54].strip())
self.res_num = int(line[22:26].strip())
self.res_name = "%-3s" % (line[17:20].strip())
self.res_name = "{0:<3s}".format(line[17:20].strip())
self.chain_id = line[21]
# Set chain id to "_" if it is just white space.
if not self.chain_id.strip():
self.chain_id = '_'
self.type = line[:6].strip().lower()
# TODO - define nucleic acid residue names elsewhere
if self.res_name in ['DA ', 'DC ', 'DG ', 'DT ']:
self.type = 'hetatm'
@@ -101,7 +123,8 @@ class Atom(object):
if len(self.name) == 4:
self.element = self.element[0]
if len(self.element) == 2:
self.element = '%1s%1s' % (self.element[0], self.element[1].lower())
self.element = '{0:1s}{1:1s}'.format(
self.element[0], self.element[1].lower())
def set_group_type(self, type_):
"""Set group type of atom.
@@ -130,9 +153,9 @@ class Atom(object):
array of bonded atoms.
"""
res = []
for ba in self.bonded_atoms:
if ba.element == element:
res.append(ba)
for bond_atom in self.bonded_atoms:
if bond_atom.element == element:
res.append(bond_atom)
return res
def get_bonded_heavy_atoms(self):
@@ -156,12 +179,14 @@ class Atom(object):
if ba == other_atom:
return True
if max_bonds > cur_bond:
if ba.is_atom_within_bond_distance(other_atom, max_bonds, cur_bond+1):
if ba.is_atom_within_bond_distance(other_atom, max_bonds,
cur_bond+1):
return True
return False
def set_property(self, numb=None, name=None, res_name=None, chain_id=None,
res_num=None, x=None, y=None, z=None, occ=None, beta=None):
res_num=None, x=None, y=None, z=None, occ=None,
beta=None):
"""Set properties of the atom object.
Args:
@@ -225,7 +250,8 @@ class Atom(object):
"""PDB line for this atom.
TODO - Could be @property method/attribute
TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2
TODO - figure out difference between make_pdb_line, make_input_line,
and make_pdb_line2
Returns:
String with PDB-format line.
@@ -238,12 +264,11 @@ class Atom(object):
group = 'C-' ## circumventing C-/COO parameter unification
if self.group.titratable:
model_pka = '%6.2f'%self.group.model_pka
str_ = "%-6s%5d %s " % (self.type.upper(), self.numb,
propka.lib.makeTidyAtomLabel(self.name, self.element))
str_ += "%s%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s \n" % (self.res_name, self.chain_id,
self.res_num, self.x, self.y,
self.z, group, model_pka)
model_pka = PKA_FMT.format(self.group.model_pka)
str_ = INPUT_LINE_FMT.format(
type=self.type.upper(), r=self,
atom_label=propka.lib.make_tidy_atom_label(self.name, self.element),
group=group, pka=model_pka)
return str_
def make_conect_line(self):
@@ -252,15 +277,15 @@ class Atom(object):
Returns:
String with PDB line.
"""
res = 'CONECT%5d' % self.numb
res = 'CONECT{0:5d}'.format(self.numb)
bonded = []
for atom in self.bonded_atoms:
bonded.append(atom.numb)
bonded.sort()
for b in bonded:
res += '%5d'%b
for bond in bonded:
res += '{0:5d}'.format(bond)
res += '\n'
return res
@@ -290,10 +315,15 @@ class Atom(object):
self.occ = self.occ.replace('LG', 'non_titratable_ligand')
# try to initialise the group
try:
# TODO - get rid of this exec() statement for security reasons
exec('self.group = propka.group.%s_group(self)' % self.occ)
group_attr = "{0:s}_group".format(self.occ)
group_attr = getattr(propka.group, group_attr)
self.group = group_attr(self)
except:
raise Exception('%s in input_file is not recognized as a group' % self.occ)
# TODO - be more specific with expection handling here
str_ = (
'{0:s} in input_file is not recognized as a group'.format(
self.occ))
raise Exception(str_)
# set the model pKa value
if self.beta != '-':
self.group.model_pka = float(self.beta)
@@ -306,17 +336,15 @@ class Atom(object):
"""Create PDB line.
TODO - this could/should be a @property method/attribute
TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2
TODO - figure out difference between make_pdb_line, make_input_line,
and make_pdb_line2
Returns:
String with PDB line.
"""
str_ = "%-6s%5d " % (self.type.upper(), self.numb)
str_ += "%s %s" % (propka.lib.makeTidyAtomLabel(self.name, self.element),
self.res_name)
str_ += "%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s\n" % (self.chain_id, self.res_num,
self.x, self.y, self.z,
self.occ, self.beta)
str_ = PDB_LINE_FMT1.format(
type=self.type.upper(), r=self,
atom_label=propka.lib.make_tidy_atom_label(self.name, self.element))
return str_
def make_mol2_line(self, id_):
@@ -329,19 +357,19 @@ class Atom(object):
Returns:
String with MOL2 line.
"""
str_ = "%-4d %-4s " % (id_, propka.lib.makeTidyAtomLabel(self.name,
self.element))
str_ += "%10.4f %10.4f %10.4f " % (self.x, self.y, self.z)
str_ += "%6s %6d %10s %10.4f\n" % (self.sybyl_type.replace('-', ''),
self.res_num, self.res_name, 0.0)
str_ = MOL2_LINE_FMT.format(
id=id_, r=self,
atom_label=propka.lib.make_tidy_atom_label(self.name, self.element))
return str_
def make_pdb_line2(self, numb=None, name=None, res_name=None, chain_id=None,
res_num=None, x=None, y=None, z=None, occ=None, beta=None):
res_num=None, x=None, y=None, z=None, occ=None,
beta=None):
"""Create a PDB line.
TODO - this could/should be a @property method/attribute
TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2
TODO - figure out difference between make_pdb_line, make_input_line,
and make_pdb_line2
Returns:
String with PDB line.
@@ -366,18 +394,11 @@ class Atom(object):
occ = self.occ
if beta is None:
beta = self.beta
str_ = "ATOM "
str_ += "%6d" % (numb)
str_ += " %s" % (propka.lib.makeTidyAtomLabel(name, self.element))
str_ += " %s" % (res_name)
str_ += "%2s" % (chain_id)
str_ += "%4d" % (res_num)
str_ += "%12.3lf" % (x)
str_ += "%8.3lf" % (y)
str_ += "%8.3lf" % (z)
str_ += "%6.2lf" % (occ)
str_ += "%6.2lf" % (beta)
str_ += '\n'
str_ = PDB_LINE_FMT2.format(
numb=numb, res_name=res_name, chain_id=chain_id, res_num=res_num,
x=x, y=y, z=z, occ=occ, beta=beta,
atom_label=propka.lib.make_tidy_atom_label(name, self.element)
)
return str_
def get_tidy_label(self):
@@ -387,14 +408,11 @@ class Atom(object):
Returns:
String with label"""
return propka.lib.makeTidyAtomLabel(self.name, self.element)
return propka.lib.make_tidy_atom_label(self.name, self.element)
def __str__(self):
"""Return an undefined-format string version of this atom."""
return '%5d-%4s %5d-%3s (%1s) [%8.3f %8.3f %8.3f] %s' % (self.numb, self.name,
self.res_num, self.res_name,
self.chain_id, self.x, self.y,
self.z, self.element)
return STR_FMT.format(r=self)
def set_residue(self, residue):
""" Makes a reference to the parent residue

View File

@@ -1,16 +1,10 @@
"""PROPKA representation of bonds."""
# TODO - is pickle still used?
import pickle
# TODO - eliminate use of sys
import sys
# TODO - eliminate use of os
import os
import math
import json
import pkg_resources
import propka.calculations
# TODO - replace the info/warning imports with logging functionality
from propka.lib import info, warning
from propka.lib import info
# TODO - should these constants be defined higher up in the module?
@@ -30,18 +24,23 @@ class BondMaker:
"""
def __init__(self):
# predefined bonding distances
self.distances = {'S-S' : DISULFIDE_DISTANCE, 'F-F' : FLUORIDE_DISTANCE}
self.distances = {'S-S' : DISULFIDE_DISTANCE,
'F-F' : FLUORIDE_DISTANCE}
self.distances_squared = {}
for k in self.distances.keys():
self.distances_squared[k] = self.distances[k] * self.distances[k]
self.H_dist = HYDROGEN_DISTANCE
for key in self.distances:
self.distances_squared[key] = (
self.distances[key] * self.distances[key])
h_dist = HYDROGEN_DISTANCE
self.default_dist = DEFAULT_DISTANCE
self.H_dist_squared = self.H_dist * self.H_dist
self.h_dist_squared = h_dist * h_dist
self.default_dist_squared = self.default_dist * self.default_dist
distances = list(self.distances_squared.values()) + [self.default_dist_squared]
distances = (
list(self.distances_squared.values())
+ [self.default_dist_squared])
self.max_sq_distance = max(distances)
# protein bonding data
self.data_file_name = pkg_resources.resource_filename(__name__, 'protein_bonds.json')
self.data_file_name = (
pkg_resources.resource_filename(__name__, 'protein_bonds.json'))
with open(self.data_file_name, 'rt') as json_file:
self.protein_bonds = json.load(json_file)
self.intra_residue_backbone_bonds = {'N': ['CA'], 'CA': ['N', 'C'],
@@ -74,9 +73,13 @@ class BondMaker:
self.num_pi_elec_conj_bonds_ligands = {'N.am': 1, 'N.pl3': 1}
self.backbone_atoms = list(self.intra_residue_backbone_bonds.keys())
self.terminal_oxygen_names = ['OXT', 'O\'\'']
self.boxes = {}
self.num_box_x = None
self.num_box_y = None
self.num_box_z = None
def find_bonds_for_protein(self, protein):
"""Finds bonds proteins based on the way atoms normally bond in proteins.
"""Bonds proteins based on the way atoms normally bond.
Args:
protein: the protein to search for bonds
@@ -92,9 +95,12 @@ class BondMaker:
last_residues = []
for chain in protein.chains:
for i in range(1, len(chain.residues)):
if chain.residues[i-1].res_name.replace(' ', '') not in ['N+', 'C-']:
if chain.residues[i].res_name.replace(' ', '') not in ['N+', 'C-']:
self.connect_backbone(chain.residues[i-1], chain.residues[i])
if (chain.residues[i-1].res_name.replace(' ', '')
not in ['N+', 'C-']):
if (chain.residues[i].res_name.replace(' ', '')
not in ['N+', 'C-']):
self.connect_backbone(chain.residues[i-1],
chain.residues[i])
last_residues.append(chain.residues[i])
info('++++ terminal oxygen ++++')
# terminal OXT
@@ -121,7 +127,8 @@ class BondMaker:
if atom1.name == 'SG':
for atom2 in cys2.atoms:
if atom2.name == 'SG':
dist = propka.calculations.squared_distance(atom1, atom2)
dist = propka.calculations.squared_distance(atom1,
atom2)
# TODO - is SS_dist_squared an attribute of this class?
if dist < self.SS_dist_squared:
self.make_bond(atom1, atom2)
@@ -138,7 +145,6 @@ class BondMaker:
if atom2.name == 'C':
self.make_bond(atom1, atom2)
# TODO - stopped here.
def connect_backbone(self, residue1, residue2):
"""Sets up bonds in the backbone
@@ -152,8 +158,8 @@ class BondMaker:
if atom1.name == 'C':
for atom2 in residue2.atoms:
if atom2.name == 'N':
if propka.calculations.squared_distance(atom1, atom2) \
< self.default_dist_squared:
if (propka.calculations.squared_distance(atom1, atom2)
< self.default_dist_squared):
self.make_bond(atom1, atom2)
def find_bonds_for_residue_backbone(self, residue):
@@ -164,17 +170,18 @@ class BondMaker:
"""
for atom1 in residue.atoms:
if atom1.name in list(self.num_pi_elec_bonds_backbone.keys()):
atom1.num_pi_elec_2_3_bonds \
= self.num_pi_elec_bonds_backbone[atom1.name]
if atom1.name in \
list(self.num_pi_elec_conj_bonds_backbone.keys()) \
and len(atom1.bonded_atoms) > 1: # last part to avoid including N-term
atom1.num_pi_elec_conj_2_3_bonds \
= self.num_pi_elec_conj_bonds_backbone[atom1.name]
atom1.num_pi_elec_2_3_bonds = (
self.num_pi_elec_bonds_backbone[atom1.name])
if atom1.name in (
list(self.num_pi_elec_conj_bonds_backbone.keys())
and len(atom1.bonded_atoms) > 1): # avoid N-term
atom1.num_pi_elec_conj_2_3_bonds = (
self.num_pi_elec_conj_bonds_backbone[atom1.name])
if atom1.name in self.backbone_atoms:
for atom2 in residue.atoms:
if atom2.name in self.intra_residue_backbone_bonds[atom1.name]:
if atom2.name in (
self.intra_residue_backbone_bonds[atom1.name]):
self.make_bond(atom1, atom2)
def find_bonds_for_side_chain(self, atoms):
@@ -184,18 +191,19 @@ class BondMaker:
atoms: list of atoms to check for bonds
"""
for atom1 in atoms:
key = '%s-%s' % (atom1.res_name, atom1.name)
key = '{0:s}-{1:s}'.format(atom1.res_name, atom1.name)
if key in list(self.num_pi_elec_bonds_sidechains.keys()):
atom1.num_pi_elec_2_3_bonds \
= self.num_pi_elec_bonds_sidechains[key]
atom1.num_pi_elec_2_3_bonds = (
self.num_pi_elec_bonds_sidechains[key])
if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()):
atom1.num_pi_elec_conj_2_3_bonds \
= self.num_pi_elec_conj_bonds_sidechains[key]
atom1.num_pi_elec_conj_2_3_bonds = (
self.num_pi_elec_conj_bonds_sidechains[key])
if not atom1.name in self.backbone_atoms:
if not atom1.name in self.terminal_oxygen_names:
for atom2 in atoms:
if atom2.name in self.protein_bonds[atom1.res_name][atom1.name]:
if atom2.name in (
self
.protein_bonds[atom1.res_name][atom1.name]):
self.make_bond(atom1, atom2)
def find_bonds_for_ligand(self, ligand):
@@ -219,25 +227,30 @@ class BondMaker:
# for ligands
if atom.type == 'hetatm':
if atom.sybyl_type in self.num_pi_elec_bonds_ligands.keys():
atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_ligands[atom.sybyl_type]
if atom.sybyl_type in self.num_pi_elec_conj_bonds_ligands.keys():
atom.num_pi_elec_conj_2_3_bonds \
= self.num_pi_elec_conj_bonds_ligands[atom.sybyl_type]
atom.num_pi_elec_2_3_bonds = (
self.num_pi_elec_bonds_ligands[atom.sybyl_type])
if atom.sybyl_type in (
self.num_pi_elec_conj_bonds_ligands.keys()):
atom.num_pi_elec_conj_2_3_bonds = (
self.num_pi_elec_conj_bonds_ligands[atom.sybyl_type])
# for protein
if atom.type == 'atom':
key = '%s-%s' % (atom.res_name, atom.name)
key = '{0:s}-{1:s}'.format(atom.res_name, atom.name)
if key in list(self.num_pi_elec_bonds_sidechains.keys()):
atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_sidechains[key]
atom.num_pi_elec_2_3_bonds = (
self.num_pi_elec_bonds_sidechains[key])
if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()):
atom.num_pi_elec_conj_2_3_bonds = self.num_pi_elec_conj_bonds_sidechains[key]
atom.num_pi_elec_conj_2_3_bonds = (
self.num_pi_elec_conj_bonds_sidechains[key])
if atom.name in list(self.num_pi_elec_bonds_backbone.keys()):
atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_backbone[atom.name]
if atom.name in list(self.num_pi_elec_conj_bonds_backbone.keys()) \
and len(atom.bonded_atoms) > 1:
atom.num_pi_elec_2_3_bonds = (
self.num_pi_elec_bonds_backbone[atom.name])
if atom.name in list(
self.num_pi_elec_conj_bonds_backbone.keys()) and (
len(atom.bonded_atoms) > 1):
# last part to avoid including N-term
atom.num_pi_elec_conj_2_3_bonds \
= self.num_pi_elec_conj_bonds_backbone[atom.name]
atom.num_pi_elec_conj_2_3_bonds = (
self.num_pi_elec_conj_bonds_backbone[atom.name])
def find_bonds_for_protein_by_distance(self, molecule):
"""Finds bonds for all atoms in the molecule.
@@ -287,9 +300,9 @@ class BondMaker:
sq_dist = propka.calculations.squared_distance(atom1, atom2)
if sq_dist > self.max_sq_distance:
return False
key = '%s-%s' % (atom1.element, atom2.element)
key = '{0:s}-{1:s}'.format(atom1.element, atom2.element)
h_count = key.count('H')
if sq_dist < self.H_dist_squared and h_count == 1:
if sq_dist < self.h_dist_squared and h_count == 1:
return True
if sq_dist < self.default_dist_squared and h_count == 0:
return True
@@ -305,7 +318,8 @@ class BondMaker:
molecules: list of molecules for finding bonds.
"""
for name in molecules.conformation_names:
self.find_bonds_for_atoms_using_boxes(molecules.conformations[name].atoms)
self.find_bonds_for_atoms_using_boxes(
molecules.conformations[name].atoms)
def add_pi_electron_information(self, molecules):
"""Add pi electron information to a molecule.
@@ -314,7 +328,8 @@ class BondMaker:
molecules: list of molecules for adding pi electron information.
"""
for name in molecules.conformation_names:
self.add_pi_electron_table_info(molecules.conformations[name].atoms)
self.add_pi_electron_table_info(
molecules.conformations[name].atoms)
def find_bonds_for_atoms_using_boxes(self, atoms):
"""Finds all bonds for a list of atoms.
@@ -370,16 +385,17 @@ class BondMaker:
z: box z-coordinates
atom: the atom to place in a box
"""
for bx in [x, x+1]:
for by in [y, y+1]:
for bz in [z, z+1]:
key = (bx, by, bz)
for box_x in [x, x+1]:
for box_y in [y, y+1]:
for box_z in [z, z+1]:
key = (box_x, box_y, box_z)
try:
self.boxes[key].append(atom)
except KeyError:
pass
def has_bond(self, atom1, atom2):
@staticmethod
def has_bond(atom1, atom2):
"""Look for bond between two atoms.
Args:
@@ -392,7 +408,8 @@ class BondMaker:
return True
return False
def make_bond(self, atom1, atom2):
@staticmethod
def make_bond(atom1, atom2):
"""Makes a bond between atom1 and atom2
Args:
@@ -418,10 +435,12 @@ class BondMaker:
name_i = atom.name
resi_j = bonded_atom.res_name
name_j = bonded_atom.name
if not name_i in self.backbone_atoms or\
not name_j in self.backbone_atoms:
if not name_i in self.terminal_oxygen_names and\
not name_j in self.terminal_oxygen_names:
if not name_i in (
self.backbone_atoms
or not name_j in self.backbone_atoms):
if not name_i in (
self.terminal_oxygen_names
and not name_j in self.terminal_oxygen_names):
if not resi_i in list(self.protein_bonds.keys()):
self.protein_bonds[resi_i] = {}
if not name_i in self.protein_bonds[resi_i]:

File diff suppressed because it is too large Load Diff

View File

@@ -1,451 +1,610 @@
#
# Container for molecular conformations
#
"""Container for molecular conformations"""
import functools
import propka.ligand
from propka.output import make_interaction_map
from propka.determinant import Determinant
from propka.coupled_groups import NCCG
from propka.determinants import set_backbone_determinants, set_ion_determinants
from propka.determinants import set_determinants
from propka.group import Group, is_group
from propka.lib import info
from __future__ import division
from __future__ import print_function
import propka.group, propka.determinants, propka.determinant, propka.ligand, propka.output, propka.coupled_groups, functools
from propka.lib import info, warning
# A large number that gets multipled with the integer obtained from applying
# ord() to the atom chain ID. Used in calculating atom keys for sorting.
UNICODE_MULTIPLIER = 1e7
# A number that gets mutiplied with an atom's residue number. Used in
# calculating keys for atom sorting.
RESIDUE_MULTIPLIER = 1000
class ConformationContainer:
"""Container for molecular conformations"""
class Conformation_container:
def __init__(self, name='', parameters=None, molecular_container=None):
"""Initialize conformation container.
Args:
name: name for conformation
parameters: parmameters for conformation
molecular_container: container for molecule
"""
self.molecular_container = molecular_container
self.name=name
self.parameters=parameters
self.name = name
self.parameters = parameters
self.atoms = []
self.groups = []
self.chains = []
self.current_iter_item = 0
# TODO - what is marvin_pkas_calculated?
self.marvin_pkas_calculated = False
self.non_covalently_coupled_groups = False
return
#
# Group related methods
#
def extract_groups(self):
""" Generates at list of molecular groups needed for calculating pKa values """
"""Generate molecular groups needed for calculating pKa values."""
for atom in self.get_non_hydrogen_atoms():
# has this atom been checked for groups?
if atom.groups_extracted == 0:
group = propka.group.is_group(self.parameters, atom)
group = is_group(self.parameters, atom)
else:
group = atom.group
# if the atom has been checked in a another conformation, check if it has a
# group that should be used in this conformation as well
# if the atom has been checked in a another conformation, check
# if it has a group that should be used in this conformation as well
if group:
self.setup_and_add_group(group)
return
def additional_setup_when_reading_input_file(self):
# if a group is coupled and we are reading a .propka_input file,
"""Generate interaction map and charge centers."""
# if a group is coupled and we are reading a .propka_input file, then
# some more configuration might be needed
# print coupling map
map = propka.output.make_interaction_map('Covalent coupling map for %s'%self,
self.get_covalently_coupled_groups(),
lambda g1,g2: g1 in g2.covalently_coupled_groups)
info(map)
map_ = make_interaction_map(
'Covalent coupling map for {0:s}'.format(str(self)),
self.get_covalently_coupled_groups(),
lambda g1, g2: g1 in g2.covalently_coupled_groups)
info(map_)
# check if we should set a common charge centre as well
if self.parameters.common_charge_centre:
self.set_common_charge_centres()
return
def set_common_charge_centres(self):
for system in self.get_coupled_systems(self.get_covalently_coupled_groups(), propka.group.Group.get_covalently_coupled_groups):
"""Assign charge centers to groups."""
for system in self.get_coupled_systems(
self.get_covalently_coupled_groups(),
Group.get_covalently_coupled_groups):
# make a list of the charge centre coordinates
all_coordinates = list(map(lambda g: [g.x, g.y, g.z], system))
# find the common charge center
ccc = functools.reduce(lambda g1,g2: [g1[0]+g2[0], g1[1]+g2[1], g1[2]+g2[2]], all_coordinates)
ccc = functools.reduce(
lambda g1, g2: [g1[0]+g2[0], g1[1]+g2[1], g1[2]+g2[2]],
all_coordinates)
ccc = list(map(lambda c: c/len(system), ccc))
# set the ccc for all coupled groups in this system
for g in system:
[g.x, g.y, g.z] = ccc
g.common_charge_centre = True
return
for group in system:
[group.x, group.y, group.z] = ccc
group.common_charge_centre = True
def find_covalently_coupled_groups(self):
""" Finds covalently coupled groups and sets common charge centres if needed """
"""Find covalently coupled groups and set common charge centres."""
for group in self.get_titratable_groups():
# Find covalently bonded groups
bonded_groups = self.find_bonded_titratable_groups(group.atom, 1, group.atom)
# couple groups
for cg in bonded_groups:
if cg in group.covalently_coupled_groups:
bonded_groups = self.find_bonded_titratable_groups(
group.atom, 1, group.atom)
# coupled groups
for bond_group in bonded_groups:
if bond_group in group.covalently_coupled_groups:
continue
if cg.atom.sybyl_type == group.atom.sybyl_type:
group.couple_covalently(cg)
if bond_group.atom.sybyl_type == group.atom.sybyl_type:
group.couple_covalently(bond_group)
# check if we should set a common charge centre as well
if self.parameters.common_charge_centre:
self.set_common_charge_centres()
# print coupling map
map = propka.output.make_interaction_map('Covalent coupling map for %s'%self,
#self.get_titratable_groups(),
self.get_covalently_coupled_groups(),
lambda g1,g2: g1 in g2.covalently_coupled_groups)
info(map)
return
map_ = make_interaction_map(
'Covalent coupling map for {0:s}'.format(str(self)),
self.get_covalently_coupled_groups(),
lambda g1, g2: g1 in g2.covalently_coupled_groups)
info(map_)
def find_non_covalently_coupled_groups(self, verbose=False):
"""Find non-covalently coupled groups and set common charge centres.
Args:
verbose: verbose output
"""
# check if non-covalent coupling has already been set up in an input file
if len(list(filter(lambda g: len(g.non_covalently_coupled_groups)>0, self.get_titratable_groups())))>0:
if len(list(filter(lambda g: len(g.non_covalently_coupled_groups) > 0,
self.get_titratable_groups()))) > 0:
self.non_covalently_coupled_groups = True
propka.coupled_groups.nccg.identify_non_covalently_coupled_groups(self,verbose=verbose)
NCCG.identify_non_covalently_coupled_groups(self, verbose=verbose)
# re-do the check
if len(list(filter(lambda g: len(g.non_covalently_coupled_groups)>0, self.get_titratable_groups())))>0:
if len(list(filter(lambda g: len(g.non_covalently_coupled_groups) > 0,
self.get_titratable_groups()))) > 0:
self.non_covalently_coupled_groups = True
return
def find_bonded_titratable_groups(self, atom, num_bonds, original_atom):
"""Find bonded titrable groups.
def find_bonded_titratable_groups(self, atom, no_bonds, original_atom):
Args:
atom: atom to check for bonds
num_bonds: number of bonds for coupling
original_atom: another atom to check for bonds
Returns:
a set of bonded atom groups
"""
res = set()
for ba in atom.bonded_atoms:
for bond_atom in atom.bonded_atoms:
# skip the original atom
if ba == original_atom:
if bond_atom == original_atom:
continue
# check if this atom has a titratable group
if ba.group and ba.group.titratable and no_bonds <= self.parameters.coupling_max_number_of_bonds:
res.add(ba.group)
if (bond_atom.group and bond_atom.group.titratable
and num_bonds
<= self.parameters.coupling_max_number_of_bonds):
res.add(bond_atom.group)
# check for titratable groups bonded to this atom
if no_bonds < self.parameters.coupling_max_number_of_bonds:
res |= self.find_bonded_titratable_groups(ba,no_bonds+1, original_atom)
if num_bonds < self.parameters.coupling_max_number_of_bonds:
res |= self.find_bonded_titratable_groups(
bond_atom, num_bonds+1, original_atom)
return res
def setup_and_add_group(self, group):
""" Checks if we want to include this group in the calculations """
"""Check if we want to include this group in the calculations.
Args:
group: group to check
"""
# Is it recognized as a group at all?
if not group:
return
# Other checks (include ligands, which chains etc.)
# if all ok, accept the group
self.init_group(group)
self.groups.append(group)
def init_group(self, group):
"""
Initialize the given Group object.
"""Initialize the given Group object.
Args:
group: group object to initialize
"""
# set up the group
group.parameters=self.parameters
group.parameters = self.parameters
group.setup()
# If --titrate_only option is set, make non-specified residues un-titratable:
# If --titrate_only option is set, make non-specified residues
# un-titratable:
titrate_only = self.molecular_container.options.titrate_only
if titrate_only is not None:
at = group.atom
if not (at.chain_id, at.res_num, at.icode) in titrate_only:
atom = group.atom
if not (atom.chain_id, atom.res_num, atom.icode) in titrate_only:
group.titratable = False
if group.residue_type == 'CYS':
group.exclude_cys_from_results = True
#
# pka calculation methods
#
def calculate_pka(self, version, options):
"""Calculate pKas for conformation container.
Args:
version: version object
options: option object
"""
info('\nCalculating pKas for', self)
# calculate desolvation
for group in self.get_titratable_groups()+self.get_ions():
for group in self.get_titratable_groups() + self.get_ions():
version.calculate_desolvation(group)
# calculate backbone interactions
propka.determinants.setBackBoneDeterminants(self.get_titratable_groups(), self.get_backbone_groups(), version)
set_backbone_determinants(
self.get_titratable_groups(), self.get_backbone_groups(), version)
# setting ion determinants
propka.determinants.setIonDeterminants(self, version)
set_ion_determinants(self, version)
# calculating the back-bone reorganization/desolvation term
version.calculateBackBoneReorganization(self)
# setting remaining non-iterative and iterative side-chain & Coulomb interaction determinants
propka.determinants.setDeterminants(self.get_sidechain_groups(), version=version, options=options)
version.calculate_backbone_reorganization(self)
# setting remaining non-iterative and iterative side-chain & Coulomb
# interaction determinants
set_determinants(
self.get_sidechain_groups(), version=version, options=options)
# calculating the total pKa values
for group in self.groups: group.calculate_total_pka()
for group in self.groups:
group.calculate_total_pka()
# take coupling effects into account
penalised_labels = self.coupling_effects()
if self.parameters.remove_penalised_group and len(penalised_labels)>0:
if (self.parameters.remove_penalised_group
and len(penalised_labels) > 0):
info('Removing penalised groups!!!')
for g in self.get_titratable_groups():
g.remove_determinants(penalised_labels)
for group in self.get_titratable_groups():
group.remove_determinants(penalised_labels)
# re-calculating the total pKa values
for group in self.groups: group.calculate_total_pka()
return
for group in self.groups:
group.calculate_total_pka()
def coupling_effects(self):
#
# Bases: The group with the highest pKa (the most stable one in the
# charged form) will be the first one to adopt a proton as pH
# is lowered and this group is allowed to titrate.
# The remaining groups are penalised
#
# Acids: The group with the highest pKa (the least stable one in the
# charged form) will be the last group to loose the proton as
# pH is raised and will be penalised.
# The remaining groups are allowed to titrate.
#
"""Penalize groups based on coupling effects.
Bases: The group with the highest pKa (the most stable one in the
charged form) will be the first one to adopt a proton as pH is lowered
and this group is allowed to titrate. The remaining groups are
penalised.
Acids: The group with the highest pKa (the least stable one in the
charged form) will be the last group to loose the proton as pH is
raised and will be penalised. The remaining groups are allowed to
titrate.
"""
penalised_labels = []
for all_groups in self.get_coupled_systems(self.get_covalently_coupled_groups(),
propka.group.Group.get_covalently_coupled_groups):
for all_groups in self.get_coupled_systems(
self.get_covalently_coupled_groups(),
Group.get_covalently_coupled_groups):
# check if we should share determinants
if self.parameters.shared_determinants:
self.share_determinants(all_groups)
# find the group that has the highest pKa value
first_group = max(all_groups, key=lambda g:g.pka_value)
first_group = max(all_groups, key=lambda g: g.pka_value)
# In case of acids
if first_group.charge < 0:
first_group.coupled_titrating_group = min(all_groups, key=lambda g:g.pka_value)
penalised_labels.append(first_group.label) # group with the highest pKa is penalised
first_group.coupled_titrating_group = min(
all_groups, key=lambda g: g.pka_value)
# group with the highest pKa is penalised
penalised_labels.append(first_group.label)
# In case of bases
else:
for a in all_groups:
if a == first_group:
continue # group with the highest pKa is allowed to titrate...
a.coupled_titrating_group = first_group
penalised_labels.append(a.label) #... and the rest is penalised
for group in all_groups:
if group == first_group:
# group with the highest pKa is allowed to titrate...
continue
group.coupled_titrating_group = first_group
#... and the rest are penalised
penalised_labels.append(group.label)
return penalised_labels
@staticmethod
def share_determinants(groups):
"""Share sidechain, backbone, and Coloumb determinants between groups.
def share_determinants(self, groups):
Args:
groups: groups to share between
"""
# make a list of the determinants to share
types = ['sidechain','backbone','coulomb']
for type in types:
types = ['sidechain', 'backbone', 'coulomb']
for type_ in types:
# find maximum value for each determinant
max_dets = {}
for g in groups:
for d in g.determinants[type]:
for group in groups:
for det in group.determinants[type_]:
# update max dets
if d.group not in max_dets.keys():
max_dets[d.group] = d.value
if det.group not in max_dets.keys():
max_dets[det.group] = det.value
else:
max_dets[d.group] = max(d.value, max_dets[d.group], key= lambda v: abs(v))
max_dets[det.group] = max(det.value,
max_dets[det.group],
key=lambda v: abs(v))
# overwrite/add maximum value for each determinant
for det_group in max_dets.keys():
new_determinant = propka.determinant.Determinant(det_group, max_dets[det_group])
for g in groups:
g.set_determinant(new_determinant,type)
return
for det_group in max_dets:
new_determinant = Determinant(det_group, max_dets[det_group])
for group in groups:
group.set_determinant(new_determinant, type_)
def get_coupled_systems(self, groups, get_coupled_groups):
""" This generator will yield one covalently coupled system at the time """
"""A generator that yields covalently coupled systems.
Args:
groups: groups for generating coupled systems
get_coupled_groups: TODO - I don't know what this is
Yields:
covalently coupled systems
"""
groups = set(groups)
while len(groups)>0:
while len(groups) > 0:
# extract a system of coupled groups ...
system = set()
self.get_a_coupled_system_of_groups(groups.pop(), system, get_coupled_groups)
self.get_a_coupled_system_of_groups(
groups.pop(), system, get_coupled_groups)
# ... and remove them from the list
groups -= system
yield system
return
def get_a_coupled_system_of_groups(self, new_group, coupled_groups,
get_coupled_groups):
"""Set up coupled systems of groups.
def get_a_coupled_system_of_groups(self, new_group, coupled_groups, get_coupled_groups):
Args:
new_group: added to coupled_groups
coupled_groups: existing coupled groups
get_coupled_groups: TODO - I don't know what this
"""
coupled_groups.add(new_group)
[self.get_a_coupled_system_of_groups(c, coupled_groups, get_coupled_groups) for c in get_coupled_groups(new_group) if c not in coupled_groups]
return
for coupled_group in get_coupled_groups(new_group):
if coupled_group not in coupled_groups:
self.get_a_coupled_system_of_groups(coupled_group,
coupled_groups,
get_coupled_groups)
def calculate_folding_energy(self, ph=None, reference=None):
"""Calculate folding energy over all groups in conformation container.
#
# Energy/summary-related methods
#
def calculate_folding_energy(self, pH=None, reference=None):
Args:
ph: pH for calculation
reference: reference state
Returns:
folding energy
TODO - need units
"""
ddg = 0.0
for group in self.groups:
#info('Folding energy for %s at pH %f: %f'%(group,pH,group.calculate_folding_energy(self.parameters, pH=pH, reference=reference)))
ddg += group.calculate_folding_energy(self.parameters, pH=pH, reference=reference)
ddg += group.calculate_folding_energy(self.parameters, ph=ph,
reference=reference)
return ddg
def calculate_charge(self, parmaeters, pH=None):
def calculate_charge(self, parameters, ph=None):
"""Calculate charge for folded and unfolded states.
Args:
parameters: parameters for calculation
ph: pH for calculation
Returns:
1. charge for unfolded state
2. charge for folded state
"""
unfolded = folded = 0.0
for group in self.get_titratable_groups():
unfolded += group.calculate_charge(parmaeters, pH=pH, state='unfolded')
folded += group.calculate_charge(parmaeters, pH=pH, state='folded')
return unfolded,folded
#
# conformation/bookkeeping/atom methods
#
unfolded += group.calculate_charge(parameters, ph=ph,
state='unfolded')
folded += group.calculate_charge(parameters, ph=ph,
state='folded')
return unfolded, folded
def get_backbone_groups(self):
""" returns all backbone groups needed for the pKa calculations """
"""Get backbone groups needed for the pKa calculations.
Returns:
list of groups
"""
return [group for group in self.groups if 'BB' in group.type]
def get_sidechain_groups(self):
""" returns all sidechain groups needed for the pKa calculations """
return [group for group in self.groups if ('BB' not in group.type\
and not group.atom.cysteine_bridge)]
"""Get sidechain groups needed for the pKa calculations.
Returns:
list of groups
"""
return [
group for group in self.groups
if ('BB' not in group.type and not group.atom.cysteine_bridge)]
def get_covalently_coupled_groups(self):
return [g for g in self.groups if len(g.covalently_coupled_groups)>0]
"""Get covalently coupled groups needed for pKa calculations.
Returns:
list of groups
"""
return [
g for g in self.groups
if len(g.covalently_coupled_groups) > 0]
def get_non_covalently_coupled_groups(self):
return [g for g in self.groups if len(g.non_covalently_coupled_groups)>0]
"""Get non-covalently coupled groups needed for pKa calculations.
def get_backbone_NH_groups(self):
""" returns all NH backbone groups needed for the pKa calculations """
Returns:
list of groups
"""
return [
g for g in self.groups
if len(g.non_covalently_coupled_groups) > 0]
def get_backbone_nh_groups(self):
"""Get NH backbone groups needed for pKa calculations.
Returns:
list of groups
"""
return [group for group in self.groups if group.type == 'BBN']
def get_backbone_CO_groups(self):
""" returns all CO backbone groups needed for the pKa calculations """
def get_backbone_co_groups(self):
"""Get CO backbone groups needed for pKa calculations.
Returns:
list of groups
"""
return [group for group in self.groups if group.type == 'BBC']
def get_groups_in_residue(self, residue):
return [group for group in self.groups if group.residue_type == residue]
"""Get residue groups needed for pKa calculations.
Args:
residue: specific residue with groups
Returns:
list of groups
"""
return [
group for group in self.groups if group.residue_type == residue]
def get_titratable_groups(self):
"""Get all titratable groups needed for pKa calculations.
Returns:
list of groups
"""
return [group for group in self.groups if group.titratable]
def get_groups_for_calculations(self):
"""
Returns a list of groups that should be included in results report.
"""Get a list of groups that should be included in results report.
If --titrate_only option is specified, only residues that are titratable
and are in that list are included; otherwise all titratable residues
and CYS residues are included.
Returns:
list of groups
"""
return [group for group in self.groups if group.use_in_calculations()]
def get_acids(self):
return [group for group in self.groups if (group.residue_type in self.parameters.acid_list
and not group.atom.cysteine_bridge)]
"""Get acid groups needed for pKa calculations.
Returns:
list of groups
"""
return [
group for group in self.groups
if (group.residue_type in self.parameters.acid_list
and not group.atom.cysteine_bridge)]
def get_backbone_reorganisation_groups(self):
return [group for group in self.groups if (group.residue_type in self.parameters.backbone_reorganisation_list
and not group.atom.cysteine_bridge)]
"""Get groups involved with backbone reorganization.
Returns:
list of groups
"""
return [
group for group in self.groups
if (group.residue_type
in self.parameters.backbone_reorganisation_list
and not group.atom.cysteine_bridge)]
def get_ions(self):
return [group for group in self.groups if group.residue_type in self.parameters.ions.keys()]
"""Get ion groups.
def get_group_names(self, list):
return [group for group in self.groups if group.type in list]
Returns:
list of groups
"""
return [
group for group in self.groups
if group.residue_type in self.parameters.ions.keys()]
def get_group_names(self, group_list):
"""Get names of groups in list.
Args:
group_list: list to check
Returns:
list of groups
"""
return [group for group in self.groups if group.type in group_list]
def get_ligand_atoms(self):
return [atom for atom in self.atoms if atom.type=='hetatm']
"""Get atoms associated with ligands.
Returns:
list of atoms
"""
return [atom for atom in self.atoms if atom.type == 'hetatm']
def get_heavy_ligand_atoms(self):
return [atom for atom in self.atoms if atom.type=='hetatm' and atom.element != 'H']
"""Get heavy atoms associated with ligands.
def get_chain(self,chain):
Returns:
list of atoms
"""
return [
atom for atom in self.atoms
if atom.type == 'hetatm' and atom.element != 'H']
def get_chain(self, chain):
"""Get atoms associated with a specific chain.
Args:
chain: chain to select
Returns:
list of atoms
"""
return [atom for atom in self.atoms if atom.chain_id != chain]
def add_atom(self, atom):
#info(self,'adding',atom)
"""Add atom to container.
Args:
atom: atom to add
"""
self.atoms.append(atom)
if not atom.conformation_container:
atom.conformation_container = self
if not atom.molecular_container:
atom.molecular_container = self.molecular_container
# store chain id for bookkeeping
if not atom.chain_id in self.chains:
self.chains.append(atom.chain_id)
return
def copy_atom(self, atom):
new_atom = atom.make_copy()
"""Add a copy of the atom to container.
Args:
atom: atom to copy and add
"""
new_atom = atom.make_copy()
self.atoms.append(new_atom)
new_atom.conformation_container = self
return
def get_non_hydrogen_atoms(self):
return [atom for atom in self.atoms if atom.element!='H']
"""Get atoms that are not hydrogens.
Returns:
list of atoms
"""
return [atom for atom in self.atoms if atom.element != 'H']
def top_up(self, other):
""" Tops up self with all atoms found in other but not in self """
my_residue_labels = { a.residue_label for a in self.atoms }
"""Adds any atoms found in `other` but not in this container.
Tops up self with all atoms found in other but not in self.
Args:
other: conformation container with atoms to add
"""
my_residue_labels = {a.residue_label for a in self.atoms}
for atom in other.atoms:
if not atom.residue_label in my_residue_labels:
self.copy_atom(atom)
return
def find_group(self, group):
for g in self.groups:
if g.atom.residue_label == group.atom.residue_label:
if g.type == group.type:
return g
"""Find a group in the container.
Args:
group: group to find
Returns:
False (if group not found) or group
"""
for group_ in self.groups:
if group_.atom.residue_label == group.atom.residue_label:
if group_.type == group.type:
return group_
return False
def set_ligand_atom_names(self):
"""Set names for atoms in ligands."""
for atom in self.get_ligand_atoms():
propka.ligand.assign_sybyl_type(atom)
return
def __str__(self):
return'Conformation container %s with %d atoms and %d groups'%(self.name,len(self),len(self.groups))
"""String that lists statistics of atoms and groups."""
fmt = (
"Conformation container {name} with {natoms:d} atoms and "
"{ngroups:d} groups")
str_ = fmt.format(
name=self.name, natoms=len(self), ngroups=len(self.groups))
return str_
def __len__(self):
"""Number of atoms in container."""
return len(self.atoms)
def sort_atoms(self):
"""Sort atoms by `self.sort_atoms_key()` and renumber."""
# sort the atoms ...
self.atoms.sort(key=self.sort_atoms_key)
# ... and re-number them
for i in range(len(self.atoms)):
self.atoms[i].numb = i+1
return
@staticmethod
def sort_atoms_key(atom):
"""Generate key for atom sorting.
def sort_atoms_key(self, atom):
key = ord(atom.chain_id)*1e7
key += atom.res_num*1000
Args:
atom: atom for key generation.
Returns:
key for atom
"""
key = ord(atom.chain_id) * UNICODE_MULTIPLIER
key += atom.res_num * RESIDUE_MULTIPLIER
if len(atom.name) > len(atom.element):
key += ord(atom.name[len(atom.element)])
#info(atom,ord(atom.name[len(atom.element)]), '|%s||%s|'%(atom.name,atom.element))
return key

View File

@@ -1,113 +1,118 @@
from __future__ import division
from __future__ import print_function
import math, propka.output, propka.group, propka.lib, itertools
from propka.lib import info, warning
"""Describe coupling between groups."""
import itertools
import propka.lib
from propka.group import Group
from propka.output import make_interaction_map
from propka.lib import info
class non_covalently_couple_groups:
class NonCovalentlyCoupledGroups:
"""Groups that are coupled without covalent bonding."""
def __init__(self):
self.parameters = None
# self.do_intrinsic = False
# self.do_pair_wise = False
self.do_prot_stat = True
return
#
# Methods for finding coupled groups
#
def is_coupled_protonation_state_probability(self, group1, group2, energy_method, return_on_fail=True):
def is_coupled_protonation_state_probability(self, group1, group2,
energy_method,
return_on_fail=True):
"""Check whether two groups are energetically coupled.
Args:
group1: first group for interaction
group2: second group for interaction
energy_method: function for calculating energy
return_on_fail: return if part of the calculation fails
Returns:
dictionary describing coupling
"""
# check if the interaction energy is high enough
interaction_energy = max(self.get_interaction(group1,group2), self.get_interaction(group2,group1))
if interaction_energy<=self.parameters.min_interaction_energy and return_on_fail:
return {'coupling_factor':-1.0}
interaction_energy = max(self.get_interaction(group1, group2),
self.get_interaction(group2, group1))
if (interaction_energy <= self.parameters.min_interaction_energy
and return_on_fail):
return {'coupling_factor': -1.0}
# calculate intrinsic pKa's, if not already done
for group in [group1, group2]:
if not hasattr(group, 'intrinsic_pKa'):
if group.intrinsic_pka is None:
group.calculate_intrinsic_pka()
use_pH = self.parameters.pH
use_ph = self.parameters.pH
if self.parameters.pH == 'variable':
use_pH = min(group1.pka_value, group2.pka_value)
default_energy = energy_method(pH=use_pH, reference=self.parameters.reference)
use_ph = min(group1.pka_value, group2.pka_value)
default_energy = energy_method(ph=use_ph,
reference=self.parameters.reference)
default_pka1 = group1.pka_value
default_pka2 = group2.pka_value
# check that pka values are within relevant limits
if max(default_pka1, default_pka2) < self.parameters.min_pka or \
min(default_pka1, default_pka2) > self.parameters.max_pka:
if (max(default_pka1, default_pka2) < self.parameters.min_pka
or min(default_pka1, default_pka2) > self.parameters.max_pka):
if return_on_fail:
return {'coupling_factor':-1.0}
return {'coupling_factor': -1.0}
# Swap interactions and re-calculate pKa values
self.swap_interactions([group1], [group2])
group1.calculate_total_pka()
group2.calculate_total_pka()
# store swapped energy and pka's
swapped_energy = energy_method(pH=use_pH, reference=self.parameters.reference)
swapped_energy = energy_method(
ph=use_ph, reference=self.parameters.reference)
swapped_pka1 = group1.pka_value
swapped_pka2 = group2.pka_value
pka_shift1 = swapped_pka1 - default_pka1
pka_shift2 = swapped_pka2 - default_pka2
# Swap back to original protonation state
self.swap_interactions([group1], [group2])
group1.calculate_total_pka()
group2.calculate_total_pka()
# check difference in free energy
if abs(default_energy - swapped_energy) > self.parameters.max_free_energy_diff and return_on_fail:
return {'coupling_factor':-1.0}
if (abs(default_energy - swapped_energy)
> self.parameters.max_free_energy_diff and return_on_fail):
return {'coupling_factor': -1.0}
# check pka shift
if max(abs(pka_shift1), abs(pka_shift2)) < self.parameters.min_swap_pka_shift and return_on_fail:
return {'coupling_factor':-1.0}
if (max(abs(pka_shift1), abs(pka_shift2))
< self.parameters.min_swap_pka_shift and return_on_fail):
return {'coupling_factor': -1.0}
# check intrinsic pka diff
if abs(group1.intrinsic_pKa - group2.intrinsic_pKa) > self.parameters.max_intrinsic_pKa_diff and return_on_fail:
return {'coupling_factor':-1.0}
if (abs(group1.intrinsic_pka - group2.intrinsic_pka)
> self.parameters.max_intrinsic_pka_diff and return_on_fail):
return {'coupling_factor': -1.0}
# if everything is OK, calculate the coupling factor and return all info
factor = self.get_free_energy_diff_factor(default_energy, swapped_energy)*\
self.get_pka_diff_factor(group1.intrinsic_pKa, group2.intrinsic_pKa)*\
self.get_interaction_factor(interaction_energy)
return {'coupling_factor':factor,
'default_energy':default_energy,
'swapped_energy':swapped_energy,
'interaction_energy':interaction_energy,
'swapped_pka1':swapped_pka1,
'swapped_pka2':swapped_pka2,
'pka_shift1':pka_shift1,
'pka_shift2':pka_shift2,
'pH':use_pH}
#
# Methods for calculating the coupling factor
#
factor = (
self.get_free_energy_diff_factor(default_energy, swapped_energy)
* self.get_pka_diff_factor(group1.intrinsic_pka,
group2.intrinsic_pka)
* self.get_interaction_factor(interaction_energy))
return {'coupling_factor': factor, 'default_energy': default_energy,
'swapped_energy': swapped_energy,
'interaction_energy': interaction_energy,
'swapped_pka1': swapped_pka1, 'swapped_pka2': swapped_pka2,
'pka_shift1': pka_shift1, 'pka_shift2': pka_shift2,
'pH': use_ph}
def get_pka_diff_factor(self, pka1, pka2):
"""Get scaling factor for difference between intrinsic pKa values.
Args:
pka1: first pKa to compare
pka2: second pKa to compare
Returns:
float value of scaling factor
"""
intrinsic_pka_diff = abs(pka1-pka2)
res = 0.0
if intrinsic_pka_diff <= self.parameters.max_intrinsic_pKa_diff:
res = 1-(intrinsic_pka_diff/self.parameters.max_intrinsic_pKa_diff)**2
if intrinsic_pka_diff <= self.parameters.max_intrinsic_pka_diff:
res = (
1-(intrinsic_pka_diff
/self.parameters.max_intrinsic_pka_diff)**2)
return res
def get_free_energy_diff_factor(self, energy1, energy2):
"""Get scaling factor for difference between free energies.
Args:
energy1: first energy to compare
energy2: second energy to compare
Returns:
float value of scaling factor
"""
free_energy_diff = abs(energy1-energy2)
res = 0.0
if free_energy_diff <= self.parameters.max_free_energy_diff:
@@ -115,208 +120,267 @@ class non_covalently_couple_groups:
return res
def get_interaction_factor(self, interaction_energy):
"""Get scaling factor related to interaction energy.
Args:
interaction_energy: interaction energy
Returns:
float value of scaling factor
"""
res = 0.0
interaction_energy = abs(interaction_energy)
if interaction_energy >= self.parameters.min_interaction_energy:
res = (interaction_energy-self.parameters.min_interaction_energy)/(1.0+interaction_energy-self.parameters.min_interaction_energy)
res = (
(interaction_energy-self.parameters.min_interaction_energy)
/ (1.0+interaction_energy
-self.parameters.min_interaction_energy))
return res
def identify_non_covalently_coupled_groups(self, conformation,
verbose=True):
"""Find coupled residues in protein.
def identify_non_covalently_coupled_groups(self, conformation, verbose=True):
""" Finds coupled residues in protein """
Args:
conformation: protein conformation to test
verbose: verbose output (boolean)
"""
self.parameters = conformation.parameters
if verbose:
info('')
info(' Warning: When using the -d option, pKa values based on \'swapped\' interactions')
info(' will be writting to the output .pka file')
info('')
info('-' * 103)
info(' Detecting non-covalently coupled residues')
info('-' * 103)
info(' Maximum pKa difference: %4.2f pKa units' % self.parameters.max_intrinsic_pKa_diff)
info(' Minimum interaction energy: %4.2f pKa units' % self.parameters.min_interaction_energy)
info(' Maximum free energy diff.: %4.2f pKa units' % self.parameters.max_free_energy_diff)
info(' Minimum swap pKa shift: %4.2f pKa units' % self.parameters.min_swap_pka_shift)
info(' pH: %6s ' % str(self.parameters.pH))
info(' Reference: %s' % self.parameters.reference)
info(' Min pKa: %4.2f' % self.parameters.min_pka)
info(' Max pKa: %4.2f' % self.parameters.max_pka)
info('')
info_fmt = (
'\n'
' Warning: When using the -d option, pKa values based on \n'
'\'swapped\' interactions\n'
' will be writting to the output .pka file\n'
'\n'
'{sep}\n'
'\n'
' Detecting non-covalently coupled residues\n'
'{sep}\n'
' Maximum pKa difference: {c.max_intrinsic_pka_diff:>4.2f} pKa units\n'
' Minimum interaction energy: {c.min_interaction_energy:>4.2f} pKa units\n'
' Maximum free energy diff.: {c.max_free_energy_diff:>4.2f} pKa units\n'
' Minimum swap pKa shift: {c.min_swap_pka_shift:>4.2f} pKa units\n'
' pH: {c.pH:>6} \n'
' Reference: {c.reference}\n'
' Min pKa: {c.min_pka:>4.2f}\n'
' Max pKa: {c.max_pka:>4.2f}\n'
'\n')
sep = "-" * 103
info(info_fmt.format(sep=sep, c=self))
# find coupled residues
titratable_groups = conformation.get_titratable_groups()
if not conformation.non_covalently_coupled_groups:
for g1 in titratable_groups:
for g2 in titratable_groups:
if g1==g2:
for group1 in titratable_groups:
for group2 in titratable_groups:
if group1 == group2:
break
if not g1 in g2.non_covalently_coupled_groups and self.do_prot_stat:
data = self.is_coupled_protonation_state_probability(g1, g2,conformation.calculate_folding_energy)
if data['coupling_factor'] >0.0:
g1.couple_non_covalently(g2)
if (group1 not in group2.non_covalently_coupled_groups
and self.do_prot_stat):
data = (
self
.is_coupled_protonation_state_probability(
group1, group2,
conformation.calculate_folding_energy))
if data['coupling_factor'] > 0.0:
group1.couple_non_covalently(group2)
if verbose:
self.print_out_swaps(conformation)
return
def print_out_swaps(self, conformation):
"""Print out something having to do with coupling interactions.
def print_out_swaps(self, conformation, verbose=True):
map = propka.output.make_interaction_map('Non-covalent coupling map for %s'%conformation,
conformation.get_non_covalently_coupled_groups(),
lambda g1,g2: g1 in g2.non_covalently_coupled_groups)
info(map)
for system in conformation.get_coupled_systems(conformation.get_non_covalently_coupled_groups(),propka.group.Group.get_non_covalently_coupled_groups):
Args:
conformation: conformation to print
"""
map_ = make_interaction_map(
'Non-covalent coupling map for {0:s}'.format(str(conformation)),
conformation.get_non_covalently_coupled_groups(),
lambda g1, g2: g1 in g2.non_covalently_coupled_groups)
info(map_)
for system in conformation.get_coupled_systems(
conformation.get_non_covalently_coupled_groups(),
Group.get_non_covalently_coupled_groups):
self.print_system(conformation, list(system))
return
def print_system(self, conformation, system):
"""Print out something about the system.
info('System containing %d groups:' % len(system))
# make list of interactions withi this system
interactions = list(itertools.combinations(system,2))
Args:
conformation: conformation to print
system: system to print
"""
info(
'System containing {0:d} groups:'.format(len(system)))
# make list of interactions within this system
interactions = list(itertools.combinations(system, 2))
# print out coupling info for each interaction
coup_info = ''
for interaction in interactions:
data = self.is_coupled_protonation_state_probability(interaction[0], interaction[1],conformation.calculate_folding_energy, return_on_fail=False)
coup_info += self.make_data_to_string(data,interaction[0],interaction[1])+'\n\n'
data = (
self.is_coupled_protonation_state_probability(
interaction[0], interaction[1],
conformation.calculate_folding_energy,
return_on_fail=False))
coup_info += (
self.make_data_to_string(data, interaction[0], interaction[1])
+ '\n\n')
info(coup_info)
# make list of possible combinations of swap to try out
combinations = propka.lib.generate_combinations(interactions)
# Make possible swap combinations
swap_info = ''
swap_info += self.print_determinants_section(system,'Original')
swap_info += self.print_determinants_section(system, 'Original')
for combination in combinations:
# Tell the user what is swap in this combination
swap_info += 'Swapping the following interactions:\n'
for interaction in combination:
swap_info += ' %s %s\n'%(interaction[0].label, interaction[1].label)
swap_info += ' {0:s} {1:s}\n'.format(
interaction[0].label, interaction[1].label)
# swap...
for interaction in combination:
self.swap_interactions([interaction[0]],[interaction[1]])
swap_info += self.print_determinants_section(system,'Swapped')
# ...and swap back
#for interaction in combination:
# self.swap_interactions([interaction[0]], [interaction[1]])
self.swap_interactions([interaction[0]], [interaction[1]])
swap_info += self.print_determinants_section(system, 'Swapped')
info(swap_info)
return
#
# Interaction and swapping methods
#
@staticmethod
def get_interaction(group1, group2, include_side_chain_hbs=True):
"""Get interaction energy between two groups.
def get_interaction(self, group1, group2, include_side_chain_hbs = True):
Args:
group1: first group for interaction
group2: second group for interaction
include_side_chain_hbs: include sidechain hydrogen bonds in energy
Returns:
interaction energy (float)
"""
determinants = group1.determinants['coulomb']
if include_side_chain_hbs:
determinants = group1.determinants['sidechain'] + group1.determinants['coulomb']
determinants = (
group1.determinants['sidechain']
+ group1.determinants['coulomb'])
interaction_energy = 0.0
for det in determinants:
if group2 == det.group:
interaction_energy += det.value
return interaction_energy
def print_determinants_section(self, system, tag):
"""Print determinants of system.
Args:
system: set of groups
tag: something to add to output
Returns:
string with summary
"""
all_labels = [g.label for g in system]
s = ' '+'-'*113+'\n'
str_ = ' ' + '-' * 113 + '\n'
for group in system:
s += self.tagged_format(' %-8s|' % tag, group.getDeterminantString(), all_labels)
str_ += self.tagged_format(
' {0:<8s}|'.format(tag), group.get_determinant_string(),
all_labels)
return str_ + '\n'
return s+'\n'
def swap_interactions(self, groups1, groups2, include_side_chain_hbs=True):
"""Swap interactions between two groups.
def swap_interactions(self, groups1, groups2, include_side_chain_hbs = True):
for i in range(len(groups1)):
group1 = groups1[i]
Args:
group1: first group to swap
group2: second group to swap
"""
for i, group1 in enumerate(groups1):
group2 = groups2[i]
# swap the interactions!
self.transfer_determinant(group1.determinants['coulomb'], group2.determinants['coulomb'], group1.label, group2.label)
self.transfer_determinant(group1.determinants['coulomb'],
group2.determinants['coulomb'],
group1.label, group2.label)
if include_side_chain_hbs:
self.transfer_determinant(group1.determinants['sidechain'], group2.determinants['sidechain'], group1.label, group2.label)
# re-calculate pKa values
self.transfer_determinant(group1.determinants['sidechain'],
group2.determinants['sidechain'],
group1.label, group2.label)
# re-calculate pKa values
group1.calculate_total_pka()
group2.calculate_total_pka()
return
@staticmethod
def transfer_determinant(determinants1, determinants2,
label1, label2):
"""Transfer information between two sets of determinants.
def transfer_determinant(self, determinants1, determinants2, label1, label2):
Args:
determinants1: determinant list
determinants2: determinant list
label1: label for list 1
label2: label for list 2
"""
# find out what to transfer...
from1to2 = []
from2to1 = []
for det in determinants1:
if det.label == label2:
from1to2.append(det)
for det in determinants2:
if det.label == label1:
from2to1.append(det)
# ...and transfer it!
for det in from1to2:
det.label = label1
determinants2.append(det)
determinants1.remove(det)
for det in from2to1:
det.label = label2
determinants1.append(det)
determinants2.remove(det)
return
@staticmethod
def tagged_format(tag, str_, labels):
"""Tag a string.
#
# Output methods
#
def tagged_format(self, tag, s, labels):
s = "%s %s"%(tag,s)
s = s.replace('\n','\n%s '%tag)
Args:
tag: tag to add
str_: string to tag
labels: labels to replace
Returns:
tagged string
"""
str_ = "{0:s} {1:s}".format(tag, str_)
str_ = str_.replace('\n', '\n{0:s} '.format(tag))
for label in labels:
s = s.replace(label, '\033[31m%s\033[30m'%label)
return s+'\n'
str_ = str_.replace(label, '\033[31m{0:s}\033[30m'.format(label))
return str_ + '\n'
@staticmethod
def make_data_to_string(data, group1, group2):
"""Describe interaction between groups.
Args:
data: data about interactions
group1: first group
group2: second group
Returns:
formatted string with information.
"""
str_ = (
" {label1} and {label2} coupled (prot.state): {coupl_fact:>5.2f}\n"
" Energy levels: {def_energy:>6.2f}, {swap_energy:>6.2f} "
"(difference: {diff_energy:>6.2f}) at pH {ph:>6.2f}\n"
" Interaction energy: {int_energy:>6.2f}\n"
" Intrinsic pka's: {pka1:>6.2f}, {pka2:>6.2f} "
"(difference: {diff_pka:>6.2f})\n"
" Swapped pKa's: {swap1:>6.2f}, {swap2:>6.2f} "
"(difference: {shift1:>6.2f}, {shift2:>6.2f})"
).format(
label1=group1.label, label2=group2.label,
coupl_fact=data['coupling_factor'],
def_energy=data['default_energy'],
swap_energy=data['swapped_energy'],
diff_energy=data['default_energy']-data['swapped_energy'],
ph=data['pH'], int_energy=data['interaction_energy'],
pka1=group1.intrinsic_pka, pka2=group2.intrinsic_pka,
diff_pka=group1.intrinsic_pka-group2.intrinsic_pka,
swap1=data['swapped_pka1'], swap2=data['swapped_pka2'],
shift1=data['pka_shift1'], shift2=data['pka_shift2'])
return str_
def make_data_to_string(self, data, group1, group2):
s = """ %s and %s coupled (prot.state): %5.2f
Energy levels: %6.2f, %6.2f (difference: %6.2f) at pH %6.2f
Interaction energy: %6.2f
Intrinsic pka's: %6.2f, %6.2f (difference: %6.2f)
Swapped pKa's: %6.2f, %6.2f (difference: %6.2f, %6.2f)"""%(group1.label,
group2.label,
data['coupling_factor'],
data['default_energy'], data['swapped_energy'],
data['default_energy'] - data['swapped_energy'],
data['pH'],
data['interaction_energy'],
group1.intrinsic_pKa,
group2.intrinsic_pKa,
group1.intrinsic_pKa-group2.intrinsic_pKa,
data['swapped_pka1'],
data['swapped_pka2'],
data['pka_shift1'],
data['pka_shift2'])
return s
nccg = non_covalently_couple_groups()
NCCG = NonCovalentlyCoupledGroups()

View File

@@ -1,29 +1,37 @@
"""Holds the Determinant class
TODO - it is confusing to have both `determinant.py` and `determinants.py`.
Should these be merged?
"""
from __future__ import division
from __future__ import print_function
class Determinant:
"""
Determinant class - set up for later structurization
"""Determinant class.
Appears to be a container for storing information and values about
groups that interact to influence titration states.
TODO - figure out what this class does.
"""
def __init__(self, group, value):
"""
Contructer of determinant object - simple, but helps in creating structure!
"""Initialize the object.
Args:
group: group associated with Determinant object
value: value to assign to group
"""
self.group = group
self.label = group.label
self.value = value
return
def add(self, value):
"""
adding a value to determinant
"""Increment determinant value.
Args:
value: value to add to determinant
"""
self.value += value
return
def __str__(self):
return '%s: %8.2f'%(self.label,self.value)
return '{0:s}: {1:8.2f}'.format(self.label, self.value)

View File

@@ -1,19 +1,33 @@
"""Functions to manipulate Determinant objects.
from __future__ import division
from __future__ import print_function
import math, time
import propka.iterative, propka.lib, propka.vector_algebra
import propka.calculations
TODO - it is confusing to have both `determinant.py` and `determinants.py`.
Should these be merged?
"""
import math
import propka.iterative
import propka.lib
import propka.vector_algebra
from propka.calculations import squared_distance, get_smallest_distance
from propka.calculations import angle_distance_factors, hydrogen_bond_energy
from propka.determinant import Determinant
def setDeterminants(propka_groups, version=None, options=None):
"""
adding side-chain and coulomb determinants/perturbations to all residues - note, backbone determinants are set separately
"""
# Cutoff for angle factor
# TODO - this constant appears elsewhere in the package.
# It should be moved to a configuration file.
FANGLE_MIN = 0.001
def set_determinants(propka_groups, version=None, options=None):
"""Add side-chain and coulomb determinants/perturbations to all residues.
NOTE - backbone determinants are set separately
Args:
propka_groups: groups to adjust
version: version object
options: options object
"""
iterative_interactions = []
# --- NonIterative section ---#
for group1 in propka_groups:
@@ -23,172 +37,202 @@ def setDeterminants(propka_groups, version=None, options=None):
# do not calculate interactions for coupled groups
if group2 in group1.covalently_coupled_groups:
break
distance = propka.calculations.distance(group1, group2)
if distance < version.parameters.coulomb_cutoff2:
interaction_type = version.parameters.interaction_matrix.get_value(group1.type,group2.type)
interaction_type = (
version.parameters.interaction_matrix.get_value(
group1.type, group2.type))
if interaction_type == 'I':
propka.iterative.addtoDeterminantList(group1, group2, distance, iterative_interactions, version=version)
propka.iterative.add_to_determinant_list(
group1, group2, distance, iterative_interactions,
version=version)
elif interaction_type == 'N':
addDeterminants(group1, group2, distance, version)
add_determinants(group1, group2, distance, version)
# --- Iterative section ---#
propka.iterative.addDeterminants(iterative_interactions, version, options=options)
propka.iterative.add_determinants(iterative_interactions, version)
def addDeterminants(group1, group2, distance, version):
def add_determinants(group1, group2, distance, version):
"""Add determinants and perturbations for distance(R1,R2) < coulomb_cutoff.
Args:
group1: first group to add
group2: second group to add
distance: distance between groups
version: version object
"""
adding determinants/perturbations, distance(R1, R2) < coulomb_cutoff always
"""
# side-chain determinant
addSidechainDeterminants(group1, group2, version)
add_sidechain_determinants(group1, group2, version)
# Coulomb determinant
addCoulombDeterminants(group1, group2, distance, version)
add_coulomb_determinants(group1, group2, distance, version)
return
def addSidechainDeterminants(group1, group2, version=None):
def add_sidechain_determinants(group1, group2, version=None):
"""Add side-chain determinants and perturbations.
NOTE - res_num1 > res_num2
Args:
group1: first group to add
group2: second group to add
version: version object
"""
adding side-chain determinants/perturbations
Note, res_num1 > res_num2
"""
hbond_interaction = version.hydrogen_bond_interaction(group1, group2)
if hbond_interaction:
if group1.charge == group2.charge:
# acid pair or base pair
if group1.model_pka < group2.model_pka:
newDeterminant1 = Determinant(group2, -hbond_interaction)
newDeterminant2 = Determinant(group1, hbond_interaction)
new_determinant1 = Determinant(group2, -hbond_interaction)
new_determinant2 = Determinant(group1, hbond_interaction)
else:
newDeterminant1 = Determinant(group2, hbond_interaction)
newDeterminant2 = Determinant(group1, -hbond_interaction)
new_determinant1 = Determinant(group2, hbond_interaction)
new_determinant2 = Determinant(group1, -hbond_interaction)
else:
newDeterminant1 = Determinant(group2, hbond_interaction*group1.charge)
newDeterminant2 = Determinant(group1, hbond_interaction*group2.charge)
new_determinant1 = Determinant(
group2, hbond_interaction*group1.charge)
new_determinant2 = Determinant(
group1, hbond_interaction*group2.charge)
group1.determinants['sidechain'].append(new_determinant1)
group2.determinants['sidechain'].append(new_determinant2)
group1.determinants['sidechain'].append(newDeterminant1)
group2.determinants['sidechain'].append(newDeterminant2)
return
def add_coulomb_determinants(group1, group2, distance, version):
"""Add non-iterative Coulomb determinants and perturbations.
def addCoulombDeterminants(group1, group2, distance, version):
Args:
group1: first group to add
group2: second group to add
distance: distance between groups
version: version object
"""
adding NonIterative Coulomb determinants/perturbations
"""
coulomb_interaction = version.electrostatic_interaction(group1, group2, distance)
coulomb_interaction = version.electrostatic_interaction(
group1, group2, distance)
if coulomb_interaction:
Q1 = group1.charge
Q2 = group2.charge
q1 = group1.charge
q2 = group2.charge
# assigning the Coulombic interaction
if Q1 < 0.0 and Q2 < 0.0:
""" both are acids """
addCoulombAcidPair(group1, group2, coulomb_interaction)
elif Q1 > 0.0 and Q2 > 0.0:
""" both are bases """
addCoulombBasePair(group1, group2, coulomb_interaction)
if q1 < 0.0 and q2 < 0.0:
# both are acids
add_coulomb_acid_pair(group1, group2, coulomb_interaction)
elif q1 > 0.0 and q2 > 0.0:
# both are bases
add_coulomb_base_pair(group1, group2, coulomb_interaction)
else:
""" one of each """
addCoulombIonPair(group1, group2, coulomb_interaction)
return
# one of each
add_coulomb_ion_pair(group1, group2, coulomb_interaction)
def addCoulombAcidPair(object1, object2, value):
def add_coulomb_acid_pair(object1, object2, value):
"""Add the Coulomb interaction (an acid pair).
The higher pKa is raised.
Args:
object1: first part of pair
object2: second part of pair
value: determinant value
"""
Adding the Coulomb interaction (an acid pair):
the higher pKa is raised
"""
if object1.model_pka > object2.model_pka:
newDeterminant = Determinant(object2, value)
object1.determinants['coulomb'].append(newDeterminant)
new_determinant = Determinant(object2, value)
object1.determinants['coulomb'].append(new_determinant)
else:
newDeterminant = Determinant(object1, value)
object2.determinants['coulomb'].append(newDeterminant)
new_determinant = Determinant(object1, value)
object2.determinants['coulomb'].append(new_determinant)
def addCoulombBasePair(object1, object2, value):
"""
Adding the Coulomb interaction (a base pair):
the lower pKa is lowered
def add_coulomb_base_pair(object1, object2, value):
"""Add the Coulomb interaction (a base pair).
The lower pKa is lowered.
Args:
object1: first part of pair
object2: second part of pair
value: determinant value
"""
if object1.model_pka < object2.model_pka:
newDeterminant = Determinant(object2, -value)
object1.determinants['coulomb'].append(newDeterminant)
new_determinant = Determinant(object2, -value)
object1.determinants['coulomb'].append(new_determinant)
else:
newDeterminant = Determinant(object1, -value)
object2.determinants['coulomb'].append(newDeterminant)
new_determinant = Determinant(object1, -value)
object2.determinants['coulomb'].append(new_determinant)
def addCoulombIonPair(object1, object2, value):
def add_coulomb_ion_pair(object1, object2, value):
"""Add the Coulomb interaction (an acid-base pair).
The pKa of the acid is lowered & the pKa of the base is raised.
Args:
object1: first part of pair
object2: second part of pair
value: determinant value
"""
Adding the Coulomb interaction (an acid-base pair):
the pKa of the acid is lowered & the pKa of the base is raised
"""
# residue1
Q1 = object1.charge
newDeterminant = Determinant(object2, Q1*value)
object1.determinants['coulomb'].append(newDeterminant)
q1 = object1.charge
new_determinant = Determinant(object2, q1*value)
object1.determinants['coulomb'].append(new_determinant)
# residue2
Q2 = object2.charge
newDeterminant = Determinant(object1, Q2*value)
object2.determinants['coulomb'].append(newDeterminant)
q2 = object2.charge
new_determinant = Determinant(object1, q2*value)
object2.determinants['coulomb'].append(new_determinant)
def set_ion_determinants(conformation_container, version):
"""Add ion determinants and perturbations.
def setIonDeterminants(conformation_container, version):
"""
adding ion determinants/perturbations
Args:
conformation_container: conformation to set
version: version object
"""
for titratable_group in conformation_container.get_titratable_groups():
for ion_group in conformation_container.get_ions():
squared_distance = propka.calculations.squared_distance(titratable_group, ion_group)
if squared_distance < version.parameters.coulomb_cutoff2_squared:
weight = version.calculatePairWeight(titratable_group.Nmass, ion_group.Nmass)
# the pKa of both acids and bases are shifted up by negative ions (and vice versa)
value = (-ion_group.charge) * version.calculateCoulombEnergy(math.sqrt(squared_distance), weight)
newDeterminant = Determinant(ion_group, value)
titratable_group.determinants['coulomb'].append(newDeterminant)
dist_sq = squared_distance(titratable_group, ion_group)
if dist_sq < version.parameters.coulomb_cutoff2_squared:
weight = version.calculate_pair_weight(
titratable_group.num_volume, ion_group.num_volume)
# the pKa of both acids and bases are shifted up by negative
# ions (and vice versa)
value = (
-ion_group.charge
* version.calculate_coulomb_energy(
math.sqrt(dist_sq), weight))
new_det = Determinant(ion_group, value)
titratable_group.determinants['coulomb'].append(new_det)
return
def setBackBoneDeterminants(titratable_groups, backbone_groups, version):
def set_backbone_determinants(titratable_groups, backbone_groups, version):
"""Set determinants between titrable and backbone groups.
Args:
titratable_groups: list of titratable groups
backbone_groups: list of backbone groups
version: version object
"""
for titratable_group in titratable_groups:
titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids
titratable_group_interaction_atoms = (
titratable_group.interaction_atoms_for_acids)
if not titratable_group_interaction_atoms:
continue
# find out which backbone groups this titratable is interacting with
for backbone_group in backbone_groups:
# find the interacting atoms
backbone_interaction_atoms = backbone_group.get_interaction_atoms(titratable_group)
backbone_interaction_atoms = (
backbone_group.get_interaction_atoms(titratable_group))
if not backbone_interaction_atoms:
continue
# find the smallest distance
[backbone_atom, distance, titratable_atom] = propka.calculations.get_smallest_distance(backbone_interaction_atoms,
titratable_group_interaction_atoms)
[backbone_atom, distance, titratable_atom] = (
get_smallest_distance(
backbone_interaction_atoms,
titratable_group_interaction_atoms))
# get the parameters
parameters = version.get_backbone_hydrogen_bond_parameters(backbone_atom, titratable_atom)
parameters = (
version.get_backbone_hydrogen_bond_parameters(
backbone_atom, titratable_atom))
if not parameters:
continue
[dpKa_max, [cutoff1, cutoff2]] = parameters
[dpka_max, [cutoff1, cutoff2]] = parameters
if distance < cutoff2:
# calculate angle factor
f_angle = 1.0
@@ -202,19 +246,20 @@ def setBackBoneDeterminants(titratable_groups, backbone_groups, version):
# ||
# C
if backbone_group.type == 'BBC':
if titratable_group.type in version.parameters.angular_dependent_sidechain_interactions:
if (titratable_group.type
in version.parameters.angular_dependent_sidechain_interactions):
if titratable_atom.element == 'H':
heavy_atom = titratable_atom.bonded_atoms[0]
heavy_atom = titratable_atom.bonded_atoms[0]
hydrogen_atom = titratable_atom
[d1, f_angle, d2] = propka.calculations.AngleFactorX(atom1=heavy_atom,
atom2=hydrogen_atom,
atom3=backbone_atom)
[_, f_angle, _] = angle_distance_factors(
atom1=heavy_atom, atom2=hydrogen_atom,
atom3=backbone_atom)
else:
# Either the structure is corrupt (no hydrogen), or the heavy atom is closer to
# the titratable atom than the hydrogen. In either case we set the angle factor
# to 0
# Either the structure is corrupt (no hydrogen),
# or the heavy atom is closer to the titratable
# atom than the hydrogen. In either case we set the
# angle factor to 0
f_angle = 0.0
# for BBN groups, the hydrogen is on the backbone group
#
# Titra.
@@ -225,23 +270,22 @@ def setBackBoneDeterminants(titratable_groups, backbone_groups, version):
# / \
if backbone_group.type == 'BBN':
if backbone_atom.element == 'H':
backbone_N = backbone_atom.bonded_atoms[0]
backbone_H = backbone_atom
[d1, f_angle, d2] = propka.calculations.AngleFactorX(atom1=titratable_atom,
atom2=backbone_H,
atom3=backbone_N)
backbone_n = backbone_atom.bonded_atoms[0]
backbone_h = backbone_atom
[_, f_angle, _] = (
angle_distance_factors(
atom1=titratable_atom, atom2=backbone_h,
atom3=backbone_n))
else:
# Either the structure is corrupt (no hydrogen), or the heavy atom is closer to
# the titratable atom than the hydrogen. In either case we set the angle factor
# to 0
# Either the structure is corrupt (no hydrogen), or the
# heavy atom is closer to the titratable atom than the
# hydrogen. In either case we set the angle factor to 0
f_angle = 0.0
if f_angle > 0.001:
value = titratable_group.charge * propka.calculations.HydrogenBondEnergy(distance, dpKa_max, [cutoff1,cutoff2], f_angle)
newDeterminant = Determinant(backbone_group, value)
titratable_group.determinants['backbone'].append(newDeterminant)
return
if f_angle > FANGLE_MIN:
value = (
titratable_group.charge
* hydrogen_bond_energy(
distance, dpka_max, [cutoff1, cutoff2], f_angle))
new_determinant = Determinant(backbone_group, value)
titratable_group.determinants['backbone'].append(
new_determinant)

File diff suppressed because it is too large Load Diff

View File

@@ -1,21 +1,26 @@
"""Provides an alternative PDB format that can transparently encode larger atom numbers.
"""Provides alternative PDB format that can encode larger atom numbers.
http://cci.lbl.gov/hybrid_36/
"""
import string
_hybrid36_upper_chars = set(string.ascii_uppercase)
_hybrid36_lower_chars = set(string.ascii_lowercase)
_hybrid36_digits = set(string.digits)
_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits
_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits
_HYBRID36_UPPER_CHARS = set(string.ascii_uppercase)
_HYBRID36_LOWER_CHARS = set(string.ascii_lowercase)
_HYBRID36_DIGITS = set(string.digits)
_HYBRID36_UPPER_SET = _HYBRID36_UPPER_CHARS | _HYBRID36_DIGITS
_HYBRID36_LOWER_SET = _HYBRID36_LOWER_CHARS | _HYBRID36_DIGITS
def decode(input_string):
"""
Convert an input string of a number in hybrid-36 format to an integer.
"""Convert an input string of a number in hybrid-36 format to an integer.
Args:
input_string: input string
Returns:
integer
"""
value_error_message = "invalid literal for hybrid-36 conversion: '%s'"
value_error_message = "invalid literal for hybrid-36 conversion: '{0:s}'"
original_input_string = input_string
input_string = input_string.strip()
@@ -27,30 +32,30 @@ def decode(input_string):
else:
sign = 1
if not len(input_string):
raise ValueError(value_error_message % input_string)
if len(input_string) == 0:
raise ValueError(value_error_message.format(input_string))
# See http://cci.lbl.gov/hybrid_36/ for documentation on the format.
num_chars = len(input_string)
first_char = input_string[0]
if first_char in _hybrid36_digits:
if first_char in _HYBRID36_DIGITS:
return sign * int(input_string)
elif first_char in _hybrid36_upper_chars:
elif first_char in _HYBRID36_UPPER_CHARS:
reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars)
_hybrid36_set = _hybrid36_upper_set
elif first_char in _hybrid36_lower_chars:
_hybrid36_set = _HYBRID36_UPPER_SET
elif first_char in _HYBRID36_LOWER_CHARS:
reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars)
_hybrid36_set = _hybrid36_lower_set
_hybrid36_set = _HYBRID36_LOWER_SET
else:
raise ValueError(value_error_message % original_input_string)
raise ValueError(value_error_message.format(original_input_string))
# Check the validity of the input string: ASCII characters should be
# either all uppercase or all lowercase.
for c in input_string[1:]:
if c not in _hybrid36_set:
raise ValueError(value_error_message % original_input_string)
for char in input_string[1:]:
if char not in _hybrid36_set:
raise ValueError(value_error_message.format(original_input_string))
# Convert with the int function.
return sign * (int(input_string, 36) + reference)

View File

@@ -1,60 +1,68 @@
"""Iterative functions for pKa calculations.
from __future__ import division
from __future__ import print_function
import math, time
import propka.lib as lib
These appear to mostly involve determinants.
"""
from propka.determinant import Determinant
import propka.calculations
from propka.lib import info, warning, debug
# Some library functions for the interative pKa determinants
from propka.lib import info, debug
def addtoDeterminantList(group1, group2, distance, iterative_interactions, version):
# TODO - these are undocumented constants
UNK_MIN_VALUE = 0.005
def add_to_determinant_list(group1, group2, distance, iterative_interactions,
version):
"""Add iterative determinantes to the list.
[[R1, R2], [side-chain, coulomb], [A1, A2]], ...
NOTE - sign is determined when the interaction is added to the iterative
object!
NOTE - distance < coulomb_cutoff here
Args:
group1: first group in pair
group2: second group in pair
distance: distance between groups
iterative_interactions: interaction list to modify
version: version object
"""
Adds 'iterative determinants' to list ..., [[R1, R2], [side-chain, coulomb], [A1, A2]], ...
Note, the sign is determined when the interaction is added to the iterative object!
Note, distance < coulomb_cutoff here
"""
hbond_value = version.hydrogen_bond_interaction(group1, group2)
hbond_value = version.hydrogen_bond_interaction(group1, group2)
coulomb_value = version.electrostatic_interaction(group1, group2, distance)
# adding the interaction to 'iterative_interactions'
if hbond_value or coulomb_value:
pair = [group1, group2]
values = [hbond_value, coulomb_value]
pair = [group1, group2]
values = [hbond_value, coulomb_value]
while None in values:
values[values.index(None)] = 0.0
annihilation = [0., 0.]
interaction = [pair, values, annihilation]
interaction = [pair, values, annihilation]
iterative_interactions.append(interaction)
return
def add_iterative_acid_pair(object1, object2, interaction):
"""Add the Coulomb 'iterative' interaction (an acid pair).
def addIterativeAcidPair(object1, object2, interaction):
The higher pKa is raised with QQ+HB
The lower pKa is lowered with HB
Args:
object1: first object in pair
object2: second object in pair
interaction: list with [values, annihilation]
"""
Adding the Coulomb 'iterative' interaction (an acid pair):
the higher pKa is raised with QQ+HB
the lower pKa is lowered with HB
"""
values = interaction[1]
values = interaction[1]
annihilation = interaction[2]
hbond_value = values[0]
hbond_value = values[0]
coulomb_value = values[1]
diff = coulomb_value + 2*hbond_value
comp1 = object1.pKa_old + annihilation[0] + diff
comp2 = object2.pKa_old + annihilation[1] + diff
annihilation[0] = 0.
annihilation[1] = 0.
comp1 = object1.pka_old + annihilation[0] + diff
comp2 = object2.pka_old + annihilation[1] + diff
annihilation[0] = 0.0
annihilation[1] = 0.0
if comp1 > comp2:
# side-chain
determinant = [object2, hbond_value]
determinant = [object2, hbond_value]
object1.determinants['sidechain'].append(determinant)
determinant = [object1, -hbond_value]
object2.determinants['sidechain'].append(determinant)
@@ -64,7 +72,7 @@ def addIterativeAcidPair(object1, object2, interaction):
annihilation[0] = -diff
else:
# side-chain
determinant = [object1, hbond_value]
determinant = [object1, hbond_value]
object2.determinants['sidechain'].append(determinant)
determinant = [object2, -hbond_value]
object1.determinants['sidechain'].append(determinant)
@@ -74,26 +82,31 @@ def addIterativeAcidPair(object1, object2, interaction):
annihilation[1] = -diff
def addIterativeBasePair(object1, object2, interaction):
def add_iterative_base_pair(object1, object2, interaction):
"""Add the Coulomb 'iterative' interaction (a base pair).
The lower pKa is lowered
Args:
object1: first object in pair
object2: second object in pair
interaction: list with [values, annihilation]
"""
Adding the Coulomb 'iterative' interaction (a base pair):
the lower pKa is lowered
"""
values = interaction[1]
values = interaction[1]
annihilation = interaction[2]
hbond_value = values[0]
hbond_value = values[0]
coulomb_value = values[1]
diff = coulomb_value + 2*hbond_value
diff = -diff
comp1 = object1.pKa_old + annihilation[0] + diff
comp2 = object2.pKa_old + annihilation[1] + diff
annihilation[0] = 0.
annihilation[1] = 0.
comp1 = object1.pka_old + annihilation[0] + diff
comp2 = object2.pka_old + annihilation[1] + diff
annihilation[0] = 0.0
annihilation[1] = 0.0
if comp1 < comp2:
# side-chain
determinant = [object2, -hbond_value]
object1.determinants['sidechain'].append(determinant)
determinant = [object1, hbond_value]
determinant = [object1, hbond_value]
object2.determinants['sidechain'].append(determinant)
# Coulomb
determinant = [object2, -coulomb_value]
@@ -103,7 +116,7 @@ def addIterativeBasePair(object1, object2, interaction):
# side-chain
determinant = [object1, -hbond_value]
object2.determinants['sidechain'].append(determinant)
determinant = [object2, hbond_value]
determinant = [object2, hbond_value]
object1.determinants['sidechain'].append(determinant)
# Coulomb
determinant = [object1, -coulomb_value]
@@ -111,254 +124,249 @@ def addIterativeBasePair(object1, object2, interaction):
annihilation[1] = -diff
def addIterativeIonPair(object1, object2, interaction, version):
"""
Adding the Coulomb 'iterative' interaction (an acid-base pair):
def add_iterative_ion_pair(object1, object2, interaction, version):
"""Add the Coulomb 'iterative' interaction (an acid-base pair)
the pKa of the acid is lowered & the pKa of the base is raised
Args:
object1: first object in pair
object2: second object in pair
interaction: list with [values, annihilation]
version: version object
"""
values = interaction[1]
values = interaction[1]
annihilation = interaction[2]
hbond_value = values[0]
hbond_value = values[0]
coulomb_value = values[1]
Q1 = object1.Q
Q2 = object2.Q
comp1 = object1.pKa_old + annihilation[0] + Q1*coulomb_value
comp2 = object2.pKa_old + annihilation[1] + Q2*coulomb_value
if object1.res_name not in version.parameters.exclude_sidechain_interactions:
comp1 += Q1*hbond_value
if object2.res_name not in version.parameters.exclude_sidechain_interactions:
comp2 += Q2*hbond_value
if Q1 == -1.0 and comp1 < comp2:
add_term = True # pKa(acid) < pKa(base)
elif Q1 == 1.0 and comp1 > comp2:
add_term = True # pKa(base) > pKa(acid)
q1 = object1.q
q2 = object2.q
comp1 = object1.pka_old + annihilation[0] + q1*coulomb_value
comp2 = object2.pka_old + annihilation[1] + q2*coulomb_value
if (object1.res_name
not in version.parameters.exclude_sidechain_interactions):
comp1 += q1*hbond_value
if (object2.res_name
not in version.parameters.exclude_sidechain_interactions):
comp2 += q2*hbond_value
if q1 == -1.0 and comp1 < comp2:
# pKa(acid) < pKa(base)
add_term = True
elif q1 == 1.0 and comp1 > comp2:
# pKa(base) > pKa(acid)
add_term = True
else:
add_term = False
add_term = False
annihilation[0] = 0.00
annihilation[1] = 0.00
if add_term == True:
# Coulomb
if coulomb_value > 0.005:
# residue1
interaction = [object2, Q1*coulomb_value]
annihilation[0] += -Q1*coulomb_value
object1.determinants['coulomb'].append(interaction)
# residue2
interaction = [object1, Q2*coulomb_value]
annihilation[1] += -Q2*coulomb_value
object2.determinants['coulomb'].append(interaction)
# Side-chain
if hbond_value > 0.005:
# residue1
if object1.res_name not in version.parameters.exclude_sidechain_interactions:
interaction = [object2, Q1*hbond_value]
annihilation[0] += -Q1*hbond_value
object1.determinants['sidechain'].append(interaction)
# residue2
if object2.res_name not in version.parameters.exclude_sidechain_interactions:
interaction = [object1, Q2*hbond_value]
annihilation[1] += -Q2*hbond_value
object2.determinants['sidechain'].append(interaction)
if add_term:
# Coulomb
if coulomb_value > UNK_MIN_VALUE:
# residue1
interaction = [object2, q1*coulomb_value]
annihilation[0] += -q1*coulomb_value
object1.determinants['coulomb'].append(interaction)
# residue2
interaction = [object1, q2*coulomb_value]
annihilation[1] += -q2*coulomb_value
object2.determinants['coulomb'].append(interaction)
# Side-chain
if hbond_value > UNK_MIN_VALUE:
# residue1
if (object1.res_name
not in version.parameters.exclude_sidechain_interactions):
interaction = [object2, q1*hbond_value]
annihilation[0] += -q1*hbond_value
object1.determinants['sidechain'].append(interaction)
# residue2
if (object2.res_name
not in version.parameters.exclude_sidechain_interactions):
interaction = [object1, q2*hbond_value]
annihilation[1] += -q2*hbond_value
object2.determinants['sidechain'].append(interaction)
def addDeterminants(iterative_interactions, version, options=None):
"""
def add_determinants(iterative_interactions, version, _=None):
"""Add determinants iteratively.
The iterative pKa scheme. Later it is all added in 'calculateTotalPKA'
Args:
iterative_interactions: list of iterative interactions
version: version object
_: options object
"""
# --- setup ---
iteratives = []
done_group = []
# creating iterative objects with references to their real group counterparts
# create iterative objects with references to their real group counterparts
for interaction in iterative_interactions:
pair = interaction[0]
for group in pair:
if group in done_group:
#print "done already"
""" do nothing - already have an iterative object for this group """
# do nothing - already have an iterative object for this group
pass
else:
newIterative = Iterative(group)
iteratives.append(newIterative)
new_iterative = Iterative(group)
iteratives.append(new_iterative)
done_group.append(group)
# Initialize iterative scheme
debug("\n --- pKa iterations (%d groups, %d interactions) ---" %
(len(iteratives), len(iterative_interactions)))
debug(
"\n --- pKa iterations ({0:d} groups, {1:d} interactions) ---".format(
len(iteratives), len(iterative_interactions)))
converged = False
iteration = 0
# set non-iterative pka values as first step
for itres in iteratives:
itres.pKa_iter.append(itres.pKa_NonIterative)
for iter_ in iteratives:
iter_.pka_iter.append(iter_.pka_noniterative)
# --- starting pKa iterations ---
while converged == False:
while not converged:
# initialize pka_new
iteration += 1
for itres in iteratives:
itres.determinants = {'sidechain': [], 'backbone': [],
'coulomb': []}
itres.pka_new = itres.pka_noniterative
# Adding interactions to temporary determinant container
for interaction in iterative_interactions:
pair = interaction[0]
object1, object2 = find_iterative(pair, iteratives)
q1 = object1.q
q2 = object2.q
if q1 < 0.0 and q2 < 0.0:
# both are acids
add_iterative_acid_pair(object1, object2, interaction)
elif q1 > 0.0 and q2 > 0.0:
# both are bases
add_iterative_base_pair(object1, object2, interaction)
else:
# one of each
add_iterative_ion_pair(object1, object2, interaction, version)
# Calculating pka_new values
for itres in iteratives:
for type_ in ['sidechain', 'backbone', 'coulomb']:
for determinant in itres.determinants[type_]:
itres.pka_new += determinant[1]
# initialize pKa_new
iteration += 1
for itres in iteratives:
itres.determinants = {'sidechain':[],'backbone':[],'coulomb':[]}
itres.pKa_new = itres.pKa_NonIterative
# Check convergence
converged = True
for itres in iteratives:
if itres.pka_new == itres.pka_old:
itres.converged = True
else:
itres.converged = False
converged = False
# reset pka_old & storing pka_new in pka_iter
for itres in iteratives:
itres.pka_old = itres.pka_new
itres.pka_iter.append(itres.pka_new)
# Adding interactions to temporary determinant container
for interaction in iterative_interactions:
pair = interaction[0]
values = interaction[1]
annihilation = interaction[2]
#print "len(interaction) = %d" % (len(interaction))
object1, object2 = findIterative(pair, iteratives)
Q1 = object1.Q
Q2 = object2.Q
if Q1 < 0.0 and Q2 < 0.0:
""" both are acids """
addIterativeAcidPair(object1, object2, interaction)
elif Q1 > 0.0 and Q2 > 0.0:
""" both are bases """
addIterativeBasePair(object1, object2, interaction)
else:
""" one of each """
addIterativeIonPair(object1, object2, interaction, version)
# Calculating pKa_new values
for itres in iteratives:
for type in ['sidechain','backbone','coulomb']:
for determinant in itres.determinants[type]:
itres.pKa_new += determinant[1]
# Check convergence
converged = True
for itres in iteratives:
if itres.pKa_new == itres.pKa_old:
itres.converged = True
else:
itres.converged = False
converged = False
# reset pKa_old & storing pKa_new in pKa_iter
for itres in iteratives:
itres.pKa_old = itres.pKa_new
itres.pKa_iter.append(itres.pKa_new)
if iteration == 10:
info("did not converge in %d iterations" % (iteration))
break
# --- Iterations finished ---
if iteration == 10:
info("did not converge in {0:d} iterations".format(iteration))
break
# printing pKa iterations
# formerly was conditioned on if options.verbosity >= 2 - now unnecessary
str = "%12s" % (" ")
for index in range(0, iteration+1 ):
str += "%8d" % (index)
debug(str)
str_ = ' '
for index in range(iteration+1):
str_ += "{0:>8d}".format(index)
debug(str_)
for itres in iteratives:
str = "%s " % (itres.label)
for pKa in itres.pKa_iter:
str += "%8.2lf" % (pKa)
if itres.converged == False:
str += " *"
debug(str)
str_ = "{0:s} ".format(itres.label)
for pka in itres.pka_iter:
str_ += "{0:>8.2f}".format(pka)
if not itres.converged:
str_ += " *"
debug(str_)
# creating real determinants and adding them to group object
for itres in iteratives:
for type in ['sidechain','backbone','coulomb']:
for interaction in itres.determinants[type]:
for type_ in ['sidechain', 'backbone', 'coulomb']:
for interaction in itres.determinants[type_]:
#info('done',itres.group.label,interaction[0],interaction[1])
value = interaction[1]
if value > 0.005 or value < -0.005:
g = interaction[0]
newDeterminant = Determinant(g, value)
itres.group.determinants[type].append(newDeterminant)
if value > UNK_MIN_VALUE or value < -UNK_MIN_VALUE:
group = interaction[0]
new_det = Determinant(group, value)
itres.group.determinants[type_].append(new_det)
def find_iterative(pair, iteratives):
"""Find the 'iteratives' that correspond to the groups in 'pair'.
def findIterative(pair, iteratives):
"""
Function to find the two 'iteratives' that corresponds to the groups in 'pair'
Args:
pair: groups to match
iteratives: list of iteratives to search
Returns:
1. first matched iterative
2. second matched iterative
"""
for iterative in iteratives:
if iterative.group == pair[0]:
iterative0 = iterative
elif iterative.group == pair[1]:
iterative1 = iterative
return iterative0, iterative1
class Iterative:
"""
Iterative class - pKa values and references of iterative groups
Note, this class has a fake determinant list, true determinants are
made after the iterations are finished.
"""Iterative class - pKa values and references of iterative groups.
NOTE - this class has a fake determinant list, true determinants are made
after the iterations are finished.
"""
def __init__(self, group):
"""
Contructer of the iterative object
"""
"""Initialize object with group.
#print "creating 'iterative object' for %s" % (group.label)
self.label = group.label
self.atom = group.atom
self.res_name = group.residue_type
self.Q = group.charge
self.pKa_old = None
self.pKa_new = None
self.pKa_iter = []
self.pKa_NonIterative = 0.00
self.determinants = {'sidechain':[],'backbone':[],'coulomb':[]}
Args:
group: group to use for initialization.
"""
self.label = group.label
self.atom = group.atom
self.res_name = group.residue_type
self.q = group.charge
self.pka_old = None
self.pka_new = None
self.pka_iter = []
self.pka_noniterative = 0.00
self.determinants = {'sidechain': [], 'backbone': [], 'coulomb': []}
self.group = group
self.converged = True
# Calculate the Non-Iterative part of pKa from the group object
# Side chain
side_chain = 0.00
for determinant in group.determinants['sidechain']:
value = determinant.value
side_chain += value
# Back bone
back_bone = 0.00
back_bone = 0.00
for determinant in group.determinants['backbone']:
value = determinant.value
back_bone += value
back_bone += value
# Coulomb
coulomb = 0.00
coulomb = 0.00
for determinant in group.determinants['coulomb']:
value = determinant.value
coulomb += value
self.pKa_NonIterative = group.model_pka
self.pKa_NonIterative += group.Emass
self.pKa_NonIterative += group.Elocl
self.pKa_NonIterative += side_chain
self.pKa_NonIterative += back_bone
self.pKa_NonIterative += coulomb
self.pKa_old = self.pKa_NonIterative
coulomb += value
self.pka_noniterative = group.model_pka
self.pka_noniterative += group.energy_volume
self.pka_noniterative += group.energy_local
self.pka_noniterative += side_chain
self.pka_noniterative += back_bone
self.pka_noniterative += coulomb
self.pka_old = self.pka_noniterative
def __eq__(self, other):
"""
Check if two groups should be considered identical
"""
"""Needed to use objects in sets."""
if self.atom.type == 'atom':
# In case of protein atoms we trust the labels
return self.label==other.label
return self.label == other.label
else:
# For heterogene atoms we also need to check the residue number
return self.label==other.label and self.atom.res_num == other.atom.res_num
return (
self.label == other.label
and self.atom.res_num == other.atom.res_num)
def __hash__(self):
""" Needed together with __eq__ - otherwise we can't make sets of groups """
"""Needed to use objects in sets."""
return id(self)

View File

@@ -1,112 +1,142 @@
from __future__ import division
from __future__ import print_function
"""Implements many of the main functions used to call PROPKA."""
import sys
import pkg_resources
import logging
import argparse
import pkg_resources
logger = logging.getLogger("propka")
stdout_handler = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(stdout_handler)
_LOGGER = logging.getLogger("propka")
_STDOUT_HANDLER = logging.StreamHandler(sys.stdout)
_STDOUT_HANDLER.setFormatter(logging.Formatter("%(message)s"))
_LOGGER.addHandler(_STDOUT_HANDLER)
#
# file I/O
#
def open_file_for_reading(filename):
"""Open file or file-like stream *filename* for reading.
*filename* may be a string and then it is opened but if it is a
file-like object (such as an open :class:`file` or
:class:`StringIO.StringIO` --- really anything with ``next()``,
``read()``, ``readlines()``, ``readline``, ``close`` methods) then
the object is just passed through (the stream is attempted to be
reset to the beginning with ``fseek(0)``).
def open_file_for_reading(input_file):
"""Open file or file-like stream for reading.
TODO - convert this to a context manager
Args:
input_file: path to file or file-like object. If file-like object,
then will attempt fseek(0).
"""
if (hasattr(filename, 'next') or hasattr(filename, '__next__')) \
and hasattr(filename, 'read') \
and hasattr(filename, 'readline') and hasattr(filename, 'readlines') \
and hasattr(filename, 'close'):
# already a stream
try:
filename.fseek(0)
except AttributeError:
pass
return filename
try:
input_file.fseek(0)
return input_file
except AttributeError:
pass
try:
f = open(filename,'r')
file_ = open(input_file, 'rt')
except:
raise IOError('Cannot find file %s' %filename)
return f
raise IOError('Cannot find file {0:s}'.format(input_file))
return file_
def open_file_for_writing(filename):
"""Open file or file-like stream for writing"""
if hasattr(filename, 'write') and hasattr(filename, 'writeline') and hasattr(filename, 'writelines') \
and hasattr(filename, 'close'):
# already a stream
try:
mode = filename.mode
except AttributeError:
mode = "w"
def open_file_for_writing(input_file):
"""Open file or file-like stream for writing.
TODO - convert this to a context manager.
Args:
input_file: path to file or file-like object. If file-like object,
then will attempt to get file mode.
"""
try:
mode = input_file.mode
if not ("w" in mode or "a" in mode or "+" in mode):
raise IOError("File/stream not open for writing")
return filename
return input_file
except AttributeError:
pass
try:
f = open(filename,'w')
except:
raise Exception('Could not open %s'%filename)
return f
file_ = open(input_file, 'wt')
except FileNotFoundError:
raise Exception('Could not open {0:s}'.format(input_file))
return file_
#
# bookkeeping etc.
#
def conformation_sorter(conf):
"""TODO - figure out what this function does."""
model = int(conf[:-1])
altloc = conf[-1:]
return model*100+ord(altloc)
def split_atoms_into_molecules(atoms):
"""Maps atoms into molecules.
Args:
atoms: list of atoms
Returns:
list of molecules
"""
molecules = []
while len(atoms)>0:
while len(atoms) > 0:
initial_atom = atoms.pop()
molecules.append( make_molecule(initial_atom,atoms))
molecules.append(make_molecule(initial_atom, atoms))
return molecules
def make_molecule(atom, atoms):
"""Make a molecule from atoms.
Args:
atom: one of the atoms
atoms: a list of the remaining atoms
Return:
list of atoms
"""
bonded_atoms = [a for a in atoms if atom in a.bonded_atoms]
res_atoms = [atom,]
for ba in bonded_atoms:
if ba in atoms:
atoms.remove(ba)
res_atoms.extend(make_molecule(ba, atoms))
for bond_atom in bonded_atoms:
if bond_atom in atoms:
atoms.remove(bond_atom)
res_atoms.extend(make_molecule(bond_atom, atoms))
return res_atoms
def make_grid(min,max,step):
x = min
while x <= max:
def make_grid(min_, max_, step):
"""Make a grid across the specified tange.
TODO - figure out if this duplicates existing generators like `range` or
numpy function.
Args:
min_: minimum value of grid
max_: maximum value of grid
step: grid step size
"""
x = min_
while x <= max_:
yield x
x += step
return
def generate_combinations(interactions):
"""Generate combinations of interactions.
Args:
interactions: list of interactions
Returns:
list of combinations
"""
res = [[]]
for interaction in interactions:
res = make_combination(res, interaction)
res.remove([])
return res
def make_combination(combis, interaction):
"""Make a specific set of combinations.
Args:
combis: list of combinations
interaction: interaction to add to combinations
Returns:
list of combinations
"""
res = []
for combi in combis:
res.append(combi+[interaction])
@@ -115,15 +145,20 @@ def make_combination(combis, interaction):
def parse_res_string(res_str):
"""
Parse the residue string, in format "chain:resnum[inscode]", and return
a tuple of (chain, resnum, inscode). Raises ValueError if the input
string is invalid.
"""Parse a residue string.
Args:
res_string: residue string in format "chain:resnum[inscode]"
Returns:
a tuple of (chain, resnum, inscode).
Raises:
ValueError if the input string is invalid.
"""
try:
chain, resnum_str = res_str.split(":")
except ValueError:
raise ValueError("Invalid residue string (must contain 2 colon-separated values)")
raise ValueError("Invalid residue string (must contain 2 "
"colon-separated values)")
try:
resnum = int(resnum_str)
except ValueError:
@@ -142,7 +177,7 @@ def build_parser(parser=None):
"""Build an argument parser for PROPKA.
Args:
parser_: existing parser. If this is not None, then the PROPKA parser will
parser: existing parser. If this is not None, then the PROPKA parser will
be created as a subparser to this existing parser. Otherwise, a
new parser will be created.
Returns:
@@ -151,74 +186,96 @@ def build_parser(parser=None):
if parser is not None:
group = parser.add_argument_group(title="PROPKA invoation options")
else:
parser = argparse.ArgumentParser(description=("PROPKA predicts the pKa values of ionizable "
"groups in proteins and protein-ligand "
"complexes based in the 3D structure"),
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
parser = argparse.ArgumentParser(
description=("PROPKA predicts the pKa values of ionizable "
"groups in proteins and protein-ligand "
"complexes based in the 3D structure"),
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# This is duck-typing at its finest
group = parser
group.add_argument("input_pdb", help="read data from <filename>")
group.add_argument("-f", "--file", action="append", dest="filenames", default=[],
help="read data from <filename>, i.e. <filename> is added to arguments")
group.add_argument("-r", "--reference", dest="reference", default="neutral",
help=("setting which reference to use for stability calculations "
"[neutral/low-pH]"))
group.add_argument("-c", "--chain", action="append", dest="chains",
help=('creating the protein with only a specified chain. Specify '
'" " for chains without ID [all]'))
group.add_argument("-i", "--titrate_only", dest="titrate_only",
help=('Treat only the specified residues as titratable. Value should '
'be a comma-separated list of "chain:resnum" values; for example: '
'-i "A:10,A:11"'))
group.add_argument("-t", "--thermophile", action="append", dest="thermophiles",
help=("defining a thermophile filename; usually used in "
"'alignment-mutations'"))
group.add_argument("-a", "--alignment", action="append", dest="alignment",
help=("alignment file connecting <filename> and <thermophile> "
"[<thermophile>.pir]"))
group.add_argument("-m", "--mutation", action="append", dest="mutations",
help=("specifying mutation labels which is used to modify "
"<filename> according to, e.g. N25R/N181D"))
group.add_argument("-v", "--version", dest="version_label", default="Jan15",
help="specifying the sub-version of propka [Jan15/Dec19]")
group.add_argument("-p", "--parameters", dest="parameters",
default=pkg_resources.resource_filename(__name__, "propka.cfg"),
help="set the parameter file [%(default)s]")
group.add_argument(
"-f", "--file", action="append", dest="filenames", default=[],
help="read data from <filename>, i.e. <filename> is added to arguments")
group.add_argument(
"-r", "--reference", dest="reference", default="neutral",
help=("setting which reference to use for stability calculations "
"[neutral/low-pH]"))
group.add_argument(
"-c", "--chain", action="append", dest="chains",
help=('creating the protein with only a specified chain. Specify '
'" " for chains without ID [all]'))
group.add_argument(
"-i", "--titrate_only", dest="titrate_only",
help=('Treat only the specified residues as titratable. Value should '
'be a comma-separated list of "chain:resnum" values; for example: '
'-i "A:10,A:11"'))
group.add_argument(
"-t", "--thermophile", action="append", dest="thermophiles",
help=("defining a thermophile filename; usually used in "
"'alignment-mutations'"))
group.add_argument(
"-a", "--alignment", action="append", dest="alignment",
help=("alignment file connecting <filename> and <thermophile> "
"[<thermophile>.pir]"))
group.add_argument(
"-m", "--mutation", action="append", dest="mutations",
help=("specifying mutation labels which is used to modify "
"<filename> according to, e.g. N25R/N181D"))
group.add_argument(
"-v", "--version", dest="version_label", default="Jan15",
help="specifying the sub-version of propka [Jan15/Dec19]")
group.add_argument(
"-p", "--parameters", dest="parameters",
default=pkg_resources.resource_filename(__name__, "propka.cfg"),
help="set the parameter file [{default:s}]")
try:
group.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="logging level verbosity", default="INFO")
group.add_argument(
"--log-level",
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="logging level verbosity", default="INFO")
except argparse.ArgumentError:
# It is possible that --log-level has already been set by APBS
pass
group.add_argument("-o", "--pH", dest="pH", type=float, default=7.0,
help="setting pH-value used in e.g. stability calculations [7.0]")
group.add_argument("-w", "--window", dest="window", nargs=3, type=float,
default=(0.0, 14.0, 1.0),
help=("setting the pH-window to show e.g. stability profiles "
"[0.0, 14.0, 1.0]"))
group.add_argument("-g", "--grid", dest="grid", nargs=3, type=float,
default=(0.0, 14.0, 0.1),
help=("setting the pH-grid to calculate e.g. stability "
"related properties [0.0, 14.0, 0.1]"))
group.add_argument("--mutator", dest="mutator",
help="setting approach for mutating <filename> [alignment/scwrl/jackal]")
group.add_argument("--mutator-option", dest="mutator_options", action="append",
help="setting property for mutator [e.g. type=\"side-chain\"]")
group.add_argument("-d", "--display-coupled-residues", dest="display_coupled_residues",
action="store_true", help=("Displays alternative pKa values due "
"to coupling of titratable groups"))
group.add_argument("-l", "--reuse-ligand-mol2-files", dest="reuse_ligand_mol2_file",
action="store_true", default=False,
help=("Reuses the ligand mol2 files allowing the user to alter "
"ligand bond orders"))
group.add_argument("-k", "--keep-protons", dest="keep_protons", action="store_true",
help="Keep protons in input file", default=False)
group.add_argument("-q", "--quiet", action="store_const", const="WARNING",
dest="log_level", help="suppress non-warning messages")
group.add_argument("--protonate-all", dest="protonate_all", action="store_true",
help="Protonate all atoms (will not influence pKa calculation)",
default=False)
group.add_argument(
"-o", "--pH", dest="pH", type=float, default=7.0,
help="setting pH-value used in e.g. stability calculations [7.0]")
group.add_argument(
"-w", "--window", dest="window", nargs=3, type=float,
default=(0.0, 14.0, 1.0),
help=("setting the pH-window to show e.g. stability profiles "
"[0.0, 14.0, 1.0]"))
group.add_argument(
"-g", "--grid", dest="grid", nargs=3, type=float,
default=(0.0, 14.0, 0.1),
help=("setting the pH-grid to calculate e.g. stability "
"related properties [0.0, 14.0, 0.1]"))
group.add_argument(
"--mutator", dest="mutator",
help="setting approach for mutating <filename> [alignment/scwrl/jackal]")
group.add_argument(
"--mutator-option", dest="mutator_options", action="append",
help="setting property for mutator [e.g. type=\"side-chain\"]")
group.add_argument(
"-d", "--display-coupled-residues", dest="display_coupled_residues",
action="store_true",
help=("Displays alternative pKa values due "
"to coupling of titratable groups"))
group.add_argument(
"-l", "--reuse-ligand-mol2-files", dest="reuse_ligand_mol2_file",
action="store_true", default=False,
help=("Reuses the ligand mol2 files allowing the user to alter "
"ligand bond orders"))
group.add_argument(
"-k", "--keep-protons", dest="keep_protons", action="store_true",
help="Keep protons in input file", default=False)
group.add_argument(
"-q", "--quiet", action="store_const", const="WARNING",
dest="log_level", help="suppress non-warning messages")
group.add_argument(
"--protonate-all", dest="protonate_all", action="store_true",
help="Protonate all atoms (will not influence pKa calculation)",
default=False)
return parser
@@ -234,13 +291,11 @@ def loadOptions(args=None):
"""
# loading the parser
parser = build_parser()
# parsing and returning options and arguments
options = parser.parse_args(args)
# adding specified filenames to arguments
options.filenames.append(options.input_pdb)
# Convert titrate_only string to a list of (chain, resnum) items:
if options.titrate_only is not None:
res_list = []
@@ -248,76 +303,109 @@ def loadOptions(args=None):
try:
chain, resnum, inscode = parse_res_string(res_str)
except ValueError:
logger.critical('Invalid residue string: "%s"' % res_str)
_LOGGER.critical(
'Invalid residue string: "{0:s}"'.format(res_str))
sys.exit(1)
res_list.append((chain, resnum, inscode))
options.titrate_only = res_list
# Set the no-print variable
level = getattr(logging, options.log_level)
logger.setLevel(level)
_LOGGER.setLevel(level)
# done!
return options
def makeTidyAtomLabel(name,element):
"""
Returns a 'tidier' atom label for printing the new pdbfile
"""
def make_tidy_atom_label(name, element):
"""Returns a 'tidier' atom label for printing to the new PDB file.
if len(name)>4:# if longer than 4, just truncate the name
label=name[0:4]
elif len(name)==4:# if lenght is 4, otherwise use the name as it is
Args:
name: atom name
element: atom element
Returns:
string
"""
if len(name) > 4: # if longer than 4, just truncate the name
label = name[0:4]
elif len(name) == 4: # if length is 4, otherwise use the name as it is
label = name
else: # if less than 4 characters long, insert white space as needed
if len(element)==1:
label = ' %-3s'%name
else: # The element shoul occupy the two first chars
label = '%-4s'%name
if len(element) == 1:
label = ' {0:<3s}'.format(name)
else: # The element should occupy the two first chars
label = '{0:<4s}'.format(name)
return label
def get_sorted_configurations(configuration_keys):
"""
extract and sort configurations
"""Extract and sort configurations.
Args:
configuration_keys: list of configuration keys
Returns:
list of configurations
"""
configurations = list(configuration_keys)
configurations.sort(key=configuration_compare)
return configurations
def configuration_compare(conf):
"""TODO - figure out what this function does."""
return 100*int(conf[1:-2]) + ord(conf[-1])
def write_file(filename, lines):
"""Writes a new file.
def writeFile(filename, lines):
Args:
filename: name of file
lines: lines to write to file
"""
Writes a new file
"""
f = open_file_for_writing(filename)
file_ = open_file_for_writing(filename)
for line in lines:
f.write( "%s\n" % (line) )
f.close()
file_.write("{0:s}\n".format(line))
file_.close()
def _args_to_str(arg_list):
"""Summarize list of arguments in string.
Args:
arg_list: list of arguments
Returns:
string
"""
return " ".join(map(str, arg_list))
def info(*args):
"""Log a message. Level defaults to INFO unless overridden."""
logger.info(_args_to_str(args))
"""Log a message to info.
Level defaults to INFO unless overridden.
Args:
args: argument list
"""
_LOGGER.info(_args_to_str(args))
def debug(*args):
"""Log a message on the DEBUG level."""
logger.debug(_args_to_str(args))
"""Log a message to debug.
Level defaults to DEBUG unless overridden.
Args:
args: argument list
"""
_LOGGER.debug(_args_to_str(args))
def warning(*args):
"""Log a WARN message"""
logger.warning(_args_to_str(args))
"""Log a message to warning.
Level defaults to WARNING unless overridden.
Args:
args: argument list
"""
_LOGGER.warning(_args_to_str(args))

View File

@@ -1,16 +1,9 @@
#!/usr/bin/python
from __future__ import division
from __future__ import print_function
import sys
import propka.calculations
from propka.vector_algebra import *
from propka.lib import info, warning
"""Ligand classes and functions."""
from propka.calculations import squared_distance
from propka.vector_algebra import Vector
all_sybyl_types = [
ALL_SYBYL_TYPES = [
'C.3', # carbon sp3
'H', # hydrogen
'C.2', # carbon sp2
@@ -66,344 +59,277 @@ all_sybyl_types = [
'Sn'] # tin
#propka_input_types = ['1P','1N','2P','2N']
#for type in all_sybyl_types:
# temp = type.replace('.','')
# if len(temp)>3:
# temp = temp[0:3]
# propka_input_types.append(temp)
#
#for t in propka_input_types:
# print (t)
propka_input_types = [
'1P',
'1N',
'2P',
'2N',
'C3',
'H',
'C2',
'Hsp',
'C1',
'Ht3',
'Car',
'LP',
'Cca',
'Du',
'N3',
'DuC',
'N2',
'Any',
'N1',
'Hal',
'Nar',
'Het',
'Nam',
'Hev',
'Npl',
'Li',
'N4',
'Na',
'O3',
'Mg',
'O2',
'Al',
'Oco',
'Si',
'Osp',
'K',
'Ot3',
'Ca',
'S3',
'Crt',
'S2',
'Cro',
'SO',
'Mn',
'SO2',
'Fe',
'P3',
'Coo',
'F',
'Cu',
'Cl',
'Zn',
'Br',
'Se',
'I',
'Mo',
'Sn']
max_C_double_bond = 1.3
max_C_triple_bond = 1.2
max_C_double_bond_squared = max_C_double_bond*max_C_double_bond
max_C_triple_bond_squared = max_C_triple_bond*max_C_triple_bond
PROPKA_INPUT_TYPES = ['1P', '1N', '2P', '2N', 'C3', 'H', 'C2', 'Hsp', 'C1',
'Ht3', 'Car', 'LP', 'Cca', 'Du', 'N3', 'DuC', 'N2',
'Any', 'N1', 'Hal', 'Nar', 'Het', 'Nam', 'Hev', 'Npl',
'Li', 'N4', 'Na', 'O3', 'Mg', 'O2', 'Al', 'Oco', 'Si',
'Osp', 'K', 'Ot3', 'Ca', 'S3', 'Crt', 'S2', 'Cro', 'SO',
'Mn', 'SO2', 'Fe', 'P3', 'Coo', 'F', 'Cu', 'Cl', 'Zn',
'Br', 'Se', 'I', 'Mo', 'Sn']
MAX_C_DOUBLE_BOND = 1.3
MAX_C_TRIPLE_BOND = 1.2
MAX_C_DOUBLE_BOND_SQUARED = MAX_C_DOUBLE_BOND*MAX_C_DOUBLE_BOND
MAX_C_TRIPLE_BOND_SQUARED = MAX_C_TRIPLE_BOND*MAX_C_TRIPLE_BOND
PLANARITY_MARGIN = 0.20
def assign_sybyl_type(atom):
"""Assign Sybyl type to atom.
Args:
atom: atom to assign
"""
# check if we already have assigned a name to this atom
if atom.sybyl_assigned:
#info(atom.name,'already assigned')
return
# find some properties of the atom
ring_atoms = is_ring_member(atom)
aromatic = is_aromatic_ring(ring_atoms)
planar = is_planar(atom)
aromatic = is_aromatic_ring(ring_atoms)
planar = is_planar(atom)
bonded_elements = {}
for i in range(len(atom.bonded_atoms)):
bonded_elements[i]=atom.bonded_atoms[i].element
for i, bonded_atom in enumerate(atom.bonded_atoms):
bonded_elements[i] = bonded_atom.element
# Aromatic carbon/nitrogen
if aromatic:
for ra in ring_atoms:
if ra.element in ['C','N']:
set_type(ra, ra.element+'.ar')
for ring_atom in ring_atoms:
if ring_atom.element in ['C', 'N']:
set_type(ring_atom, ring_atom.element+'.ar')
return
# check for amide
if atom.element in ['O','N','C']:
O=None
N=None
C=None
if atom.element in ['O', 'N', 'C']:
o_atom = None
n_atom = None
c_atom = None
# oxygen, nitrogen
if atom.element in ['O','N']:
for b in atom.get_bonded_elements('C'):
for bb in b.bonded_atoms:
if (bb.element =='N' and atom.element == 'O'):
O=atom
C=b
N=bb
elif (bb.element =='O' and atom.element == 'N'):
N=atom
C=b
O=bb
if atom.element in ['O', 'N']:
for bonded_elem in atom.get_bonded_elements('C'):
for bonded_atom in bonded_elem.bonded_atoms:
if (bonded_atom.element == 'N' and atom.element == 'O'):
o_atom = atom
c_atom = bonded_elem
n_atom = bonded_atom
elif (bonded_atom.element == 'O' and atom.element == 'N'):
n_atom = atom
c_atom = bonded_elem
o_atom = bonded_atom
# carbon
if atom.element == 'C':
nitrogens = atom.get_bonded_elements('N')
oxygens = atom.get_bonded_elements('O')
if len(nitrogens)==1 and len(oxygens)==1:
C = atom
N = nitrogens[0]
O = oxygens[0]
if C and N and O:
# make sure that the Nitrogen is not aromatic and that it has two heavy atom bonds
if not is_aromatic_ring(is_ring_member(N)) and len(N.get_bonded_heavy_atoms())==2:
set_type(N,'N.am')
set_type(C,'C.2')
set_type(O,'O.2')
if len(nitrogens) == 1 and len(oxygens) == 1:
c_atom = atom
n_atom = nitrogens[0]
o_atom = oxygens[0]
if c_atom and n_atom and o_atom:
# make sure that the Nitrogen is not aromatic and that it has two
# heavy atom bonds
if (not is_aromatic_ring(is_ring_member(n_atom))
and len(n_atom.get_bonded_heavy_atoms()) == 2):
set_type(n_atom, 'N.am')
set_type(c_atom, 'C.2')
set_type(o_atom, 'O.2')
return
if atom.element=='C':
if atom.element == 'C':
# check for carboxyl
if len(atom.bonded_atoms)==3 and list(bonded_elements.values()).count('O')==2:
i1 = list(bonded_elements.values()).index('O')
i2 = list(bonded_elements.values()).index('O',i1+1)
if len(atom.bonded_atoms[i1].bonded_atoms)==1 and len(atom.bonded_atoms[i2].bonded_atoms)==1:
set_type(atom.bonded_atoms[i1],'O.co2-')
set_type(atom.bonded_atoms[i2],'O.co2')
set_type(atom,'C.2')
if (len(atom.bonded_atoms) == 3
and list(bonded_elements.values()).count('O') == 2):
index1 = list(bonded_elements.values()).index('O')
index2 = list(bonded_elements.values()).index('O', index1+1)
if (len(atom.bonded_atoms[index1].bonded_atoms) == 1
and len(atom.bonded_atoms[index2].bonded_atoms) == 1):
set_type(atom.bonded_atoms[index1], 'O.co2-')
set_type(atom.bonded_atoms[index2], 'O.co2')
set_type(atom, 'C.2')
return
# sp carbon
if len(atom.bonded_atoms)<=2:
for b in atom.bonded_atoms:
if propka.calculations.squared_distance(atom, b) < max_C_triple_bond_squared:
set_type(atom,'C.1')
set_type(b,b.element+'.1')
if len(atom.bonded_atoms) <= 2:
for bonded_atom in atom.bonded_atoms:
if (squared_distance(atom, bonded_atom)
< MAX_C_TRIPLE_BOND_SQUARED):
set_type(atom, 'C.1')
set_type(bonded_atom, bonded_atom.element + '.1')
if atom.sybyl_assigned:
return
# sp2 carbon
if planar:
set_type(atom,'C.2')
set_type(atom, 'C.2')
# check for N.pl3
for b in atom.bonded_atoms:
if b.element=='N':
if len(b.bonded_atoms)<3 or is_planar(b):
set_type(b,'N.pl3')
for bonded_atom in atom.bonded_atoms:
if bonded_atom.element == 'N':
if (len(bonded_atom.bonded_atoms) < 3
or is_planar(bonded_atom)):
set_type(bonded_atom, 'N.pl3')
return
# sp3 carbon
set_type(atom, 'C.3')
return
# Nitrogen
if atom.element == 'N':
# check for planar N
if len(atom.bonded_atoms)==1:
if len(atom.bonded_atoms) == 1:
if is_planar(atom.bonded_atoms[0]):
set_type(atom,'N.pl3')
set_type(atom, 'N.pl3')
return
if planar:
set_type(atom,'N.pl3')
set_type(atom, 'N.pl3')
return
set_type(atom,'N.3')
set_type(atom, 'N.3')
return
# Oxygen
if atom.element == 'O':
set_type(atom,'O.3')
set_type(atom, 'O.3')
if len(atom.bonded_atoms) == 1:
# check for carboxyl
if atom.bonded_atoms[0].element == 'C':
the_carbon = atom.bonded_atoms[0]
if len(the_carbon.bonded_atoms)==3 and the_carbon.count_bonded_elements('O')==2:
[O1,O2] = the_carbon.get_bonded_elements('O')
if len(O1.bonded_atoms)==1 and len(O2.bonded_atoms)==1:
set_type(O1,'O.co2-')
set_type(O2,'O.co2')
set_type(the_carbon,'C.2')
if (len(the_carbon.bonded_atoms) == 3
and the_carbon.count_bonded_elements('O') == 2):
[oxy1, oxy2] = the_carbon.get_bonded_elements('O')
if (len(oxy1.bonded_atoms) == 1
and len(oxy2.bonded_atoms) == 1):
set_type(oxy1, 'O.co2-')
set_type(oxy2, 'O.co2')
set_type(the_carbon, 'C.2')
return
# check for X=O
if propka.calculations.squared_distance(atom, atom.bonded_atoms[0]) < max_C_double_bond_squared:
set_type(atom,'O.2')
if atom.bonded_atoms[0].element=='C':
set_type(atom.bonded_atoms[0],'C.2')
if (squared_distance(atom, atom.bonded_atoms[0])
< MAX_C_DOUBLE_BOND_SQUARED):
set_type(atom, 'O.2')
if atom.bonded_atoms[0].element == 'C':
set_type(atom.bonded_atoms[0], 'C.2')
return
# Sulphur
if atom.element == 'S':
# check for SO2
if list(bonded_elements.values()).count('O')==2:
i1 = list(bonded_elements.values()).index('O')
i2 = list(bonded_elements.values()).index('O',i1+1)
set_type(atom.bonded_atoms[i1],'O.2')
set_type(atom.bonded_atoms[i2],'O.2')
set_type(atom,'S.o2')
if list(bonded_elements.values()).count('O') == 2:
index1 = list(bonded_elements.values()).index('O')
index2 = list(bonded_elements.values()).index('O', index1+1)
set_type(atom.bonded_atoms[index1], 'O.2')
set_type(atom.bonded_atoms[index2], 'O.2')
set_type(atom, 'S.o2')
return
# check for SO4
if list(bonded_elements.values()).count('O')==4:
no_O2 = 0
if list(bonded_elements.values()).count('O') == 4:
no_o2 = 0
for i in range(len(atom.bonded_atoms)):
if len(atom.bonded_atoms[i].bonded_atoms)==1 and no_O2<2:
set_type(atom.bonded_atoms[i],'O.2')
no_O2+=1
if len(atom.bonded_atoms[i].bonded_atoms) == 1 and no_o2 < 2:
set_type(atom.bonded_atoms[i], 'O.2')
no_o2 += 1
else:
set_type(atom.bonded_atoms[i],'O.3')
set_type(atom,'S.3')
set_type(atom.bonded_atoms[i], 'O.3')
set_type(atom, 'S.3')
return
# Phosphorus
if atom.element == 'P':
set_type(atom,'P.3')
set_type(atom, 'P.3')
# check for phosphate group
bonded_oxygens = atom.get_bonded_elements('O')
for o in bonded_oxygens: set_type(o,'O.3')
# if len(bonded_oxygens)>=3:
# # find oxygens only bonded to current phosphorus
# bonded_oxygens_1 = [o for o in bonded_oxygens if len(o.get_bonded_heavy_atoms())==1]
# # find the closest oxygen ...
# closest_oxygen = min(bonded_oxygens_1,
# key= lambda o:propka.calculations.squared_distance(atom,o))
# # ... and set it to O.2
# set_type(closest_oxygen,'O.2')
for o_atom in bonded_oxygens:
set_type(o_atom, 'O.3')
return
element = atom.element.capitalize()
set_type(atom,element)
# info('Using element as type for %s'%atom.element)
return
set_type(atom, element)
def is_ring_member(atom):
return identify_ring(atom,atom,0,[])
"""Determine if atom is a member of a ring.
Args:
atom: atom to test
Returns:
list of atoms
"""
return identify_ring(atom, atom, 0, [])
def identify_ring(this_atom, original_atom, number, past_atoms):
number+=1
past_atoms=past_atoms+[this_atom]
"""Identify the atoms in a ring
Args:
this_atom: atom to test
original_atom: some other atom
number: number of atoms
past_atoms: atoms that have already been found
Returns:
list of atoms
"""
number += 1
past_atoms = past_atoms + [this_atom]
return_atoms = []
if number > 10:
return return_atoms
for atom in this_atom.get_bonded_heavy_atoms():
if atom == original_atom and number>2:
if atom == original_atom and number > 2:
return past_atoms
if atom not in past_atoms:
these_return_atoms = identify_ring(atom, original_atom, number, past_atoms)
these_return_atoms = identify_ring(atom, original_atom, number,
past_atoms)
if len(these_return_atoms) > 0:
if len(return_atoms)>len(these_return_atoms) or len(return_atoms)==0:
if (len(return_atoms) > len(these_return_atoms)
or len(return_atoms) == 0):
return_atoms = these_return_atoms
return return_atoms
def set_type(atom, type_):
"""Set atom type..
def set_type(atom,type):
#info(atom, '->',type)
atom.sybyl_type = type
atom.sybyl_assigned=True
return
Args:
atom: atom to set
type_: type value to set
"""
atom.sybyl_type = type_
atom.sybyl_assigned = True
def is_planar(atom):
""" Finds out if atom forms a plane together with its bonded atoms"""
atoms = [atom]+atom.bonded_atoms
"""Finds out if atom forms a plane together with its bonded atoms.
Args:
atom: atom to test
Returns:
Boolean
"""
atoms = [atom] + atom.bonded_atoms
return are_atoms_planar(atoms)
def are_atoms_planar(atoms):
if len(atoms)==0:
return False
if len(atoms)<4:
return False
v1 = vector(atom1=atoms[0], atom2=atoms[1])
v2 = vector(atom1=atoms[0], atom2=atoms[2])
n = (v1**v2).rescale(1.0)
"""Test whether a group of atoms are planar.
margin = 0.20
for b in atoms[3:]:
v = vector(atom1=atoms[0], atom2=b).rescale(1.0)
#info(atoms[0],abs(v*n) )
if abs(v*n)>margin:
Args:
atoms: list of atoms
Returns:
Boolean
"""
if len(atoms) == 0:
return False
if len(atoms) < 4:
return False
vec1 = Vector(atom1=atoms[0], atom2=atoms[1])
vec2 = Vector(atom1=atoms[0], atom2=atoms[2])
norm = (vec1**vec2).rescale(1.0)
margin = PLANARITY_MARGIN
for atom in atoms[3:]:
vec = Vector(atom1=atoms[0], atom2=atom).rescale(1.0)
if abs(vec*norm) > margin:
return False
return True
def is_aromatic_ring(atoms):
if len(atoms)<5:
return False
def is_aromatic_ring(atoms):
"""Determine whether group of atoms form aromatic ring.
Args:
atoms: list of atoms to test
Returns:
Boolean
"""
if len(atoms) < 5:
return False
for i in range(len(atoms)):
if not are_atoms_planar(atoms[i:]+atoms[:i]):
return False
return True

View File

@@ -1,15 +1,25 @@
#!/usr/bin/env python
from __future__ import division
from __future__ import print_function
"""Ligand pKa values"""
import os
import subprocess
import sys
import propka.molecular_container
import propka.calculations
import propka.parameters
import propka.pdb
import propka.lib
from propka.lib import info, warning
import propka.molecular_container, propka.calculations, propka.calculations, propka.parameters, propka.pdb, propka.lib, os, subprocess, sys
class ligand_pka_values:
class LigandPkaValues:
"""Ligand pKa value class."""
def __init__(self, parameters):
self.parameters = parameters
"""Initialize object with parameters.
Args:
parameters: parameters
"""
self.parameters = parameters
# attempt to find Marvin executables in the path
self.molconvert = self.find_in_path('molconvert')
self.cxcalc = self.find_in_path('cxcalc')
@@ -17,106 +27,173 @@ class ligand_pka_values:
info(self.cxcalc)
info(self.molconvert)
return
@staticmethod
def find_in_path(program):
"""Find a program in the system path.
def find_in_path(self, program):
Args:
program: program to find
Returns:
location of program
"""
path = os.environ.get('PATH').split(os.pathsep)
l = [i for i in filter(lambda loc: os.access(loc, os.F_OK),
map(lambda dir: os.path.join(dir, program),path))]
if len(l) == 0:
info('Error: Could not find %s. Please make sure that it is found in the path.' % program)
locs = [
i for i in filter(lambda loc: os.access(loc, os.F_OK),
map(lambda dir: os.path.join(dir, program),
path))]
if len(locs) == 0:
str_ = "'Error: Could not find {0:s}.".format(program)
str_ += ' Please make sure that it is found in the path.'
info(str_)
sys.exit(-1)
return locs[0]
return l[0]
def get_marvin_pkas_for_pdb_file(self, pdbfile, num_pkas=10, min_ph=-10,
max_ph=20):
"""Use Marvin executables to get pKas for a PDB file.
def get_marvin_pkas_for_pdb_file(self, file, no_pkas=10, min_pH =-10, max_pH=20):
molecule = propka.molecular_container.Molecular_container(file)
self.get_marvin_pkas_for_molecular_container(molecule, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH)
return
Args:
pdbfile: PDB file
num_pkas: number of pKas to get
min_ph: minimum pH value
max_ph: maximum pH value
"""
molecule = propka.molecular_container.Molecular_container(pdbfile)
self.get_marvin_pkas_for_molecular_container(
molecule, num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph)
def get_marvin_pkas_for_molecular_container(self, molecule, no_pkas=10, min_pH =-10, max_pH=20):
def get_marvin_pkas_for_molecular_container(self, molecule, num_pkas=10,
min_ph=-10, max_ph=20):
"""Use Marvin executables to calculate pKas for a molecular container.
Args:
molecule: molecular container
num_pkas: number of pKas to calculate
min_ph: minimum pH value
max_ph: maximum pH value
"""
for name in molecule.conformation_names:
filename = '%s_%s'%(molecule.name,name)
self.get_marvin_pkas_for_conformation_container(molecule.conformations[name], name=filename, reuse=molecule.options.reuse_ligand_mol2_file,
no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH)
filename = '{0:s}_{1:s}'.format(molecule.name, name)
self.get_marvin_pkas_for_conformation_container(
molecule.conformations[name], name=filename,
reuse=molecule.options.reuse_ligand_mol2_file,
num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph)
return
def get_marvin_pkas_for_conformation_container(self, conformation,
name='temp', reuse=False,
num_pkas=10, min_ph=-10,
max_ph=20):
"""Use Marvin executables to calculate pKas for a conformation container.
def get_marvin_pkas_for_conformation_container(self, conformation, name = 'temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20):
Args:
conformation: conformation container
name: filename
reuse: flag to reuse the structure files
num_pkas: number of pKas to calculate
min_ph: minimum pH value
max_ph: maximum pH value
"""
conformation.marvin_pkas_calculated = True
self.get_marvin_pkas_for_atoms(conformation.get_heavy_ligand_atoms(), name=name, reuse=reuse,
no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH)
self.get_marvin_pkas_for_atoms(
conformation.get_heavy_ligand_atoms(), name=name, reuse=reuse,
num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph)
return
def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False,
num_pkas=10, min_ph=-10, max_ph=20):
"""Use Marvin executables to calculate pKas for a list of atoms.
def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20):
Args:
atoms: list of atoms
name: filename
reuse: flag to reuse the structure files
num_pkas: number of pKas to calculate
min_ph: minimum pH value
max_ph: maximum pH value
"""
# do one molecule at the time so we don't confuse marvin
molecules = propka.lib.split_atoms_into_molecules(atoms)
for i in range(len(molecules)):
filename = '%s_%d.mol2'%(name, i+1)
self.get_marvin_pkas_for_molecule(molecules[i], filename=filename, reuse=reuse, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH)
for i, molecule in enumerate(molecules):
filename = '{0:s}_{1:d}.mol2'.format(name, i+1)
self.get_marvin_pkas_for_molecule(
molecule, filename=filename, reuse=reuse, num_pkas=num_pkas,
min_ph=min_ph, max_ph=max_ph)
return
def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2',
reuse=False, num_pkas=10, min_ph=-10,
max_ph=20):
"""Use Marvin executables to calculate pKas for a molecule.
def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', reuse=False, no_pkas=10, min_pH =-10, max_pH=20):
Args:
molecule: the molecule
name: filename
reuse: flag to reuse the structure files
num_pkas: number of pKas to calculate
min_ph: minimum pH value
max_ph: maximum pH value
"""
# print out structure unless we are using user-modified structure
if not reuse:
propka.pdb.write_mol2_for_atoms(atoms, filename)
# check that we actually have a file to work with
if not os.path.isfile(filename):
warning('Didn\'t find a user-modified file \'%s\' - generating one' % filename)
errstr = (
"Didn't find a user-modified file '{0:s}' "
"- generating one".format(
filename))
warning(errstr)
propka.pdb.write_mol2_for_atoms(atoms, filename)
# Marvin
# calculate pKa values
options = 'pka -a %d -b %d --min %f --max %f -d large'%(no_pkas, no_pkas, min_pH, max_pH)
(output,errors) = subprocess.Popen([self.cxcalc, filename]+options.split(),
stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate()
if len(errors)>0:
info('********************************************************************************************************')
info('* Warning: Marvin execution failed: *')
info('* %-100s *' % errors)
info('* *')
info('* Please edit the ligand mol2 file and re-run PropKa with the -l option: %29s *' % filename)
info('********************************************************************************************************')
# Marvin calculate pKa values
fmt = (
'pka -a {num1} -b {num2} --min {min_ph} '
'--max {max_ph} -d large')
options = (
fmt.format(
num1=num_pkas, num2=num_pkas, min_ph=min_ph, max_ph=max_ph))
(output, errors) = subprocess.Popen(
[self.cxcalc, filename]+options.split(), stdout=subprocess.PIPE,
stderr=subprocess.PIPE).communicate()
if len(errors) > 0:
info('***********************************************************'
'*********************************************')
info('* Warning: Marvin execution failed: '
' *')
info('* {0:<100s} *'.format(errors))
info('* '
' *')
info('* Please edit the ligand mol2 file and re-run PropKa with '
'the -l option: {0:>29s} *'.format(filename))
info('***********************************************************'
'*********************************************')
sys.exit(-1)
# extract calculated pkas
indices,pkas,types = self.extract_pkas(output)
indices, pkas, types = self.extract_pkas(output)
# store calculated pka values
for i in range(len(indices)):
atoms[indices[i]].marvin_pka = pkas[i]
atoms[indices[i]].charge = {'a':-1,'b':+1}[types[i]]
info('%s model pKa: %.2f' % (atoms[indices[i]], pkas[i]))
for i, index in enumerate(indices):
atoms[index].marvin_pka = pkas[i]
atoms[index].charge = {'a': -1, 'b': 1}[types[i]]
info('{0:s} model pKa: {1:<.2f}'.format(atoms[index], pkas[i]))
return
@staticmethod
def extract_pkas(output):
"""Extract pKa value from output.
def extract_pkas(self, output):
Args:
output: output string to parse
Returns:
1. Indices
2. Values
3. Types
"""
# split output
[tags, values,empty_line] = output.decode().split('\n')
#info(tags)
#info(values)
[tags, values, _] = output.decode().split('\n')
tags = tags.split('\t')
values = values.split('\t')
# format values
types = [tags[i][0] for i in range(1,len(tags)-1) if len(values)>i and values[i] != '']
indices = [int(a)-1 for a in values[-1].split(',') if a !='']
values = [float(v.replace(',','.')) for v in values[1:-1] if v != '']
types = [
tags[i][0] for i in range(1, len(tags)-1)
if len(values) > i and values[i] != '']
indices = [int(a)-1 for a in values[-1].split(',') if a != '']
values = [float(v.replace(',', '.')) for v in values[1:-1] if v != '']
if len(indices) != len(values) != len(types):
raise Exception('Lengths of atoms and pka values mismatch')
return indices, values, types

View File

@@ -1,145 +1,148 @@
#!/usr/bin/python
#
# Molecular container for storing all contents of pdb files
#
#
from __future__ import division
from __future__ import print_function
import os, sys
import propka.pdb, propka.version, propka.output, propka.conformation_container, propka.group, propka.lib
"""Molecular container for storing all contents of PDB files."""
import os
import sys
import propka.pdb
import propka.version
import propka.output
import propka.group
import propka.lib
from propka.conformation_container import ConformationContainer
from propka.lib import info, warning
class Molecular_container:
def __init__(self, input_file, options=None):
# printing out header before parsing input
propka.output.printHeader()
# TODO - these are constants whose origins are a little murky
UNK_PI_CUTOFF = 0.01
# Maximum number of iterations for finding PI
MAX_ITERATION = 4
class Molecular_container:
"""Container for storing molecular contents of PDB files.
TODO - this class name does not conform to PEP8 but has external use.
We should deprecate and change eventually.
"""
def __init__(self, input_file, options=None):
"""Initialize molecular container.
Args:
input_file: molecular input file
options: options object
"""
# printing out header before parsing input
propka.output.print_header()
# set up some values
self.options = options
self.input_file = input_file
# TODO - replace this indelicate os.path code with pathlib
self.dir = os.path.split(input_file)[0]
self.file = os.path.split(input_file)[1]
self.name = self.file[0:self.file.rfind('.')]
input_file_extension = input_file[input_file.rfind('.'):]
# set the version
if options:
parameters = propka.parameters.Parameters(self.options.parameters)
else:
parameters = propka.parameters.Parameters('propka.cfg')
try:
exec('self.version = propka.version.%s(parameters)'%parameters.version)
except:
raise Exception('Error: Version %s does not exist'%parameters.version)
version_class = getattr(propka.version, parameters.version)
self.version = version_class(parameters)
except AttributeError as err:
print(err)
errstr = 'Error: Version {0:s} does not exist'.format(
parameters.version)
raise Exception(errstr)
# read the input file
if input_file_extension[0:4] == '.pdb':
# input is a pdb file
# read in atoms and top up containers to make sure that all atoms are present in all conformations
[self.conformations, self.conformation_names] = propka.pdb.read_pdb(input_file, self.version.parameters,self)
if len(self.conformations)==0:
info('Error: The pdb file does not seems to contain any molecular conformations')
# input is a pdb file. read in atoms and top up containers to make
# sure that all atoms are present in all conformations
[self.conformations, self.conformation_names] = (
propka.pdb.read_pdb(input_file, self.version.parameters, self))
if len(self.conformations) == 0:
info('Error: The pdb file does not seems to contain any '
'molecular conformations')
sys.exit(-1)
self.top_up_conformations()
# make a structure precheck
propka.pdb.protein_precheck(self.conformations, self.conformation_names)
propka.pdb.protein_precheck(self.conformations,
self.conformation_names)
# set up atom bonding and protonation
self.version.setup_bonding_and_protonation(self)
# Extract groups
self.extract_groups()
# sort atoms
for name in self.conformation_names:
self.conformations[name].sort_atoms()
# find coupled groups
self.find_covalently_coupled_groups()
# write out the input file
filename = self.file.replace(input_file_extension,'.propka_input')
filename = self.file.replace(input_file_extension, '.propka_input')
propka.pdb.write_input(self, filename)
elif input_file_extension == '.propka_input':
#input is a propka_input file
[self.conformations, self.conformation_names] = propka.pdb.read_input(input_file, self.version.parameters, self)
# Extract groups - this merely sets up the groups found in the input file
[self.conformations, self.conformation_names] = (
propka.pdb.read_input(input_file, self.version.parameters,
self))
# Extract groups - this merely sets up the groups found in the
# input file
self.extract_groups()
# do some additional set up
self.additional_setup_when_reading_input_file()
else:
info('Unrecognized input file:%s' % input_file)
info('Unrecognized input file:{0:s}'.format(input_file))
sys.exit(-1)
return
def top_up_conformations(self):
""" Makes sure that all atoms are present in all conformations """
"""Makes sure that all atoms are present in all conformations."""
for name in self.conformation_names:
if name!='1A' and (len(self.conformations[name]) < len(self.conformations['1A'])):
if (name != '1A' and (len(self.conformations[name])
< len(self.conformations['1A']))):
self.conformations[name].top_up(self.conformations['1A'])
return
def find_covalently_coupled_groups(self):
"""Find covalently coupled groups."""
info('-' * 103)
for name in self.conformation_names:
self.conformations[name].find_covalently_coupled_groups()
return
def find_non_covalently_coupled_groups(self):
"""Find non-covalently coupled groups."""
info('-' * 103)
verbose = self.options.display_coupled_residues
for name in self.conformation_names:
self.conformations[name].find_non_covalently_coupled_groups(verbose=self.options.display_coupled_residues)
return
self.conformations[name].find_non_covalently_coupled_groups(
verbose=verbose)
def extract_groups(self):
""" Identify the groups needed for pKa calculation """
"""Identify the groups needed for pKa calculation."""
for name in self.conformation_names:
self.conformations[name].extract_groups()
return
def additional_setup_when_reading_input_file(self):
"""Additional setup."""
for name in self.conformation_names:
self.conformations[name].additional_setup_when_reading_input_file()
return
def calculate_pka(self):
"""Calculate pKa values."""
# calculate for each conformation
for name in self.conformation_names:
self.conformations[name].calculate_pka(self.version, self.options)
self.conformations[name].calculate_pka(
self.version, self.options)
# find non-covalently coupled groups
self.find_non_covalently_coupled_groups()
# find the average of the conformations
self.average_of_conformations()
# print out the conformation-average results
propka.output.printResult(self, 'AVR', self.version.parameters)
return
propka.output.print_result(self, 'AVR', self.version.parameters)
def average_of_conformations(self):
"""Generate an average of conformations."""
parameters = self.conformations[self.conformation_names[0]].parameters
# make a new configuration to hold the average values
avr_conformation = propka.conformation_container.Conformation_container(name='average',
parameters=self.conformations[self.conformation_names[0]].parameters,
molecular_container=self)
avr_conformation = ConformationContainer(
name='average', parameters=parameters, molecular_container=self)
container = self.conformations[self.conformation_names[0]]
for group in container.get_groups_for_calculations():
# new group to hold average values
@@ -150,104 +153,135 @@ class Molecular_container:
if group_to_add:
avr_group += group_to_add
else:
warning('Group %s could not be found in conformation %s.' % (group.atom.residue_label, name))
str_ = (
'Group {0:s} could not be found in '
'conformation {0:s}.'.format(
group.atom.residue_label, name))
warning(str_)
# ... and store the average value
avr_group = avr_group / len(self.conformation_names)
avr_conformation.groups.append(avr_group)
# store information on coupling in the average container
if len(list(filter(lambda c: c.non_covalently_coupled_groups, self.conformations.values()))):
if len(list(filter(lambda c: c.non_covalently_coupled_groups,
self.conformations.values()))):
avr_conformation.non_covalently_coupled_groups = True
# store chain info
avr_conformation.chains = self.conformations[self.conformation_names[0]].chains
avr_conformation.chains = self.conformations[
self.conformation_names[0]].chains
self.conformations['AVR'] = avr_conformation
return
def write_pka(self, filename=None, reference="neutral", direction="folding", options=None):
#for name in self.conformation_names:
# propka.output.writePKA(self, self.version.parameters, filename='%s_3.1_%s.pka'%(self.name, name),
# conformation=name,reference=reference,
# direction=direction, options=options)
def write_pka(self, filename=None, reference="neutral",
direction="folding", options=None):
"""Write pKa information to a file.
Args:
filename: file to write to
reference: reference state
direction: folding vs. unfolding
options: options object
"""
# write out the average conformation
filename=os.path.join('%s.pka'%(self.name))
# if the display_coupled_residues option is true,
# write the results out to an alternative pka file
filename = os.path.join('{0:s}.pka'.format(self.name))
# if the display_coupled_residues option is true, write the results out
# to an alternative pka file
if self.options.display_coupled_residues:
filename=os.path.join('%s_alt_state.pka'%(self.name))
filename = os.path.join('{0:s}_alt_state.pka'.format(self.name))
if (hasattr(self.version.parameters, 'output_file_tag')
and len(self.version.parameters.output_file_tag) > 0):
filename = os.path.join(
'{0:s}_{1:s}.pka'.format(
self.name, self.version.parameters.output_file_tag))
propka.output.write_pka(
self, self.version.parameters, filename=filename,
conformation='AVR', reference=reference)
if hasattr(self.version.parameters, 'output_file_tag') and len(self.version.parameters.output_file_tag)>0:
filename=os.path.join('%s_%s.pka'%(self.name,self.version.parameters.output_file_tag))
def get_folding_profile(self, conformation='AVR', reference="neutral",
grid=[0., 14., 0.1]):
"""Get a folding profile.
propka.output.writePKA(self, self.version.parameters, filename=filename,
conformation='AVR',reference=reference,
direction=direction, options=options)
return
def getFoldingProfile(self, conformation='AVR',reference="neutral", direction="folding", grid=[0., 14., 0.1], options=None):
Args:
conformation: conformation to select
reference: reference state
direction: folding direction (folding)
grid: the grid of pH values [min, max, step_size]
options: options object
Returns:
TODO - figure out what these are
1. profile
2. opt
3. range_80pct
4. stability_range
"""
# calculate stability profile
profile = []
for ph in propka.lib.make_grid(*grid):
ddg = self.conformations[conformation].calculate_folding_energy( pH=ph, reference=reference)
#info(ph,ddg)
conf = self.conformations[conformation]
ddg = conf.calculate_folding_energy(ph=ph, reference=reference)
profile.append([ph, ddg])
# find optimum
opt =[None, 1e6]
opt = [None, 1e6]
for point in profile:
opt = min(opt, point, key=lambda v:v[1])
opt = min(opt, point, key=lambda v: v[1])
# find values within 80 % of optimum
range_80pct = [None, None]
values_within_80pct = [p[0] for p in profile if p[1]< 0.8*opt[1]]
if len(values_within_80pct)>0:
values_within_80pct = [p[0] for p in profile if p[1] < 0.8*opt[1]]
if len(values_within_80pct) > 0:
range_80pct = [min(values_within_80pct), max(values_within_80pct)]
# find stability range
stability_range = [None, None]
stable_values = [p[0] for p in profile if p[1]< 0.0]
if len(stable_values)>0:
stable_values = [p[0] for p in profile if p[1] < 0.0]
if len(stable_values) > 0:
stability_range = [min(stable_values), max(stable_values)]
return profile, opt, range_80pct, stability_range
def get_charge_profile(self, conformation='AVR', grid=[0., 14., .1]):
"""Get charge profile for conformation as function of pH.
def getChargeProfile(self, conformation='AVR', grid=[0., 14., .1]):
Args:
conformation: conformation to test
grid: grid of pH values [min, max, step]
Returns:
list of charge state values
"""
charge_profile = []
for ph in propka.lib.make_grid(*grid):
q_unfolded, q_folded = self.conformations[conformation].calculate_charge(self.version.parameters, pH=ph)
conf = self.conformations[conformation]
q_unfolded, q_folded = conf.calculate_charge(
self.version.parameters, ph=ph)
charge_profile.append([ph, q_unfolded, q_folded])
return charge_profile
def getPI(self, conformation='AVR', grid=[0., 14., 1], iteration=0):
#info('staring',grid, iteration)
# search
charge_profile = self.getChargeProfile(conformation=conformation, grid=grid)
pi = []
pi_folded = pi_unfolded = [None, 1e6,1e6]
for point in charge_profile:
pi_folded = min(pi_folded, point, key=lambda v:abs(v[2]))
pi_unfolded = min(pi_unfolded, point, key=lambda v:abs(v[1]))
def get_pi(self, conformation='AVR', grid=[0., 14., 1], iteration=0):
"""Get the isoelectric points for folded and unfolded states.
# If results are not good enough, do it again with a higher sampling resolution
pi_folded_value = pi_folded[0]
Args:
conformation: conformation to test
grid: grid of pH values [min, max, step]
iteration: iteration number of process
Returns:
1. Folded state PI
2. Unfolded state PI
"""
charge_profile = self.get_charge_profile(
conformation=conformation, grid=grid)
pi_folded = pi_unfolded = [None, 1e6, 1e6]
for point in charge_profile:
pi_folded = min(pi_folded, point, key=lambda v: abs(v[2]))
pi_unfolded = min(pi_unfolded, point, key=lambda v: abs(v[1]))
# If results are not good enough, do it again with a higher sampling
# resolution
pi_folded_value = pi_folded[0]
pi_unfolded_value = pi_unfolded[0]
step = grid[2]
if (pi_folded[2] > 0.01 or pi_unfolded[1] > 0.01) and iteration<4:
pi_folded_value, x = self.getPI(conformation=conformation, grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0], iteration=iteration+1)
x, pi_unfolded_value = self.getPI(conformation=conformation, grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0], iteration=iteration+1)
# TODO - need to warn if maximum number of iterations is exceeded
if ((pi_folded[2] > UNK_PI_CUTOFF
or pi_unfolded[1] > UNK_PI_CUTOFF) and iteration < MAX_ITERATION):
pi_folded_value, _ = self.get_pi(
conformation=conformation,
grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0],
iteration=iteration+1)
_, pi_unfolded_value = self.get_pi(
conformation=conformation,
grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0],
iteration=iteration+1)
return pi_folded_value, pi_unfolded_value
if __name__ == '__main__':
input_file = sys.argv[1]
mc = Molecular_container(input_file)

View File

@@ -1,397 +1,463 @@
from __future__ import division
from __future__ import print_function
import sys
import propka.lib
from propka.lib import info, warning
"""Output routines."""
from datetime import date
from propka.lib import info
def printHeader():
def print_header():
"""Print header section of output."""
str_ = "{0:s}\n".format(get_propka_header())
str_ += "{0:s}\n".format(get_references_header())
str_ += "{0:s}\n".format(get_warning_header())
info(str_)
def write_pdb(protein, pdbfile=None, filename=None, include_hydrogens=False,
_=None):
"""Write a residue to the new PDB file.
Args:
protein: protein object
pdbfile: PDB file
filename: file to write to
include_hydrogens: Boolean indicating whether to include hydrogens
options: options object
"""
prints the header section
"""
str = "%s\n" % ( getPropkaHeader() )
str += "%s\n" % ( getReferencesHeader() )
str += "%s\n" % ( getWarningHeader() )
info(str)
def writePDB(protein, file=None, filename=None, include_hydrogens=False, options=None):
"""
Write the residue to the new pdbfile
"""
if file == None:
# opening file if not given
if filename == None:
filename = "%s.pdb" % (protein.name)
file = open(filename, 'w')
info("writing pdbfile %s" % (filename))
close_file = True
if pdbfile is None:
# opening file if not given
if filename is None:
filename = "{0:s}.pdb".format(protein.name)
# TODO - this would be better as a context manager
pdbfile = open(filename, 'w')
info("writing pdbfile {0:s}".format(filename))
close_file = True
else:
# don't close the file, it was opened in a different place
close_file = False
# don't close the file, it was opened in a different place
close_file = False
numb = 0
for chain in protein.chains:
for residue in chain.residues:
if residue.res_name not in ["N+ ", "C- "]:
for atom in residue.atoms:
if include_hydrogens == False and atom.name[0] == "H":
""" don't print """
else:
numb += 1
line = atom.make_pdb_line2(numb=numb)
line += "\n"
file.write(line)
if close_file == True:
file.close()
for residue in chain.residues:
if residue.res_name not in ["N+ ", "C- "]:
for atom in residue.atoms:
if (not include_hydrogens) and atom.name[0] == "H":
# don't print
pass
else:
numb += 1
line = atom.make_pdb_line2(numb=numb)
line += "\n"
pdbfile.write(line)
if close_file:
pdbfile.close()
def writePKA(protein, parameters, filename=None, conformation ='1A',reference="neutral", direction="folding", verbose=False, options=None):
"""
Write the pka-file based on the given protein
def write_pka(protein, parameters, filename=None, conformation='1A',
reference="neutral", _="folding", verbose=False,
__=None):
"""Write the pKa-file based on the given protein.
Args:
protein: protein object
filename: output file name
conformation: TODO - figure this out
reference: reference state
_: "folding" or other
verbose: Boolean flag for verbosity
__: options object
"""
# TODO - the code immediately overrides the verbose argument; why?
verbose = True
if filename == None:
filename = "%s.pka" % (protein.name)
file = open(filename, 'w')
if verbose == True:
info("Writing %s" % (filename))
if filename is None:
filename = "{0:s}.pka".format(protein.name)
# TODO - this would be much better with a context manager
file_ = open(filename, 'w')
if verbose:
info("Writing {0:s}".format(filename))
# writing propka header
str = "%s\n" % ( getPropkaHeader() )
str += "%s\n" % ( getReferencesHeader() )
str += "%s\n" % ( getWarningHeader() )
str_ = "{0:s}\n".format(get_propka_header())
str_ += "{0:s}\n".format(get_references_header())
str_ += "{0:s}\n".format(get_warning_header())
# writing pKa determinant section
str += getDeterminantSection(protein,conformation, parameters)
str_ += get_determinant_section(protein, conformation, parameters)
# writing pKa summary section
str += getSummarySection(protein,conformation,parameters)
str += "%s\n" % ( getTheLine() )
str_ += get_summary_section(protein, conformation, parameters)
str_ += "{0:s}\n".format(get_the_line())
# printing Folding Profile
str += getFoldingProfileSection(protein, conformation=conformation, reference=reference, direction=direction, window=[0., 14., 1.0], options=options)
str_ += get_folding_profile_section(
protein, conformation=conformation, reference=reference,
window=[0., 14., 1.0])
# printing Protein Charge Profile
str += getChargeProfileSection(protein, conformation=conformation)
str_ += get_charge_profile_section(protein, conformation=conformation)
# now, writing the pka text to file
file.write(str)
file.close()
file_.write(str_)
file_.close()
def printTmProfile(protein, reference="neutral", window=[0., 14., 1.], Tm=[0.,0.], Tms=None, ref=None, verbose=False, options=None):
def print_tm_profile(protein, reference="neutral", window=[0., 14., 1.],
__=[0., 0.], tms=None, ref=None, _=False,
options=None):
"""Print Tm profile.
I think Tm refers to the denaturation temperature.
Args:
protein: protein object
reference: reference state
window: pH window [min, max, step]
__: temperature range [min, max]
tms: TODO - figure this out
ref: TODO - figure this out (probably reference state?)
_: Boolean for verbosity
options: options object
"""
prints Tm profile
"""
profile = protein.getTmProfile(reference=reference, grid=[0., 14., 0.1], Tms=Tms, ref=ref, options=options)
if profile == None:
str = "Could not determine Tm-profile\n"
profile = protein.getTmProfile(
reference=reference, grid=[0., 14., 0.1], tms=tms, ref=ref,
options=options)
if profile is None:
str_ = "Could not determine Tm-profile\n"
else:
str = " suggested Tm-profile for %s\n" % (protein.name)
for (pH, Tm) in profile:
if pH >= window[0] and pH <= window[1] and (pH%window[2] < 0.01 or pH%window[2] > 0.99*window[2]):
str += "%6.2lf%10.2lf\n" % (pH, Tm)
info(str)
str_ = " suggested Tm-profile for {0:s}\n".format(protein.name)
for (ph, tm_) in profile:
if (ph >= window[0] and ph <= window[1]
and (ph % window[2] < 0.01
or ph % window[2] > 0.99*window[2])):
str_ += "{0:>6.2f}{1:>10.2f}\n".format(ph, tm_)
info(str_)
def printResult(protein, conformation, parameters):
def print_result(protein, conformation, parameters):
"""Prints all resulting output from determinants and down.
Args:
protein: protein object
conformation: specific conformation
parameters: parameters
"""
prints all resulting output from determinants and down
"""
printPKASection(protein, conformation, parameters)
print_pka_section(protein, conformation, parameters)
def printPKASection(protein, conformation, parameters):
"""
prints out the pka-section of the result
def print_pka_section(protein, conformation, parameters):
"""Prints out pKa section of results.
Args:
protein: protein object
conformation: specific conformation
parameters: parameters
"""
# geting the determinants section
str = getDeterminantSection(protein, conformation, parameters)
info(str)
str = getSummarySection(protein,conformation,parameters)
info(str)
str_ = get_determinant_section(protein, conformation, parameters)
info(str_)
str_ = get_summary_section(protein, conformation, parameters)
info(str_)
def getDeterminantSection(protein, conformation, parameters):
"""
prints out the pka-section of the result
def get_determinant_section(protein, conformation, parameters):
"""Returns string with determinant section of results.
Args:
protein: protein object
conformation: specific conformation
parameters: parameters
Returns:
string
"""
# getting the same order as in propka2.0
str = "%s\n" % ( getDeterminantsHeader() )
str_ = "{0:s}\n".format(get_determinants_header())
# printing determinants
for chain in protein.conformations[conformation].chains:
for residue_type in parameters.write_out_order:
groups = [g for g in protein.conformations[conformation].groups if g.atom.chain_id == chain]
groups = [
g for g in protein.conformations[conformation].groups
if g.atom.chain_id == chain]
for group in groups:
if group.residue_type == residue_type:
str += "%s" % ( group.getDeterminantString(parameters.remove_penalised_group) )
str_ += "{0:s}".format(
group.get_determinant_string(
parameters.remove_penalised_group))
# Add a warning in case of coupled residues
if protein.conformations[conformation].non_covalently_coupled_groups and not protein.options.display_coupled_residues:
str += 'Coupled residues (marked *) were detected. Please rerun PropKa with the --display-coupled-residues \nor -d option for detailed information.\n'
return str
if (protein.conformations[conformation].non_covalently_coupled_groups
and not protein.options.display_coupled_residues):
str_ += 'Coupled residues (marked *) were detected.'
str_ += 'Please rerun PropKa with the --display-coupled-residues \n'
str_ += 'or -d option for detailed information.\n'
return str_
def getSummarySection(protein, conformation, parameters):
def get_summary_section(protein, conformation, parameters):
"""Returns string with summary section of the results.
Args:
protein: protein object
conformation: specific conformation
parameters: parameters
Returns:
string
"""
prints out the pka-section of the result
"""
str = "%s\n" % ( getSummaryHeader() )
str_ = "{0:s}\n".format(get_summary_header())
# printing pKa summary
for residue_type in parameters.write_out_order:
for group in protein.conformations[conformation].groups:
if group.residue_type == residue_type:
str += "%s" % ( group.getSummaryString(parameters.remove_penalised_group) )
return str
if group.residue_type == residue_type:
str_ += "{0:s}".format(
group.get_summary_string(
parameters.remove_penalised_group))
return str_
def getFoldingProfileSection(protein, conformation='AVR', direction="folding", reference="neutral", window=[0., 14., 1.0], verbose=False, options=None):
def get_folding_profile_section(
protein, conformation='AVR', direction="folding", reference="neutral",
window=[0., 14., 1.0], _=False, __=None):
"""Returns string with the folding profile section of the results.
Args:
protein: protein object
conformation: specific conformation
direction: 'folding' or other
reference: reference state
window: pH window [min, max, step]
_: Boolean for verbose output
__: options object
Returns:
string
"""
returns the protein-folding-profile section
"""
str = getTheLine()
str += "\n"
str += "Free energy of %9s (kcal/mol) as a function of pH (using %s reference)\n" % (direction, reference)
profile, [pH_opt, dG_opt], [dG_min, dG_max], [pH_min, pH_max] = protein.getFoldingProfile(conformation=conformation,
reference=reference,
direction=direction, grid=[0., 14., 0.1], options=options)
if profile == None:
str += "Could not determine folding profile\n"
str_ = get_the_line()
str_ += "\n"
str_ += "Free energy of {0:>9s} (kcal/mol) as a function".format(direction)
str_ += " of pH (using {0:s} reference)\n".format(reference)
profile, [ph_opt, dg_opt], [dg_min, dg_max], [ph_min, ph_max] = (
protein.get_folding_profile(
conformation=conformation, reference=reference,
grid=[0., 14., 0.1]))
if profile is None:
str_ += "Could not determine folding profile\n"
else:
for (pH, dG) in profile:
if pH >= window[0] and pH <= window[1] and (pH%window[2] < 0.05 or pH%window[2] > 0.95):
str += "%6.2lf%10.2lf\n" % (pH, dG)
str += "\n"
if pH_opt == None or dG_opt == None:
str += "Could not determine pH optimum\n"
for (ph, dg) in profile:
if ph >= window[0] and ph <= window[1]:
if ph % window[2] < 0.05 or ph % window[2] > 0.95:
str_ += "{0:>6.2f}{1:>10.2f}\n".format(ph, dg)
str_ += "\n"
if ph_opt is None or dg_opt is None:
str_ += "Could not determine pH optimum\n"
else:
str += "The pH of optimum stability is %4.1lf for which the free energy is%6.1lf kcal/mol at 298K\n" % (pH_opt, dG_opt)
if dG_min == None or dG_max == None:
str += "Could not determine pH values where the free energy is within 80 %s of minimum\n" % ("%")
str_ += "The pH of optimum stability is {0:>4.1f}".format(ph_opt)
str_ += (
" for which the free energy is {0:>6.1f} kcal/mol at 298K\n".format(
dg_opt))
if dg_min is None or dg_max is None:
str_ += "Could not determine pH values where the free energy"
str_ += " is within 80 % of minimum\n"
else:
str += "The free energy is within 80 %s of maximum at pH %4.1lf to %4.1lf\n" % ("%", dG_min, dG_max)
if pH_min == None or pH_max == None:
str += "Could not determine the pH-range where the free energy is negative\n\n"
str_ += "The free energy is within 80 % of maximum"
str_ += " at pH {0:>4.1f} to {1:>4.1f}\n".format(dg_min, dg_max)
if ph_min is None or ph_max is None:
str_ += "Could not determine the pH-range where the free"
str_ += " energy is negative\n\n"
else:
str += "The free energy is negative in the range %4.1lf - %4.1lf\n\n" % (pH_min, pH_max)
str_ += "The free energy is negative in the range"
str_ += " {0:>4.1f} - {1:>4.1f}\n\n".format(ph_min, ph_max)
return str_
return str
def get_charge_profile_section(protein, conformation='AVR', _=None):
"""Returns string with the charge profile section of the results.
def getChargeProfileSection(protein, conformation='AVR', options=None):
Args:
protein: protein object
conformation: specific conformation
_: options object
Returns:
string
"""
returns the protein-folding-profile section
"""
str = "Protein charge of folded and unfolded state as a function of pH\n"
profile = protein.getChargeProfile(conformation=conformation,grid=[0., 14., 1.])
if profile == None:
str += "Could not determine charge profile\n"
str_ = "Protein charge of folded and unfolded state as a function of pH\n"
profile = protein.get_charge_profile(conformation=conformation,
grid=[0., 14., 1.])
if profile is None:
str_ += "Could not determine charge profile\n"
else:
str += "%6s%10s%8s\n" % ("pH", "unfolded", "folded")
for (pH, Q_mod, Q_pro) in profile:
str += "%6.2lf%10.2lf%8.2lf\n" % (pH, Q_mod, Q_pro)
pI_pro, pI_mod = protein.getPI(conformation=conformation)
if pI_pro == None or pI_mod == None:
str += "Could not determine the pI\n\n"
str_ += ' pH unfolded folded\n'
for (ph, q_mod, q_pro) in profile:
str_ += "{ph:6.2f}{qm:10.2f}{qp:8.2f}\n".format(
ph=ph, qm=q_mod, qp=q_pro)
pi_pro, pi_mod = protein.get_pi(conformation=conformation)
if pi_pro is None or pi_mod is None:
str_ += "Could not determine the pI\n\n"
else:
str += "The pI is %5.2lf (folded) and %5.2lf (unfolded)\n" % (pI_pro, pI_mod)
str_ += ("The pI is {0:>5.2f} (folded) and {1:>5.2f} (unfolded)\n")
return str_
return str
def write_jackal_scap_file(mutation_data=None, filename="1xxx_scap.list",
_=None):
"""Write a scap file for, i.e., generating a mutated protein
def writeJackalScapFile(mutationData=None, filename="1xxx_scap.list", options=None):
TODO - figure out what this is
"""
writing a scap file for, i.e., generating a mutated protein
with open(filename, 'w') as file_:
for chain_id, _, res_num, code2 in mutation_data:
str_ = "{chain:s}, {num:d}, {code:s}\n".format(
chain=chain_id, num=res_num, code=code2)
file_.write(str_)
def write_scwrl_sequence_file(sequence, filename="x-ray.seq", _=None):
"""Write a scwrl sequence file for, e.g., generating a mutated protein
TODO - figure out what this is
"""
file = open(filename, 'w')
for chain_id, code1, res_num, code2 in mutationData:
str = "%s, %d, %s\n" % (chain_id, res_num, code2)
file.write(str)
file.close()
with open(filename, 'w') as file_:
start = 0
while len(sequence[start:]) > 60:
file_.write("{0:s}s\n".format(sequence[start:start+60]))
start += 60
file_.write("{0:s}\n".format(sequence[start:]))
def writeScwrlSequenceFile(sequence, filename="x-ray.seq", options=None):
def get_propka_header():
"""Create the header.
Returns:
string
"""
writing a scwrl sequence file for, e.g., generating a mutated protein
"""
file = open(filename, 'w')
start = 0
while len(sequence[start:]) > 60:
file.write( "%s\n" % (sequence[start:start+60]) )
start += 60
file.write( "%s\n" % (sequence[start:]) )
file.close()
# --- various header text --- #
def getPropkaHeader():
"""
Creates the header
"""
from datetime import date
today = date.today()
str_ = "propka3.1 {0!s:>93s}\n".format(today)
str_ += """
-------------------------------------------------------------------------------
-- --
-- PROPKA: A PROTEIN PKA PREDICTOR --
-- --
-- VERSION 1.0, 04/25/2004, IOWA CITY --
-- BY HUI LI --
-- --
-- VERSION 2.0, 11/05/2007, IOWA CITY/COPENHAGEN --
-- BY DELPHINE C. BAS AND DAVID M. ROGERS --
-- --
-- VERSION 3.0, 01/06/2011, COPENHAGEN --
-- BY MATS H.M. OLSSON AND CHRESTEN R. SONDERGARD --
-- --
-- VERSION 3.1, 07/01/2011, COPENHAGEN --
-- BY CHRESTEN R. SONDERGARD AND MATS H.M. OLSSON --
-- --
-------------------------------------------------------------------------------
"""
return str_
def get_references_header():
"""Create the 'references' part of output file.
str = "propka3.1 %93s\n" % (today)
str += "-------------------------------------------------------------------------------------------------------\n"
str += "-- --\n"
str += "-- PROPKA: A PROTEIN PKA PREDICTOR --\n"
str += "-- --\n"
str += "-- VERSION 1.0, 04/25/2004, IOWA CITY --\n"
str += "-- BY HUI LI --\n"
str += "-- --\n"
str += "-- VERSION 2.0, 11/05/2007, IOWA CITY/COPENHAGEN --\n"
str += "-- BY DELPHINE C. BAS AND DAVID M. ROGERS --\n"
str += "-- --\n"
str += "-- VERSION 3.0, 01/06/2011, COPENHAGEN --\n"
str += "-- BY MATS H.M. OLSSON AND CHRESTEN R. SONDERGARD --\n"
str += "-- --\n"
str += "-- VERSION 3.1, 07/01/2011, COPENHAGEN --\n"
str += "-- BY CHRESTEN R. SONDERGARD AND MATS H.M. OLSSON --\n"
str += "-------------------------------------------------------------------------------------------------------\n"
str += "\n"
return str
def getReferencesHeader():
Returns:
string
"""
Returns the 'references' part in output file
str_ = """
-------------------------------------------------------------------------------
References:
Very Fast Empirical Prediction and Rationalization of Protein pKa Values.
Hui Li, Andrew D. Robertson and Jan H. Jensen. PROTEINS: Structure, Function,
and Bioinformatics. 61:704-721 (2005)
Very Fast Prediction and Rationalization of pKa Values for Protein-Ligand
Complexes. Delphine C. Bas, David M. Rogers and Jan H. Jensen. PROTEINS:
Structure, Function, and Bioinformatics 73:765-783 (2008)
PROPKA3: Consistent Treatment of Internal and Surface Residues in Empirical
pKa predictions. Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski,
and Jan H. Jensen. Journal of Chemical Theory and Computation, 7(2):525-537
(2011)
Improved Treatment of Ligands and Coupling Effects in Empirical Calculation
and Rationalization of pKa Values. Chresten R. Sondergaard, Mats H.M. Olsson,
Michal Rostkowski, and Jan H. Jensen. Journal of Chemical Theory and
Computation, (2011)
-------------------------------------------------------------------------------
"""
return str_
def get_warning_header():
"""Create the 'warning' part of the output file.
TODO - this function is essentially a no-op.
Returns:
string
"""
str = ""
str += "-------------------------------------------------------------------------------------------------------\n"
str += " References:\n"
str += "\n"
str += " Very Fast Empirical Prediction and Rationalization of Protein pKa Values\n"
str += " Hui Li, Andrew D. Robertson and Jan H. Jensen\n"
str += " PROTEINS: Structure, Function, and Bioinformatics 61:704-721 (2005)\n"
str += " \n"
str += " Very Fast Prediction and Rationalization of pKa Values for Protein-Ligand Complexes\n"
str += " Delphine C. Bas, David M. Rogers and Jan H. Jensen\n"
str += " PROTEINS: Structure, Function, and Bioinformatics 73:765-783 (2008)\n"
str += " \n"
str += " PROPKA3: Consistent Treatment of Internal and Surface Residues in Empirical pKa predictions\n"
str += " Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski, and Jan H. Jensen\n"
str += " Journal of Chemical Theory and Computation, 7(2):525-537 (2011)\n"
str += " \n"
str += " Improved Treatment of Ligands and Coupling Effects in Empirical Calculation\n"
str += " and Rationalization of pKa Values\n"
str += " Chresten R. Sondergaard, Mats H.M. Olsson, Michal Rostkowski, and Jan H. Jensen\n"
str += " Journal of Chemical Theory and Computation, (2011)\n"
str += " \n"
str += "-------------------------------------------------------------------------------------------------------\n"
return str
str_ = ""
return str_
def getWarningHeader():
def get_determinants_header():
"""Create the Determinant header.
Returns:
string
"""
Returns the 'warning' part in output file
str_ = """
--------- ----- ------ --------------------- -------------- -------------- --------------
DESOLVATION EFFECTS SIDECHAIN BACKBONE COULOMBIC
RESIDUE pKa BURIED REGULAR RE HYDROGEN BOND HYDROGEN BOND INTERACTION
--------- ----- ------ --------- --------- -------------- -------------- --------------
"""
return str_
def get_summary_header():
"""Create the summary header.
Returns:
string
"""
str = ""
return str
str_ = get_the_line()
str_ += "\n"
str_ += "SUMMARY OF THIS PREDICTION\n"
str_ += " Group pKa model-pKa ligand atom-type"
return str_
def getDeterminantsHeader():
def get_the_line():
"""Draw the line-Johnny Cash would have been proud-or actually Aerosmith!
NOTE - Johnny Cash walked the line.
Returns:
string
"""
Creates the Determinant header
str_ = ""
str_ += ("-" * 104)
return str_
def make_interaction_map(name, list_, interaction):
"""Print out an interaction map named 'name' of the groups in 'list'
based on the function 'interaction'
Args:
list_: list of groups
interaction: some sort of function
Returns:
string
"""
str = ""
str += "--------- ----- ------ --------------------- -------------- -------------- --------------\n"
str += " DESOLVATION EFFECTS SIDECHAIN BACKBONE COULOMBIC \n"
str += " RESIDUE pKa BURIED REGULAR RE HYDROGEN BOND HYDROGEN BOND INTERACTION \n"
str += "--------- ----- ------ --------- --------- -------------- -------------- --------------\n"
return str
def getSummaryHeader():
"""
returns the summary header
"""
str = getTheLine()
str += "\n"
str += "SUMMARY OF THIS PREDICTION\n"
str += " Group pKa model-pKa ligand atom-type"
return str
def getTheLine():
"""
draw the line - Johnny Cash would have been proud - or actually Aerosmith!
"""
str = ""
for i in range(0, 104):
str += "-"
return str
# Interaction maps
def make_interaction_map(name, list, interaction):
""" Print out an interaction map named 'name' of the groups in 'list'
based on the function 'interaction' """
# return an empty string, if the list is empty
if len(list)==0:
if len(list_) == 0:
return ''
# for long list, use condensed formatting
if len(list)>10:
if len(list_) > 10:
res = 'Condensed form:\n'
for i in range(len(list)):
for j in range(i,len(list)):
if interaction(list[i],list[j]):
res += 'Coupling: %9s - %9s\n'%(list[i].label,list[j].label)
for i, group1 in enumerate(list_):
for group2 in list_[i:]:
if interaction(group1, group2):
res += 'Coupling: {0:>9s} - {1:>9s}\n'.format(
group1.label, group2.label)
return res
# Name and map header
res = '%s\n%12s'%(name,'')
for g in list:
res += '%9s | '%g.label
res = '{0:s}\n{1:>12s}'.format(name, '')
for group in list_:
res += '{0:>9s} | '.format(group.label)
# do the map
for g1 in list:
res += '\n%-12s'%(g1.label)
for g2 in list:
for group1 in list_:
res += '\n{0:<12s}'.format(group1.label)
for group2 in list_:
tag = ''
if interaction(g1, g2):
if interaction(group1, group2):
tag = ' X '
res += '%10s| '%tag
res += '{0:>10s}| '.format(tag)
return res

View File

@@ -1,232 +1,276 @@
from __future__ import division
from __future__ import print_function
import math
"""Holds parameters and settings."""
import pkg_resources
import propka.lib as lib
import sys, os
from propka.lib import info, warning
import pkg_resources
# names and types of all key words in configuration file
matrices = ['interaction_matrix']
pair_wise_matrices = ['sidechain_cutoffs']
number_dictionaries = ['VanDerWaalsVolume','charge','model_pkas','ions',
'valence_electrons','custom_model_pkas']
list_dictionaries = ['backbone_NH_hydrogen_bond','backbone_CO_hydrogen_bond']
string_dictionaries = ['protein_group_mapping']
string_lists = ['ignore_residues','angular_dependent_sidechain_interactions',
'acid_list','base_list','exclude_sidechain_interactions',
'backbone_reorganisation_list','write_out_order']
distances = ['desolv_cutoff','buried_cutoff','coulomb_cutoff1','coulomb_cutoff2']
parameters = ['Nmin','Nmax','desolvationSurfaceScalingFactor','desolvationPrefactor',
'desolvationAllowance','coulomb_diel','COO_HIS_exception','OCO_HIS_exception',
'CYS_HIS_exception','CYS_CYS_exception','min_ligand_model_pka','max_ligand_model_pka',
'include_H_in_interactions','coupling_max_number_of_bonds',
'min_bond_distance_for_hydrogen_bonds','coupling_penalty',
'shared_determinants','common_charge_centre','hide_penalised_group', 'remove_penalised_group',
'max_intrinsic_pKa_diff','min_interaction_energy','max_free_energy_diff','min_swap_pka_shift',
'min_pka','max_pka','sidechain_interaction']
strings = ['version','output_file_tag','ligand_typing','pH','reference']
MATRICES = ['interaction_matrix']
PAIR_WISE_MATRICES = ['sidechain_cutoffs']
NUMBER_DICTIONARIES = [
'VanDerWaalsVolume', 'charge', 'model_pkas', 'ions', 'valence_electrons',
'custom_model_pkas']
LIST_DICTIONARIES = ['backbone_NH_hydrogen_bond', 'backbone_CO_hydrogen_bond']
STRING_DICTIONARIES = ['protein_group_mapping']
STRING_LISTS = [
'ignore_residues', 'angular_dependent_sidechain_interactions',
'acid_list', 'base_list', 'exclude_sidechain_interactions',
'backbone_reorganisation_list', 'write_out_order']
DISTANCES = ['desolv_cutoff', 'buried_cutoff', 'coulomb_cutoff1',
'coulomb_cutoff2']
PARAMETERS = [
'Nmin', 'Nmax', 'desolvationSurfaceScalingFactor', 'desolvationPrefactor',
'desolvationAllowance', 'coulomb_diel', 'COO_HIS_exception',
'OCO_HIS_exception', 'CYS_HIS_exception', 'CYS_CYS_exception',
'min_ligand_model_pka', 'max_ligand_model_pka',
'include_H_in_interactions', 'coupling_max_number_of_bonds',
'min_bond_distance_for_hydrogen_bonds', 'coupling_penalty',
'shared_determinants', 'common_charge_centre', 'hide_penalised_group',
'remove_penalised_group', 'max_intrinsic_pka_diff',
'min_interaction_energy', 'max_free_energy_diff', 'min_swap_pka_shift',
'min_pka', 'max_pka', 'sidechain_interaction']
STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference']
class Parameters:
def __init__(self, parameter_file):
"""PROPKA parameter class."""
def __init__(self, parameter_file):
"""Initialize parameter class.
Args:
parameter_file: file with parameters
"""
# TODO - need to define all members explicitly
self.model_pkas = {}
self.interaction_matrix = InteractionMatrix("interaction_matrix")
self.sidechain_cutoffs = None
# TODO - it would be nice to rename these; they're defined everywhere
self.COO_HIS_exception = None
self.OCO_HIS_exception = None
self.CYS_HIS_exception = None
self.CYS_CYS_exception = None
# These functions set up remaining data structures implicitly
self.set_up_data_structures()
self.read_parameters(parameter_file)
#self.print_interaction_parameters()
#self.print_interaction_parameters_latex()
#####self.print_interactions_latex()
#sys.exit(0)
def read_parameters(self, file_):
"""Read parameters from file.
return
def read_parameters(self, file):
Args:
file_: file to read
"""
# try to locate the parameters file
try:
ifile = pkg_resources.resource_filename(__name__, file)
input = lib.open_file_for_reading(ifile)
except:
input = lib.open_file_for_reading(file)
for line in input:
ifile = pkg_resources.resource_filename(__name__, file_)
input_ = lib.open_file_for_reading(ifile)
except (IOError, FileNotFoundError, ValueError):
input_ = lib.open_file_for_reading(file_)
for line in input_:
self.parse_line(line)
return
def parse_line(self, line):
"""Parse parameter file line."""
# first, remove comments
comment_pos = line.find('#')
if comment_pos != -1:
line = line[:comment_pos]
# split the line into words
words = line.split()
if len(words) == 0:
return
# parse the words
if len(words)==3 and words[0] in number_dictionaries:
if len(words) == 3 and words[0] in NUMBER_DICTIONARIES:
self.parse_to_number_dictionary(words)
elif len(words)==2 and words[0] in string_lists:
elif len(words) == 2 and words[0] in STRING_LISTS:
self.parse_to_string_list(words)
elif len(words)==2 and words[0] in distances:
elif len(words) == 2 and words[0] in DISTANCES:
self.parse_distance(words)
elif len(words)==2 and words[0] in parameters:
elif len(words) == 2 and words[0] in PARAMETERS:
self.parse_parameter(words)
elif len(words)==2 and words[0] in strings:
elif len(words) == 2 and words[0] in STRINGS:
self.parse_string(words)
elif len(words)>2 and words[0] in list_dictionaries:
elif len(words) > 2 and words[0] in LIST_DICTIONARIES:
self.parse_to_list_dictionary(words)
elif words[0] in matrices+pair_wise_matrices:
elif words[0] in MATRICES+PAIR_WISE_MATRICES:
self.parse_to_matrix(words)
elif len(words)==3 and words[0] in string_dictionaries:
elif len(words) == 3 and words[0] in STRING_DICTIONARIES:
self.parse_to_string_dictionary(words)
#info(words)
return
def parse_to_number_dictionary(self, words):
exec('self.%s[\'%s\'] = %s'%tuple(words))
return
"""Parse field to number dictionary.
Args:
words: strings to parse.
"""
dict_ = getattr(self, words[0])
key = words[1]
value = words[2]
dict_[key] = float(value)
def parse_to_string_dictionary(self, words):
exec('self.%s[\'%s\'] = \'%s\''%tuple(words))
return
"""Parse field to string dictionary.
Args:
words: strings to parse
"""
dict_ = getattr(self, words[0])
key = words[1]
value = words[2]
dict_[key] = value
def parse_to_list_dictionary(self, words):
exec('if not \'%s\' in self.%s.keys(): self.%s[\'%s\'] = []'%(words[1],words[0],words[0],words[1]))
for word in words[2:]:
exec('self.%s[\'%s\'].append(%s)'%(words[0],words[1],word))
"""Parse field to list dictionary.
return
Args:
words: strings to parse.
"""
dict_ = getattr(self, words[0])
key = words[1]
if not key in dict_:
dict_[key] = []
for value in words[2:]:
if isinstance(value, list):
dict_[key].append([float(x) for x in value])
dict_[key].append(float(value))
def parse_to_string_list(self, words):
exec('self.%s.append(\'%s\')'%tuple(words))
return
"""Parse field to string list.
Args:
words: strings to parse
"""
list_ = getattr(self, words[0])
value = words[1]
list_.append(value)
def parse_to_matrix(self, words):
exec('self.%s.add(%s)'%(words[0],tuple(words[1:])))
return
"""Parse field to matrix.
Args:
words: strings to parse
"""
matrix = getattr(self, words[0])
value = tuple(words[1:])
matrix.add(value)
def parse_distance(self, words):
# float check needed
exec('self.%s = %s'%tuple(words))
exec('self.%s_squared = pow(%s,2)'%tuple(words))
return
"""Parse field to distance.
Args:
words: strings to parse
"""
value = float(words[1])
setattr(self, words[0], value)
value_sq = value*value
attr = "{0:s}_squared".format(words[0])
setattr(self, attr, value_sq)
def parse_parameter(self, words):
exec('self.%s = %s'%tuple(words))
return
"""Parse field to parameters.
Args:
words: strings to parse
"""
value = float(words[1])
setattr(self, words[0], value)
def parse_string(self, words):
#info('self.%s = \'%s\''%tuple(words))
exec('self.%s = \'%s\''%tuple(words))
return
"""Parse field to strings.
Args:
words: strings to parse
"""
setattr(self, words[0], words[1])
def set_up_data_structures(self):
for key_word in number_dictionaries+list_dictionaries+string_dictionaries:
exec('self.%s = {}'%key_word)
for key_word in string_lists:
exec('self.%s = []'%key_word)
for key_word in strings:
exec('self.%s = \'\''%key_word)
for key_word in matrices:
exec('self.%s = Interaction_matrix(\'%s\')'%(key_word,key_word))
for key_word in pair_wise_matrices:
exec('self.%s =Pair_wise_matrix(\'%s\')'%(key_word,key_word))
"""Set up internal data structures.
return
TODO - it would be better to make these assignments explicit in
__init__.
"""
for key_word in (NUMBER_DICTIONARIES + LIST_DICTIONARIES
+ STRING_DICTIONARIES):
setattr(self, key_word, {})
for key_word in STRING_LISTS:
setattr(self, key_word, [])
for key_word in STRINGS:
setattr(self, key_word, "")
for key_word in MATRICES:
matrix = InteractionMatrix(key_word)
setattr(self, key_word, matrix)
for key_word in PAIR_WISE_MATRICES:
matrix = PairwiseMatrix(key_word)
setattr(self, key_word, matrix)
def print_interaction_parameters(self):
"""Print interaction parameters."""
info('--------------- Model pKa values ----------------------')
for k in self.model_pkas.keys():
info('%3s %8.2f' % (k, self.model_pkas[k]))
for k in self.model_pkas:
info('{0:>3s} {1:8.2f}'.format(k, self.model_pkas[k]))
info('')
info('--------------- Interactions --------------------------')
agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
map = {
'CG' :['ARG'],
'C2N':['ARG'],
'N30':['N+','LYS'],
'N31':['N+','LYS'],
'N32':['N+','LYS'],
'N33':['N+','LYS'] ,
'NAR':['HIS'],
'OCO':['COO'],
'OP' :[],#['TYR','SER'],
'SH' :['CYS'] ,
'NP1':[],
'OH' :['ROH'],
'O3' :[] ,
'CL' :[],
'F' :[],
'NAM':['AMD'],
'N1' :[],
'O2' :[]}
for g1 in agroups:
for g2 in lgroups:
interaction = '%3s %3s %1s %4s %4s'%(g1,g2,
self.interaction_matrix[g1][g2],
self.sidechain_cutoffs.get_value(g1,g2)[0],
self.sidechain_cutoffs.get_value(g1,g2)[1])
agroups = [
'COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG',
'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR',
'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
lgroups = [
'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1',
'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
map_ = {
'CG': ['ARG'], 'C2N': ['ARG'], 'N30': ['N+', 'LYS'],
'N31': ['N+', 'LYS'], 'N32': ['N+', 'LYS'], 'N33': ['N+', 'LYS'],
'NAR': ['HIS'], 'OCO': ['COO'], 'OP': [], 'SH': ['CYS'],
'NP1': [], 'OH': ['ROH'], 'O3': [], 'CL': [], 'F': [],
'NAM': ['AMD'], 'N1': [], 'O2': []}
for group1 in agroups:
for group2 in lgroups:
fmt = "{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}"
interaction = fmt.format(
grp1=group1, grp2=group2,
mat=self.interaction_matrix[group1][group2],
val1=self.sidechain_cutoffs.get_value(group1, group2)[0],
val2=self.sidechain_cutoffs.get_value(group1, group2)[1])
map_interaction = ''
if g2 in map:
for m in map[g2]:
map_interaction += '|%3s %3s %1s %4s %4s'%(g1,m,
self.interaction_matrix[g1][m],
self.sidechain_cutoffs.get_value(g1,m)[0],
self.sidechain_cutoffs.get_value(g1,m)[1])
if self.interaction_matrix[g1][m] != self.interaction_matrix[g1][g2]:
if group2 in map_:
for val in map_[group2]:
fmt = "|{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}"
map_interaction += fmt.format(
group1, val, self.interaction_matrix[group1][val],
self.sidechain_cutoffs.get_value(group1, val)[0],
self.sidechain_cutoffs.get_value(group1, val)[1])
if (self.interaction_matrix[group1][val]
!= self.interaction_matrix[group1][group2]):
map_interaction += '* '
if self.sidechain_cutoffs.get_value(g1,m)[0] != self.sidechain_cutoffs.get_value(g1,g2)[0] or \
self.sidechain_cutoffs.get_value(g1,m)[1] != self.sidechain_cutoffs.get_value(g1,g2)[1]:
if (self.sidechain_cutoffs.get_value(group1, val)[0]
!= self.sidechain_cutoffs.get_value(
group1, group2)[0]
or self.sidechain_cutoffs.get_value(
group1, val)[1]
!= self.sidechain_cutoffs.get_value(
group1, group2)[1]):
map_interaction += '! '
else:
map_interaction += ' '
if len(map[g2])==0 and (self.sidechain_cutoffs.get_value(g1,g2)[0] !=3 or self.sidechain_cutoffs.get_value(g1,g2)[1] != 4):
if (len(map_[group2]) == 0
and (self.sidechain_cutoffs.get_value(
group1, group2)[0]
!= 3
or self.sidechain_cutoffs.get_value(
group1, group2)[1]
!= 4)):
map_interaction += '? '
info(interaction, map_interaction)
if g1==g2:
if group1 == group2:
break
info('-')
info('--------------- Exceptions ----------------------------')
info('COO-HIS', self.COO_HIS_exception)
info('OCO-HIS', self.OCO_HIS_exception)
info('CYS-HIS', self.CYS_HIS_exception)
info('CYS-CYS', self.CYS_CYS_exception)
info('--------------- Mapping -------------------------------')
info("""
Titratable:
@@ -251,284 +295,281 @@ NAM
N1
O2
""")
return
def print_interaction_parameters_latex(self):
# info('--------------- Model pKa values ----------------------')
# for k in self.model_pkas.keys():
# info('%3s %8.2f'%(k,self.model_pkas[k]))
# info('')
# info('--------------- Interactions --------------------------')
agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
map = {
'CG' :['ARG'],
'C2N':['ARG'],
'N30':['N+','LYS'],
'N31':['N+','LYS'],
'N32':['N+','LYS'],
'N33':['N+','LYS'] ,
'NAR':['HIS'],
'OCO':['COO'],
'OP' :[],#['TYR','SER'],
'SH' :['CYS'] ,
'NP1':['AMD'],
'OH' :['ROH'],
'O3' :[] ,
'CL' :[],
'F' :[],
'NAM':[],
'N1' :[],
'O2' :[]}
s = """
\\begin{longtable}{lllll}
\\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.}
\\label{tab:ligand_interaction_parameters}\\\\
\\toprule
Group1 & Group2 & Interaction & c1 &c2 \\\\
\\midrule
\\endfirsthead
\\multicolumn{5}{l}{\\emph{continued from the previous page}}\\\\
\\toprule
Group1 & Group2 & Interaction & c1 &c2 \\\\
\\midrule
\\endhead
\\midrule
\\multicolumn{5}{r}{\\emph{continued on the next page}}\\\\
\\endfoot
\\bottomrule
\\endlastfoot
"""%(self.sidechain_cutoffs.default)
for g1 in agroups:
for g2 in lgroups:
if self.interaction_matrix[g1][g2]=='-':
"""Print interaction parameters in LaTeX format."""
# TODO - if these lists and dictionaries are the same as above, then
# should be constants at the level of the module
agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD',
'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32',
'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM',
'N1', 'O2', 'OP', 'SH']
lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO',
'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP',
'SH']
lines = [
"",
"\\begin{{longtable}}{{lllll}}",
("\\caption{{Ligand interaction parameters. For interactions not "
"listed, the default value of {0:s} is applied.}}").format(
self.sidechain_cutoffs.default),
"\\label{{tab:ligand_interaction_parameters}}\\\\",
"\\toprule",
"Group1 & Group2 & Interaction & c1 &c2 \\\\",
"\\midrule",
"\\endfirsthead",
"",
"\\multicolumn{{5}}{{l}}{\\emph{{continued from the previous page}}}\\\\",
"\\toprule",
"Group1 & Group2 & Interaction & c1 &c2 \\\\",
"\\midrule",
"\\endhead",
"",
"\\midrule",
"\\multicolumn{{5}}{{r}}{\\emph{{continued on the next page}}}\\\\",
"\\endfoot",
"",
"\\bottomrule",
"\\endlastfoot",
""]
str_ = "\n".join(lines)
for group1 in agroups:
for group2 in lgroups:
if self.interaction_matrix[group1][group2] == '-':
continue
if self.sidechain_cutoffs.get_value(g1,g2)==self.sidechain_cutoffs.default:
if (self.sidechain_cutoffs.get_value(group1, group2)
== self.sidechain_cutoffs.default):
continue
s+= '%3s & %3s & %1s & %4s & %4s\\\\ \n'%(g1,g2,
self.interaction_matrix[g1][g2],
self.sidechain_cutoffs.get_value(g1,g2)[0],
self.sidechain_cutoffs.get_value(g1,g2)[1])
if g1==g2:
fmt = (
"{grp1:>3s} & {grp2:>3s} & {mat:1s} & {val1:4} & "
"{val2:4}\\\\ \n")
str_ += fmt.format(
group1, group2,
self.interaction_matrix[group1][group2],
self.sidechain_cutoffs.get_value(group1, group2)[0],
self.sidechain_cutoffs.get_value(group1, group2)[1])
if group1 == group2:
break
s += ' \\end{longtable}\n'
info(s)
return
str_ += ' \\end{{longtable}}\n'
info(str_)
def print_interactions_latex(self):
agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
s = """
\\begin{longtable}{%s}
\\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.}
\\label{tab:ligand_interaction_parameters}\\\\
\\toprule
Group1 & Group2 & Interaction & c1 &c2 \\\\
\\midrule
\\endfirsthead
\\multicolumn{5}{l}{\\emph{continued from the previous page}}\\\\
\\toprule
Group1 & Group2 & Interaction & c1 &c2 \\\\
\\midrule
\\endhead
\\midrule
\\multicolumn{5}{r}{\\emph{continued on the next page}}\\\\
\\endfoot
\\bottomrule
\\endlastfoot
"""%('l'*len(agroups),self.sidechain_cutoffs.default)
for g1 in agroups:
for g2 in agroups:
s+= '%3s & %3s & %1s & %4s & %4s\\\\ \n'%(g1,g2,
self.interaction_matrix[g1][g2],
self.sidechain_cutoffs.get_value(g1,g2)[0],
self.sidechain_cutoffs.get_value(g1,g2)[1])
if g1==g2:
"""Print interactions in LaTeX."""
# TODO - are these the same lists as above? Convert to module constants.
agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD',
'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32',
'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM',
'N1', 'O2', 'OP', 'SH']
lines = [
"",
"\\begin{longtable}{{{0:s}}}".format('l'*len(agroups)),
("\\caption{{Ligand interaction parameters. For interactions not "
"listed, the default value of {0:s} is applied.}}").format(
str(self.sidechain_cutoffs.default)),
"\\label{{tab:ligand_interaction_parameters}}\\\\",
"\\toprule",
"Group1 & Group2 & Interaction & c1 &c2 \\\\",
"\\midrule",
"\\endfirsthead",
"",
"\\multicolumn{{5}}{{l}}{\\emph{{continued from the previous page}}}\\\\",
"\\toprule",
"Group1 & Group2 & Interaction & c1 &c2 \\\\",
"\\midrule",
"\\endhead",
"",
"\\midrule",
"\\multicolumn{{5}}{{r}}{\\emph{{continued on the next page}}}\\\\",
"\\endfoot",
"",
"\\bottomrule",
"\\endlastfoot",
""
]
str_ = "\n".join(lines)
for group1 in agroups:
for group2 in agroups:
fmt = '{g1:>3s} & {g2:>3s} & {mat:1s} & {val1:>4s} & {val2:>4s}\\\\ \n'
str_ += fmt.format(
group1, group2, self.interaction_matrix[group1][group2],
str(self.sidechain_cutoffs.get_value(group1, group2)[0]),
str(self.sidechain_cutoffs.get_value(group1, group2)[1]))
if group1 == group2:
break
s += ' \\end{longtable}\n'
info(s)
return
str_ += ' \\end{{longtable}}\n'
info(str_)
class InteractionMatrix:
"""Interaction matrix class."""
class Interaction_matrix:
def __init__(self, name):
"""Initialize with name of matrix.
Args:
name: name of interaction matrix
"""
self.name = name
self.value = None
self.ordered_keys = []
self.dictionary = {}
return
def add(self,words):
def add(self, words):
"""Add values to matrix.
Args:
words: values to add
"""
new_group = words[0]
self.ordered_keys.append(new_group)
if not new_group in self.dictionary.keys():
self.dictionary[new_group] = {}
for i in range(len(self.ordered_keys)):
group = self.ordered_keys[i]
if len(words)>i+1:
for i, group in enumerate(self.ordered_keys):
if len(words) > i+1:
try:
exec('self.value = %s'%words[i+1])
except:
self.value = float(words[i+1])
except ValueError:
self.value = words[i+1]
self.dictionary[group][new_group] = self.value
self.dictionary[new_group][group] = self.value
return
def get_value(self, item1, item2):
"""Get specific matrix value.
Args:
item1: matrix row index
item2: matrix column index
Returns:
matrix value or None
"""
try:
return self.dictionary[item1][item2]
except:
except KeyError:
return None
def __getitem__(self, group):
"""Get specific group from matrix.
Args:
group: group to get
"""
if group not in self.dictionary.keys():
raise Exception('%s not found in interaction matrix %s'%(group,self.name))
str_ = '{0:s} not found in interaction matrix {1:s}'.format(
group, self.name)
raise KeyError(str_)
return self.dictionary[group]
def keys(self):
"""Get keys from matrix.
Returns:
dictionary key list
"""
return self.dictionary.keys()
def __str__(self):
s = ' '
for k1 in self.ordered_keys:
s+='%3s '%k1
s+='\n'
for k1 in self.ordered_keys:
s+='%3s '%k1
for k2 in self.ordered_keys:
s+='%3s '%self[k1][k2]
s+='\n'
return s
# ks = ['COO', 'SER', 'ARG', 'LYS', 'HIS', 'AMD', 'CYS', 'TRP','ROH','TYR','N+','CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
# p = ''
# n=0
# for i in range(len(ks)):
# for j in range(i,len(ks)):
# if not [0.0,0.0]==self[ks[i]][ks[j]]:
# if not [3.0,4.0]==self[ks[i]][ks[j]]:
# p+='sidechain_cutoff %3s %3s %s\n'%(ks[i],ks[j],self[ks[i]][ks[j]])
# n+=1
# info('total',n,len(ks))
# return p
str_ = ' '
for key in self.ordered_keys:
str_ += '{0:>3s} '.format(key)
str_ += '\n'
for key1 in self.ordered_keys:
str_ += '{0:>3s} '.format(key1)
for key2 in self.ordered_keys:
str_ += '{0:>3s} '.format(self[key1][key2])
str_ += '\n'
return str_
class PairwiseMatrix:
"""Pairwise interaction matrix class."""
class Pair_wise_matrix:
def __init__(self, name):
"""Initialize pairwise matrix.
Args:
name: name of pairwise interaction
"""
self.name = name
self.dictionary = {}
self.default = [0.0, 0.0]
return
def add(self,words):
def add(self, words):
"""Add information to the matrix.
TODO - this function unnecessarily bundles arguments into a tuple
Args:
words: tuple with assignment information and value
"""
# assign the default value
if len(words)==3 and words[0]=='default':
if len(words) == 3 and words[0] == 'default':
self.default = [float(words[1]), float(words[2])]
return
# assign non-default values
g1 = words[0]
g2 = words[1]
v = [float(words[2]), float(words[3])]
group1 = words[0]
group2 = words[1]
value = [float(words[2]), float(words[3])]
self.insert(group1, group2, value)
self.insert(group2, group1, value)
self.insert(g1,g2,v)
self.insert(g2,g1,v)
def insert(self, key1, key2, value):
"""Insert value into matrix.
return
def insert(self, k1,k2,v):
if k1 in self.dictionary.keys() and k2 in self.dictionary[k1].keys():
if k1!=k2:
warning('Parameter value for %s, %s defined more than once' % (k1, k2))
if not k1 in self.dictionary:
self.dictionary[k1] = {}
self.dictionary[k1][k2] =v
return
Args:
key1: first matrix key (row)
key2: second matrix key (column)
value: value to insert
"""
if key1 in self.dictionary and key2 in self.dictionary[key1]:
if key1 != key2:
str_ = (
'Parameter value for {0:s}, {1:s} defined more '
'than once'.format(key1, key2))
warning(str_)
if not key1 in self.dictionary:
self.dictionary[key1] = {}
self.dictionary[key1][key2] = value
def get_value(self, item1, item2):
"""Get specified value from matrix.
Args:
item1: row index
item2: column index
Returns:
matrix value (or default)
"""
try:
return self.dictionary[item1][item2]
except:
except KeyError:
return self.default
def __getitem__(self, group):
"""Get item from matrix corresponding to specific group.
Args:
group: group to retrieve
Returns:
matrix information
"""
if group not in self.dictionary.keys():
raise Exception('%s not found in interaction matrix %s'%(group,self.name))
str_ = '{0:s} not found in interaction matrix {1:s}'.format(
group, self.name)
raise KeyError(str_)
return self.dictionary[group]
def keys(self):
"""Get keys from matrix.
Returns:
dictionary key list
"""
return self.dictionary.keys()
def __str__(self):
s=''
for k1 in self.keys():
for k2 in self[k1].keys():
s += '%s %s %s\n'%(k1,k2,self[k1][k2])
return s
str_ = ''
for key1 in self.keys():
for key2 in self[key1].keys():
str_ += '{0:s} {1:s} {2:s}\n'.format(
key1, key2, self[key1][key2])
return str_

View File

@@ -1,239 +1,257 @@
from __future__ import division
from __future__ import print_function
import string, sys, copy
"""PDB parsing functionality."""
import propka.lib
from propka.lib import info, warning
from propka.lib import warning
from propka.atom import Atom
from propka.conformation_container import Conformation_container
from propka.conformation_container import ConformationContainer
expected_atom_numbers = {'ALA':5,
'ARG':11,
'ASN':8,
'ASP':8,
'CYS':6,
'GLY':4,
'GLN':9,
'GLU':9,
'HIS':10,
'ILE':8,
'LEU':8,
'LYS':9,
'MET':8,
'PHE':11,
'PRO':7,
'SER':6,
'THR':7,
'TRP':14,
'TYR':12,
'VAL':7}
EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
def read_pdb(pdb_file, parameters, molecule):
conformations = {}
"""Parse a PDB file.
Args:
pdb_file: file to read
parameters: parameters to guide parsing
molecule: molecular container
Returns:
list with elements:
1. list of conformations
2. list of names
"""
conformations = {}
# read in all atoms in the file
lines = get_atom_lines_from_pdb(pdb_file, ignore_residues = parameters.ignore_residues, keep_protons = molecule.options.keep_protons, chains=molecule.options.chains)
lines = get_atom_lines_from_pdb(
pdb_file, ignore_residues=parameters.ignore_residues,
keep_protons=molecule.options.keep_protons,
chains=molecule.options.chains)
for (name, atom) in lines:
if not name in conformations.keys():
conformations[name] = Conformation_container(name=name, parameters=parameters, molecular_container=molecule)
conformations[name] = ConformationContainer(
name=name, parameters=parameters, molecular_container=molecule)
conformations[name].add_atom(atom)
# make a sorted list of conformation names
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
return [conformations, names]
def protein_precheck(conformations, names):
def protein_precheck(conformations, names):
"""Check protein for correct number of atoms, etc.
Args:
names: conformation names to check
"""
for name in names:
atoms = conformations[name].atoms
# Group the atoms by their residue:
atoms_by_residue = {}
for a in atoms:
if a.element != 'H':
res_id = resid_from_atom(a)
for atom in atoms:
if atom.element != 'H':
res_id = resid_from_atom(atom)
try:
atoms_by_residue[res_id].append(a)
atoms_by_residue[res_id].append(atom)
except KeyError:
atoms_by_residue[res_id] = [a]
atoms_by_residue[res_id] = [atom]
for res_id, res_atoms in atoms_by_residue.items():
res_name = res_atoms[0].res_name
residue_label = '%3s%5s'%(res_name, res_id)
residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
# ignore ligand residues
if res_name not in expected_atom_numbers:
if res_name not in EXPECTED_ATOM_NUMBERS:
continue
# check for c-terminal
if 'C-' in [a.terminal for a in res_atoms]:
if len(res_atoms) != expected_atom_numbers[res_name]+1:
warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name))
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
str_ = ("Unexpected number ({num:d}) of atoms in residue "
"{res:s} in conformation {conf:s}".format(
num=len(res_atoms), res=residue_label,
conf=name))
warning(str_)
continue
# check number of atoms in residue
if len(res_atoms) != expected_atom_numbers[res_name]:
warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name))
return
def resid_from_atom(a):
return '%4d %s %s'%(a.res_num,a.chain_id,a.icode)
if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
str_ = ("Unexpected number ({num:d}) of atoms in residue "
"{res:s} in conformation {conf:s}".format(
num=len(res_atoms), res=residue_label,
conf=name))
warning(str_)
def get_atom_lines_from_pdb(pdb_file, ignore_residues = [], keep_protons=False, tags = ['ATOM ', 'HETATM'], chains=None):
def resid_from_atom(atom):
"""Return string with atom residue information.
Args:
atom: atom to generate string for
Returns
string
"""
return '{0:>4d} {1:s} {2:s}'.format(
atom.res_num, atom.chain_id, atom.icode)
def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
tags=['ATOM ', 'HETATM'], chains=None):
"""Get atom lines from PDB file.
Args:
pdb_file: PDB file to parse
ignore_residues: list of residues to ignore
keep_protons: bool to keep/ignore protons
tags: tags of lines that include atoms
chains: list of chains
"""
lines = propka.lib.open_file_for_reading(pdb_file).readlines()
nterm_residue = 'next_residue'
old_residue = None
terminal = None
model = 1
for line in lines:
tag = line[0:6]
# set the model number
if tag == 'MODEL ':
model = int(line[6:])
nterm_residue = 'next_residue'
if tag == 'TER ':
nterm_residue = 'next_residue'
if tag in tags:
alt_conf_tag = line[16]
residue_name = line[12:16]
residue_number = line[22:26]
residue_name = line[12: 16]
residue_number = line[22: 26]
# check if we want this residue
if line[17:20] in ignore_residues:
if line[17: 20] in ignore_residues:
continue
if chains and line[21] not in chains:
continue
# set the Nterm residue number - nessecary because we may need to
# identify more than one N+ group for structures with alt_conf tags
if nterm_residue == 'next_residue' and tag == 'ATOM ':
# make sure that we reached a new residue - nessecary if OXT is not the last atom in
# the previous residue
# make sure that we reached a new residue - nessecary if OXT is
# not the last atom inthe previous residue
if old_residue != residue_number:
nterm_residue = residue_number
old_residue = None
# Identify the configuration
# convert digits to letters
if alt_conf_tag in '123456789':
alt_conf_tag = chr(ord(alt_conf_tag)+16)
if alt_conf_tag == ' ':
alt_conf_tag = 'A'
conformation = '%d%s'%(model, alt_conf_tag)
conformation = '{0:d}{1:s}'.format(model, alt_conf_tag)
# set the terminal
if tag == 'ATOM ':
if residue_name.strip() == 'N' and nterm_residue == residue_number:
if (residue_name.strip() == 'N'
and nterm_residue == residue_number):
terminal = 'N+'
if residue_name.strip() in ['OXT','O\'\'']:
if residue_name.strip() in ['OXT', 'O\'\'']:
terminal = 'C-'
nterm_residue = 'next_residue'
old_residue = residue_number
# and yield the atom
atom = Atom(line=line)
atom.terminal = terminal
#if keep_protons:
# atom.is_protonated = True
if not (atom.element == 'H' and not keep_protons): #ignore hydrogen
#ignore hydrogen
if not (atom.element == 'H' and not keep_protons):
yield (conformation, atom)
terminal = None
return
def write_pdb(conformation, filename):
"""Write PDB conformation to a file.
Args:
conformation: conformation container
filename: filename for output
"""
write_pdb_for_atoms(conformation.atoms, filename)
return
def write_pdb_for_atoms(atoms, filename, make_conect_section=False):
out = propka.lib.open_file_for_writing(filename)
"""Write out PDB file for atoms.
Args:
atoms: list of atoms
filename: name of file
make_conect_section: generate a CONECT PDB section
"""
out = propka.lib.open_file_for_writing(filename)
for atom in atoms:
out.write(atom.make_pdb_line())
if make_conect_section:
for atom in atoms:
out.write(atom.make_conect_line())
out.close()
return
def write_mol2_for_atoms(atoms, filename):
"""Write out MOL2 file for atoms.
header = '@<TRIPOS>MOLECULE\n\n%d %d\nSMALL\nUSER_CHARGES\n'
Args:
atoms: list of atoms
filename: name of file
"""
# TODO - header needs to be converted to format string
header = '@<TRIPOS>MOLECULE\n\n{natom:d} {id:d}\nSMALL\nUSER_CHARGES\n'
atoms_section = '@<TRIPOS>ATOM\n'
for i in range(len(atoms)):
atoms_section += atoms[i].make_mol2_line(i+1)
for i, atom in enumerate(atoms):
atoms_section += atom.make_mol2_line(i+1)
bonds_section = '@<TRIPOS>BOND\n'
id = 1
for i in range(len(atoms)):
for j in range(i+1,len(atoms)):
if atoms[i] in atoms[j].bonded_atoms:
type = get_bond_order(atoms[i],atoms[j])
bonds_section += '%7d %7d %7d %7s\n'%(id, i+1, j+1, type)
id+=1
id_ = 1
for i, atom1 in enumerate(atoms):
for j, atom2 in enumerate(atoms, i+1):
if atom1 in atom2.bonded_atoms:
type_ = get_bond_order(atom1, atom2)
bonds_section += '{0:>7d} {1:>7d} {2:>7d} {3:>7s}\n'.format(
id_, i+1, j+1, type_)
id_ += 1
substructure_section = '@<TRIPOS>SUBSTRUCTURE\n\n'
if len(atoms)>0:
substructure_section = '@<TRIPOS>SUBSTRUCTURE\n%-7d %10s %7d\n'%(atoms[0].res_num,atoms[0].res_name,atoms[0].numb)
if len(atoms) > 0:
substructure_section = (
'@<TRIPOS>SUBSTRUCTURE\n{0:<7d} {1:>10s} {2:>7d}\n'.format(
atoms[0].res_num, atoms[0].res_name, atoms[0].numb))
out = propka.lib.open_file_for_writing(filename)
out.write(header%(len(atoms),id-1))
out.write(header.format(natom=len(atoms), id=id_-1))
out.write(atoms_section)
out.write(bonds_section)
out.write(substructure_section)
out.close()
return
def get_bond_order(atom1, atom2):
type = '1'
"""Get the order of a bond between two atoms.
Args:
atom1: first atom in bond
atom2: second atom in bond
Returns:
string with bond type
"""
type_ = '1'
pi_electrons1 = atom1.num_pi_elec_2_3_bonds
pi_electrons2 = atom2.num_pi_elec_2_3_bonds
if '.ar' in atom1.sybyl_type:
pi_electrons1 -=1
pi_electrons1 -= 1
if '.ar' in atom2.sybyl_type:
pi_electrons2 -=1
pi_electrons2 -= 1
if pi_electrons1 > 0 and pi_electrons2 > 0:
type = '%d'%(min(pi_electrons1, pi_electrons2)+1)
type_ = '{0:d}'.format(min(pi_electrons1, pi_electrons2)+1)
if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type:
type = 'ar'
return type
type_ = 'ar'
return type_
def write_input(molecular_container, filename):
out = propka.lib.open_file_for_writing(filename)
"""Write PROPKA input file for molecular container.
Args:
molecular_container: molecular container
filename: output file name
"""
out = propka.lib.open_file_for_writing(filename)
for conformation_name in molecular_container.conformation_names:
out.write('MODEL %s\n'%conformation_name)
out.write('MODEL {0:s}\n'.format(conformation_name))
# write atoms
for atom in molecular_container.conformations[conformation_name].atoms:
out.write(atom.make_input_line())
@@ -241,49 +259,59 @@ def write_input(molecular_container, filename):
for atom in molecular_container.conformations[conformation_name].atoms:
out.write(atom.make_conect_line())
# write covalently coupled groups
for group in molecular_container.conformations[conformation_name].groups:
for group in (
molecular_container.conformations[conformation_name].groups):
out.write(group.make_covalently_coupled_line())
# write non-covalently coupled groups
for group in molecular_container.conformations[conformation_name].groups:
for group in (
molecular_container.conformations[conformation_name].groups):
out.write(group.make_non_covalently_coupled_line())
out.write('ENDMDL\n')
out.close()
return
def read_input(input_file, parameters, molecule):
"""Read PROPKA input file for molecular container.
def read_input(input_file, parameters,molecule):
Args:
input_file: input file
parameters: parameters for parsing/setup
molecule: molecular container
Returns:
list with [conformations, names of conformations]
"""
conformations = {}
# read in all atoms in the input file
lines = get_atom_lines_from_input(input_file)
for (name, atom) in lines:
if not name in conformations.keys():
conformations[name] = Conformation_container(name=name, parameters=parameters, molecular_container=molecule)
conformations[name] = ConformationContainer(
name=name, parameters=parameters,
molecular_container=molecule)
conformations[name].add_atom(atom)
# make a sorted list of conformation names
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
return [conformations, names]
def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']):
"""Get atom lines from a PROPKA input file.
def get_atom_lines_from_input(input_file, tags = ['ATOM ','HETATM']):
Args:
input_file: input file
tags: tags defining atom lines
Yields:
conformation container, list of atoms
"""
lines = propka.lib.open_file_for_reading(input_file).readlines()
conformation = ''
atoms = {}
numbers = []
for line in lines:
tag = line[0:6]
# set the conformation
if tag == 'MODEL ':
conformation = line[6:].strip()
# found an atom - save it
if tag in tags:
atom = Atom(line=line)
@@ -292,46 +320,39 @@ def get_atom_lines_from_input(input_file, tags = ['ATOM ','HETATM']):
atom.is_protonated = True
atoms[atom.numb] = atom
numbers.append(atom.numb)
# found bonding information - apply it
if tag == 'CONECT' and len(line)>14:
if tag == 'CONECT' and len(line) > 14:
conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)]
center_atom = atoms[int(conect_numbers[0])]
for n in conect_numbers[1:]:
b = atoms[int(n)]
for num in conect_numbers[1:]:
bond_atom = atoms[int(num)]
# remember to check for cysteine bridges
if center_atom.element == 'S' and b.element == 'S':
center_atom.cysteine_bridge = True
b.cysteine_bridge = True
if center_atom.element == 'S' and bond_atom.element == 'S':
center_atom.cysteine_bridge = True
bond_atom.cysteine_bridge = True
# set up bonding
if not b in center_atom.bonded_atoms:
center_atom.bonded_atoms.append(b)
if not center_atom in b.bonded_atoms:
b.bonded_atoms.append(center_atom)
if not bond_atom in center_atom.bonded_atoms:
center_atom.bonded_atoms.append(bond_atom)
if not center_atom in bond_atom.bonded_atoms:
bond_atom.bonded_atoms.append(center_atom)
# found info on covalent coupling
if tag == 'CCOUPL' and len(line)>14:
if tag == 'CCOUPL' and len(line) > 14:
conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)]
center_atom = atoms[int(conect_numbers[0])]
for n in conect_numbers[1:]:
cg = atoms[int(n)]
center_atom.group.couple_covalently(cg.group)
for num in conect_numbers[1:]:
cov_atom = atoms[int(num)]
center_atom.group.couple_covalently(cov_atom.group)
# found info on non-covalent coupling
if tag == 'NCOUPL' and len(line)>14:
if tag == 'NCOUPL' and len(line) > 14:
conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)]
center_atom = atoms[int(conect_numbers[0])]
for n in conect_numbers[1:]:
cg = atoms[int(n)]
center_atom.group.couple_non_covalently(cg.group)
for num in conect_numbers[1:]:
cov_atom = atoms[int(num)]
center_atom.group.couple_non_covalently(cov_atom.group)
# this conformation is done - yield the atoms
if tag == 'ENDMDL':
for n in numbers:
yield (conformation, atoms[n])
for num in numbers:
yield (conformation, atoms[num])
# prepare for next conformation
atoms = {}
numbers = []
return

View File

@@ -1,6 +1,6 @@
# PropKa configuration file
version version_A
version VersionA
# Model pKa values
model_pkas C- 3.20
@@ -342,7 +342,7 @@ common_charge_centre 0
remove_penalised_group 1
# non-covalent coupling
max_intrinsic_pKa_diff 2.0
max_intrinsic_pka_diff 2.0
min_interaction_energy 0.5
max_free_energy_diff 1.0
min_swap_pka_shift 1.0

View File

@@ -1,441 +1,365 @@
#!/usr/bin/python
"""Protonate a structure."""
import math
import propka.bonds
import propka.atom
from propka.vector_algebra import rotate_vector_around_an_axis, Vector
from propka.lib import warning, debug
from __future__ import division
from __future__ import print_function
from propka.vector_algebra import *
import propka.bonds, propka.pdb, propka.atom
from propka.lib import info, warning, debug
class Protonate:
""" Protonates atoms using VSEPR theory """
def __init__(self, verbose=False):
self.verbose=verbose
self.valence_electrons = {'H': 1,
'He':2,
'Li':1,
'Be':2,
'B': 3,
'C': 4,
'N': 5,
'O': 6,
'F': 7,
'Ne':8,
'Na':1,
'Mg':2,
'Al':3,
'Si':4,
'P': 5,
'S': 6,
'Cl':7,
'Ar':8,
'K': 1,
'Ca':2,
'Sc':2,
'Ti':2,
'Va':2,
'Cr':1,
'Mn':2,
'Fe':2,
'Co':2,
'Ni':2,
'Cu':1,
'Zn':2,
'Ga':3,
'Ge':4,
'As':5,
'Se':6,
'Br':7,
'Kr':8,
'I':7,
}
self.standard_charges= {'ARG-NH1':1.0,
'ASP-OD2':-1.0,
'GLU-OE2':-1.0,
'HIS-ND1':1.0,
'LYS-NZ':1.0,
'N+':1.0,
'C-':-1.0}
self.sybyl_charges = {'N.pl3':+1,
'N.3':+1,
'N.4':+1,
'N.ar':+1,
'O.co2-':-1}
self.bond_lengths = {'C':1.09,
'N':1.01,
'O':0.96,
'F':0.92,
'Cl':1.27,
'Br':1.41,
'I':1.61,
'S':1.35}
# protonation_methods[steric_number] = method
self.protonation_methods = {4:self.tetrahedral,
3:self.trigonal}
return
"""Initialize with flag for verbosity
Args:
verbose: True for verbose output
"""
self.verbose = verbose
self.valence_electrons = {
'H': 1, 'He': 2, 'Li': 1, 'Be': 2, 'B': 3, 'C': 4, 'N': 5,
'O': 6, 'F': 7, 'Ne': 8, 'Na': 1, 'Mg': 2, 'Al': 3, 'Si': 4,
'P': 5, 'S': 6, 'Cl': 7, 'Ar': 8, 'K': 1, 'Ca': 2, 'Sc': 2,
'Ti': 2, 'Va': 2, 'Cr': 1, 'Mn': 2, 'Fe': 2, 'Co': 2, 'Ni': 2,
'Cu': 1, 'Zn': 2, 'Ga': 3, 'Ge': 4, 'As': 5, 'Se': 6, 'Br': 7,
'Kr': 8, 'I': 7}
# TODO - consider putting charges in a configuration file
self.standard_charges = {
'ARG-NH1': 1.0, 'ASP-OD2': -1.0, 'GLU-OE2': -1.0, 'HIS-ND1': 1.0,
'LYS-NZ': 1.0, 'N+': 1.0, 'C-': -1.0}
self.sybyl_charges = {
'N.pl3': 1, 'N.3': 1, 'N.4': 1, 'N.ar': 1, 'O.co2-': 1}
# TODO - consider putting bond lengths in a configuration file
self.bond_lengths = {
'C': 1.09, 'N': 1.01, 'O': 0.96, 'F': 0.92, 'Cl': 1.27,
'Br': 1.41, 'I': 1.61, 'S': 1.35}
self.protonation_methods = {4: self.tetrahedral, 3: self.trigonal}
def protonate(self, molecules):
""" Will protonate all atoms in the molecular container """
"""Protonate all atoms in the molecular container.
Args:
molecules: molecular containers
"""
debug('----- Protonation started -----')
# Remove all currently present hydrogen atoms
self.remove_all_hydrogen_atoms(molecules)
# protonate all atoms
for name in molecules.conformation_names:
non_H_atoms = molecules.conformations[name].get_non_hydrogen_atoms()
for atom in non_H_atoms:
non_h_atoms = (molecules.conformations[name]
.get_non_hydrogen_atoms())
for atom in non_h_atoms:
self.protonate_atom(atom)
# fix hydrogen names
#self.set_proton_names(non_H_atoms)
@staticmethod
def remove_all_hydrogen_atoms(molecular_container):
"""Remove all hydrogen atoms from molecule.
return
def remove_all_hydrogen_atoms(self, molecular_container):
Args:
molecular_container: molecule to remove hydrogens from
"""
for name in molecular_container.conformation_names:
molecular_container.conformations[name].atoms = molecular_container.conformations[name].get_non_hydrogen_atoms()
return
molecular_container.conformations[name].atoms = (
molecular_container.conformations[name]
.get_non_hydrogen_atoms())
def set_charge(self, atom):
"""Set charge for atom.
Args:
atom: atom to be charged
"""
# atom is a protein atom
if atom.type=='atom':
key = '%3s-%s'%(atom.res_name, atom.name)
if atom.type == 'atom':
key = '{0:3s}-{1:s}'.format(atom.res_name, atom.name)
if atom.terminal:
debug(atom.terminal)
key=atom.terminal
if key in list(self.standard_charges.keys()):
key = atom.terminal
if key in self.standard_charges:
atom.charge = self.standard_charges[key]
debug('Charge', atom, atom.charge)
atom.charge_set = True
# atom is a ligand atom
elif atom.type=='hetatm':
if atom.sybyl_type in list(self.sybyl_charges.keys()):
elif atom.type == 'hetatm':
if atom.sybyl_type in self.sybyl_charges:
atom.charge = self.sybyl_charges[atom.sybyl_type]
atom.sybyl_type = atom.sybyl_type.replace('-','')
atom.sybyl_type = atom.sybyl_type.replace('-', '')
atom.charge_set = True
return
def protonate_atom(self, atom):
if atom.is_protonated: return
if atom.element == 'H': return
"""Protonate an atom.
Args:
atom: atom to be protonated
"""
if atom.is_protonated:
return
if atom.element == 'H':
return
self.set_charge(atom)
self.set_number_of_protons_to_add(atom)
self.set_steric_number_and_lone_pairs(atom)
self.add_protons(atom)
atom.is_protonated = True
return
def set_proton_names(self, heavy_atoms):
@staticmethod
def set_proton_names(heavy_atoms):
"""Set names for protons.
Args:
heavy_atoms: list of heavy atoms with protons to be named
"""
for heavy_atom in heavy_atoms:
i = 1
for bonded in heavy_atom.bonded_atoms:
if bonded.element == 'H':
bonded.name+='%d'%i
i+=1
return
bonded.name += str(i)
i += 1
def set_number_of_protons_to_add(self, atom):
debug('*'*10)
debug('Setting number of protons to add for',atom)
atom.number_of_protons_to_add = 8
debug(' %4d'%8)
atom.number_of_protons_to_add -= self.valence_electrons[atom.element]
debug('Valence eletrons: %4d'%-self.valence_electrons[atom.element])
atom.number_of_protons_to_add -= len(atom.bonded_atoms)
debug('Number of bonds: %4d'%- len(atom.bonded_atoms))
atom.number_of_protons_to_add -= atom.num_pi_elec_2_3_bonds
debug('Pi electrons: %4d'%-atom.num_pi_elec_2_3_bonds)
atom.number_of_protons_to_add += int(atom.charge)
debug('Charge: %4.1f'%atom.charge)
"""Set the number of protons to add to this atom.
Args:
atom: atom for calculation
"""
debug('*'*10)
debug('Setting number of protons to add for', atom)
atom.number_of_protons_to_add = 8
debug(" 8")
atom.number_of_protons_to_add -= self.valence_electrons[atom.element]
debug('Valence electrons: {0:>4d}'.format(
-self.valence_electrons[atom.element]))
atom.number_of_protons_to_add -= len(atom.bonded_atoms)
debug('Number of bonds: {0:>4d}'.format(-len(atom.bonded_atoms)))
atom.number_of_protons_to_add -= atom.num_pi_elec_2_3_bonds
debug('Pi electrons: {0:>4d}'.format(-atom.num_pi_elec_2_3_bonds))
atom.number_of_protons_to_add += int(atom.charge)
debug('Charge: {0:>4.1f}'.format(atom.charge))
debug('-'*10)
debug(atom.number_of_protons_to_add)
return
def set_steric_number_and_lone_pairs(self, atom):
"""Set steric number and lone pairs for atom.
Args:
atom: atom for calculation
"""
# If we already did this, there is no reason to do it again
if atom.steric_num_lone_pairs_set:
return
debug('='*10)
debug('Setting steric number and lone pairs for',atom)
# costumly set the N backbone atoms up for peptide bond trigonal planer shape
#if atom.name == 'N' and len(atom.bonded_atoms) == 2:
# atom.steric_number = 3
# atom.number_of_lone_pairs = 0
# self.display 'Peptide bond: steric number is %d and number of lone pairs is %s'%(atom.steric_number,
# atom.number_of_lone_pairs)
# return
debug('Setting steric number and lone pairs for', atom)
atom.steric_number = 0
debug('%65s: %4d'%('Valence electrons',self.valence_electrons[atom.element]))
debug('{0:>65s}: {1:>4d}'.format(
'Valence electrons', self.valence_electrons[atom.element]))
atom.steric_number += self.valence_electrons[atom.element]
debug('%65s: %4d'%('Number of bonds',len(atom.bonded_atoms)))
debug('{0:>65s}: {1:>4d}'.format(
'Number of bonds', len(atom.bonded_atoms)))
atom.steric_number += len(atom.bonded_atoms)
debug('%65s: %4d'%('Number of hydrogen atoms to add',atom.number_of_protons_to_add))
debug('{0:>65s}: {1:>4d}'.format(
'Number of hydrogen atoms to add', atom.number_of_protons_to_add))
atom.steric_number += atom.number_of_protons_to_add
debug('%65s: %4d'%('Number of pi-electrons in double and triple bonds(-)',atom.num_pi_elec_2_3_bonds))
debug('{0:>65s}: {1:>4d}'.format(
'Number of pi-electrons in double and triple bonds(-)',
atom.num_pi_elec_2_3_bonds))
atom.steric_number -= atom.num_pi_elec_2_3_bonds
debug('%65s: %4d'%('Number of pi-electrons in conjugated double and triple bonds(-)',atom.num_pi_elec_conj_2_3_bonds))
debug('{0:>65s}: {1:>4d}'.format(
'Number of pi-electrons in conjugated double and triple bonds(-)',
atom.num_pi_elec_conj_2_3_bonds))
atom.steric_number -= atom.num_pi_elec_conj_2_3_bonds
debug('%65s: %4d'%('Number of donated co-ordinated bonds',0))
debug('{0:>65s}: {1:>4d}'.format(
'Number of donated co-ordinated bonds', 0))
atom.steric_number += 0
debug('%65s: %4.1f'%('Charge(-)',atom.charge))
debug('{0:>65s}: {1:>4.1f}'.format(
'Charge(-)', atom.charge))
atom.steric_number -= atom.charge
atom.steric_number = math.floor(atom.steric_number/2.0)
atom.number_of_lone_pairs = atom.steric_number - len(atom.bonded_atoms) - atom.number_of_protons_to_add
atom.number_of_lone_pairs = (
atom.steric_number-len(atom.bonded_atoms)-atom.number_of_protons_to_add)
debug('-'*70)
debug('%65s: %4d'%('Steric number',atom.steric_number))
debug('%65s: %4d'%('Number of lone pairs',atom.number_of_lone_pairs))
debug('{0:>65s}: {1:>4d}'.format(
'Steric number', atom.steric_number))
debug('{0:>65s}: {1:>4d}'.format(
'Number of lone pairs', atom.number_of_lone_pairs))
atom.steric_num_lone_pairs_set = True
return
def add_protons(self, atom):
"""Add protons to atom.
Args:
atom: atom for calculation
"""
# decide which method to use
debug('PROTONATING',atom)
debug('PROTONATING', atom)
if atom.steric_number in list(self.protonation_methods.keys()):
self.protonation_methods[atom.steric_number](atom)
else:
warning('Do not have a method for protonating', atom, '(steric number: %d)' % atom.steric_number)
return
warning('Do not have a method for protonating', atom,
'(steric number: {0:d})'.format(atom.steric_number))
def trigonal(self, atom):
debug('TRIGONAL - %d bonded atoms'%(len(atom.bonded_atoms)))
"""Add hydrogens in trigonal geometry.
Args:
atom: atom to protonate
"""
debug('TRIGONAL - {0:d} bonded atoms'.format(len(atom.bonded_atoms)))
rot_angle = math.radians(120.0)
c = vector(atom1 = atom)
cvec = Vector(atom1=atom)
# 0 bonds
if len(atom.bonded_atoms) == 0:
pass
# 1 bond
if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0:
# Add another atom with the right angle to the first one
a = vector(atom1 = atom, atom2 = atom.bonded_atoms[0])
avec = Vector(atom1=atom, atom2=atom.bonded_atoms[0])
# use plane of bonded trigonal atom - e.g. arg
self.set_steric_number_and_lone_pairs(atom.bonded_atoms[0])
if atom.bonded_atoms[0].steric_number == 3 and len(atom.bonded_atoms[0].bonded_atoms)>1:
# use other atoms bonded to the neighbour to establish the plane, if possible
if (atom.bonded_atoms[0].steric_number == 3
and len(atom.bonded_atoms[0].bonded_atoms) > 1):
# use other atoms bonded to the neighbour to establish the
# plane, if possible
other_atom_indices = []
for i in range(len(atom.bonded_atoms[0].bonded_atoms)):
if atom.bonded_atoms[0].bonded_atoms[i] != atom:
for i, bonded_atom in enumerate(
atom.bonded_atoms[0].bonded_atoms):
if bonded_atom != atom:
other_atom_indices.append(i)
v1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0])
v2 = vector(atom1 = atom.bonded_atoms[0],
atom2 = atom.bonded_atoms[0].bonded_atoms[other_atom_indices[0]])
axis = v1**v2
# this is a trick to make sure that the order of atoms doesn't influence
# the final postions of added protons
if len(other_atom_indices)>1:
v3 = vector(atom1 = atom.bonded_atoms[0],
atom2 = atom.bonded_atoms[0].bonded_atoms[other_atom_indices[1]])
axis2 = v1**v3
if axis * axis2>0:
vec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0])
vec2 = Vector(atom1=atom.bonded_atoms[0],
atom2=atom.bonded_atoms[0]
.bonded_atoms[other_atom_indices[0]])
axis = vec1**vec2
# this is a trick to make sure that the order of atoms doesn't
# influence the final postions of added protons
if len(other_atom_indices) > 1:
vec3 = Vector(atom1=atom.bonded_atoms[0],
atom2=atom.bonded_atoms[0]
.bonded_atoms[other_atom_indices[1]])
axis2 = vec1**vec3
if axis*axis2 > 0:
axis = axis+axis2
else:
axis = axis-axis2
else:
axis = a.orthogonal()
a = rotate_vector_around_an_axis(rot_angle, axis, a)
a = self.set_bond_distance(a, atom.element)
self.add_proton(atom, c+a)
axis = avec.orthogonal()
avec = rotate_vector_around_an_axis(rot_angle, axis, avec)
avec = self.set_bond_distance(avec, atom.element)
self.add_proton(atom, cvec+avec)
# 2 bonds
if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0:
# Add another atom with the right angle to the first two
a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0)
a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0)
avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0)
avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0)
new_a = -a1 - a2
new_a = -avec1 - avec2
new_a = self.set_bond_distance(new_a, atom.element)
self.add_proton(atom, c+new_a)
return
self.add_proton(atom, cvec+new_a)
def tetrahedral(self, atom):
debug('TETRAHEDRAL - %d bonded atoms'%(len(atom.bonded_atoms)))
"""Protonate atom in tetrahedral geometry.
Args:
atom: atom to protonate.
"""
debug(
'TETRAHEDRAL - {0:d} bonded atoms'.format(len(atom.bonded_atoms)))
# TODO - might be good to move tetrahedral angle to constant
rot_angle = math.radians(109.5)
# sanity check
# if atom.number_of_protons_to_add + len(atom.bonded_atoms) != 4:
# self.display 'Error: Attempting tetrahedral structure with %d bonds'%(atom.number_of_protons_to_add +
# len(atom.bonded_atoms))
c = vector(atom1 = atom)
cvec = Vector(atom1=atom)
# 0 bonds
if len(atom.bonded_atoms) == 0:
pass
# 1 bond
if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0:
# Add another atom with the right angle to the first one
a = vector(atom1 = atom, atom2 = atom.bonded_atoms[0])
axis = a.orthogonal()
a = rotate_vector_around_an_axis(rot_angle, axis, a)
a = self.set_bond_distance(a, atom.element)
self.add_proton(atom, c+a)
avec = Vector(atom1=atom, atom2=atom.bonded_atoms[0])
axis = avec.orthogonal()
avec = rotate_vector_around_an_axis(rot_angle, axis, avec)
avec = self.set_bond_distance(avec, atom.element)
self.add_proton(atom, cvec+avec)
# 2 bonds
if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0:
# Add another atom with the right angle to the first two
a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0)
a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0)
axis = a1 + a2
new_a = rotate_vector_around_an_axis(math.radians(90), axis, -a1)
avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0)
avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0)
axis = avec1 + avec2
new_a = rotate_vector_around_an_axis(math.radians(90), axis,
-avec1)
new_a = self.set_bond_distance(new_a, atom.element)
self.add_proton(atom, c+new_a)
self.add_proton(atom, cvec+new_a)
# 3 bonds
if len(atom.bonded_atoms) == 3 and atom.number_of_protons_to_add > 0:
a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0)
a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0)
a3 = vector(atom1 = atom, atom2 = atom.bonded_atoms[2]).rescale(1.0)
new_a = -a1-a2-a3
avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0)
avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0)
avec3 = Vector(atom1=atom, atom2=atom.bonded_atoms[2]).rescale(1.0)
new_a = -avec1-avec2-avec3
new_a = self.set_bond_distance(new_a, atom.element)
self.add_proton(atom, c+new_a)
self.add_proton(atom, cvec+new_a)
return
@staticmethod
def add_proton(atom, position):
"""Add a proton to an atom at a specific position.
def add_proton(self, atom, position):
Args:
atom: atom to protonate
position: position for proton
"""
# Create the new proton
new_H = propka.atom.Atom()
new_H.set_property(numb = None,
name = 'H%s'%atom.name[1:],
res_name = atom.res_name,
chain_id = atom.chain_id,
res_num = atom.res_num,
x = round(position.x,3), # round of to three digimal points
y = round(position.y,3), # to avoid round-off differences
z = round(position.z,3), # when input file
occ = None,
beta = None)
new_H.element = 'H'
new_H.type = atom.type
new_H.bonded_atoms = [atom]
new_H.charge = 0
new_H.steric_number = 0
new_H.number_of_lone_pairs = 0
new_H.number_of_protons_to_add = 0
new_H.num_pi_elec_2_3_bonds = 0
new_H.is_protonates = True
atom.bonded_atoms.append(new_H)
atom.number_of_protons_to_add -=1
atom.conformation_container.add_atom(new_H)
new_h = propka.atom.Atom()
new_h.set_property(
numb=None,
name='H{0:s}'.format(atom.name[1:]),
res_name=atom.res_name,
chain_id=atom.chain_id,
res_num=atom.res_num,
x=round(position.x, 3), # round of to three decimal points to
# avoid round-off differences in input file
y=round(position.y, 3),
z=round(position.z, 3),
occ=None,
beta=None)
new_h.element = 'H'
new_h.type = atom.type
new_h.bonded_atoms = [atom]
new_h.charge = 0
new_h.steric_number = 0
new_h.number_of_lone_pairs = 0
new_h.number_of_protons_to_add = 0
new_h.num_pi_elec_2_3_bonds = 0
new_h.is_protonates = True
atom.bonded_atoms.append(new_h)
atom.number_of_protons_to_add -= 1
atom.conformation_container.add_atom(new_h)
# update names of all protons on this atom
new_H.residue_label = "%-3s%4d%2s" % (new_H.name,new_H.res_num, new_H.chain_id)
new_h.residue_label = "{0:<3s}{1:>4d}{2:>2s}".format(
new_h.name, new_h.res_num, new_h.chain_id)
no_protons = atom.count_bonded_elements('H')
if no_protons > 1:
i = 1
for proton in atom.get_bonded_elements('H'):
proton.name = 'H%s%d'%(atom.name[1:],i)
proton.residue_label = "%-3s%4d%2s" % (proton.name,proton.res_num, proton.chain_id)
i+=1
proton.name = 'H{0:s}{1:d}'.format(
atom.name[1:], i)
proton.residue_label = "{0:<3s}{1:>4d}{2:>2s}".format(
proton.name, proton.res_num, proton.chain_id)
i += 1
debug('added', new_h, 'to', atom)
def set_bond_distance(self, bvec, element):
"""Set bond distance between atom and element.
debug('added',new_H, 'to',atom)
return
def set_bond_distance(self, a, element):
d = 1.0
Args:
bvec: bond vector
element: bonded element
Returns:
scaled bond vector
"""
dist = 1.0
if element in list(self.bond_lengths.keys()):
d = self.bond_lengths[element]
dist = self.bond_lengths[element]
else:
warning('Bond length for %s not found, using the standard value of %f' % (element, d))
a = a.rescale(d)
return a
if __name__ == '__main__':
import protein, pdb, sys,os
arguments = sys.argv
if len(arguments) != 2:
info('Usage: protonate.py <pdb_file>')
sys.exit(0)
filename = arguments[1]
if not os.path.isfile(filename):
info('Error: Could not find \"%s\"' % filename)
sys.exit(1)
p = Protonate()
pdblist = pdb.readPDB(filename)
my_protein = protein.Protein(pdblist,'test.pdb')
p.remove_all_hydrogen_atoms_from_protein(my_protein)
my_protein.writePDB('before_protonation.pdb')
p.protonate_protein(my_protein)
## write out protonated file
my_protein.writePDB('protonated.pdb')
str_ = (
'Bond length for {0:s} not found, using the standard value '
'of {1:f}'.format(element, dist))
warning(str_)
bvec = bvec.rescale(dist)
return bvec

View File

@@ -1,24 +1,24 @@
# entry point for propka script
"""Entry point for PROPKA script."""
import logging
import propka.lib, propka.molecular_container
from propka.lib import loadOptions
from propka.molecular_container import Molecular_container
_LOGGER = logging.getLogger("PROPKA")
def main():
"""
Reads in structure files, calculates pKa values, and prints pKa files
"""
# loading options, flaggs and arguments
options = propka.lib.loadOptions()
def main(optargs=None):
"""Read in structure files, calculate pKa values, and print pKa files."""
# loading options, flags and arguments
optargs = optargs if optargs is not None else []
options = loadOptions(*optargs)
pdbfiles = options.filenames
for pdbfile in pdbfiles:
my_molecule = propka.molecular_container.Molecular_container(pdbfile, options)
my_molecule = Molecular_container(pdbfile, options)
my_molecule.calculate_pka()
my_molecule.write_pka()
def single(pdbfile, optargs=None):
"""Run a single PROPKA calculation using *pdbfile* as input.
@@ -27,15 +27,15 @@ def single(pdbfile, optargs=None):
.. rubric:: Example
::
single("protein.pdb", optargs=["--mutation=N25R/N181D", "-v", "--pH=7.2"])
single("protein.pdb", optargs=["--mutation=N25R/N181D", "-v",
"--pH=7.2"])
"""
optargs = optargs if optargs is not None else []
options = propka.lib.loadOptions(*optargs)
options = loadOptions(*optargs)
pdbfile = options.filenames.pop(0)
if len(options.filenames) > 0:
_LOGGER.warning("Ignoring filenames: %s", options.filenames)
my_molecule = propka.molecular_container.Molecular_container(pdbfile, options)
_LOGGER.warning("Ignoring filenames: {0:s}".format(options.filenames))
my_molecule = Molecular_container(pdbfile, options)
my_molecule.calculate_pka()
my_molecule.write_pka()
return my_molecule

View File

@@ -1,11 +1,21 @@
from __future__ import division
from __future__ import print_function
"""Vector algebra for PROPKA."""
import math
from propka.lib import info
from propka.lib import info, get_sorted_configurations
class vector:
""" Vector """
def __init__(self, xi=0.0, yi=0.0, zi=0.0, atom1 = 0, atom2 = 0):
class Vector:
"""Vector"""
def __init__(self, xi=0.0, yi=0.0, zi=0.0, atom1=None, atom2=None):
"""Initialize vector.
Args:
xi: default x-coordinate
yi: default y-coordinate
zi: default z-coordinate
atom1: atom center used to define default coordinate
atom2: two atom centers used to define vector
"""
self.x = xi
self.y = yi
self.z = zi
@@ -22,224 +32,280 @@ class vector:
self.y = atom2.y - self.y
self.z = atom2.z - self.z
return
def __add__(self, other):
return vector(self.x + other.x,
return Vector(self.x + other.x,
self.y + other.y,
self.z + other.z)
def __sub__(self, other):
return vector(self.x - other.x,
return Vector(self.x - other.x,
self.y - other.y,
self.z - other.z)
def __mul__(self, other):
""" Dot product, scalar and matrix multiplication """
if isinstance(other,vector):
"""Dot product, scalar and matrix multiplication."""
if isinstance(other, Vector):
return self.x * other.x + self.y * other.y + self.z * other.z
elif isinstance(other, matrix4x4):
return vector(
xi = other.a11*self.x + other.a12*self.y + other.a13*self.z + other.a14*1.0,
yi = other.a21*self.x + other.a22*self.y + other.a23*self.z + other.a24*1.0,
zi = other.a31*self.x + other.a32*self.y + other.a33*self.z + other.a34*1.0
elif isinstance(other, Matrix4x4):
return Vector(
xi=other.a11*self.x + other.a12*self.y + other.a13*self.z
+ other.a14*1.0,
yi=other.a21*self.x + other.a22*self.y + other.a23*self.z
+ other.a24*1.0,
zi=other.a31*self.x + other.a32*self.y + other.a33*self.z
+ other.a34*1.0
)
elif type(other) in [int, float]:
return vector(self.x * other, self.y * other, self.z * other)
return Vector(self.x * other, self.y * other, self.z * other)
else:
info('%s not supported' % type(other))
info('{0:s} not supported'.format(type(other)))
raise TypeError
def __rmul__(self,other):
return self.__mul__(other)
def __rmul__(self, other):
return self.__mul__(other)
def __pow__(self, other):
""" Cross product """
return vector(self.y * other.z - self.z * other.y,
"""Cross product."""
return Vector(self.y * other.z - self.z * other.y,
self.z * other.x - self.x * other.z,
self.x * other.y - self.y * other.x)
def __neg__(self):
res = vector(xi = -self.x,
yi = -self.y,
zi = -self.z)
res = Vector(xi=-self.x,
yi=-self.y,
zi=-self.z)
return res
def sq_length(self):
"""Return vector squared-length"""
return self.x * self.x + self.y * self.y + self.z * self.z
def length(self):
"""Return vector length."""
return math.sqrt(self.sq_length())
def __str__(self):
return '%10.4f %10.4f %10.4f'%(self.x, self.y, self.z)
return '{0:>10.4f} {1:>10.4f} {2:>10.4f}'.format(
self.x, self.y, self.z)
def __repr__(self):
return '<vector>'
def orthogonal(self):
""" Returns a vector orthogonal to self """
res = vector(self.y, -self.x, 0)
res = Vector(self.y, -self.x, 0)
if abs(self.y) < abs(self.z):
res = vector(self.z, 0, -self.x)
res = Vector(self.z, 0, -self.x)
return res
def rescale(self, new_length):
""" Rescale vector to new length while preserving direction """
frac = new_length/(self.length())
res = vector(xi = self.x*frac,
yi = self.y*frac,
zi = self.z*frac)
res = Vector(xi=self.x*frac, yi=self.y*frac, zi=self.z*frac)
return res
class matrix4x4:
class Matrix4x4:
"""A 4-by-4 matrix class."""
def __init__(self,
a11i=0.0, a12i=0.0, a13i=0.0, a14i=0.0,
a21i=0.0, a22i=0.0, a23i=0.0, a24i=0.0,
a31i=0.0, a32i=0.0, a33i=0.0, a34i=0.0,
a41i=0.0, a42i=0.0, a43i=0.0, a44i=0.0):
"""Initialize with matrix elements."""
# Row 1
self.a11 = a11i
self.a12 = a12i
self.a13 = a13i
self.a14 = a14i
# Row 2
self.a21 = a21i
self.a22 = a22i
self.a23 = a23i
self.a24 = a24i
# Row 3
self.a31 = a31i
self.a32 = a32i
self.a33 = a33i
self.a34 = a34i
# Row 4
self.a41 = a41i
self.a42 = a42i
self.a43 = a43i
self.a44 = a44i
return
def angle(avec, bvec):
"""Get the angle between two vectors.
Args:
avec: vector 1
bvec: vector 2
Returns:
angle in radians
"""
dot = avec * bvec
return math.acos(dot / (avec.length() * bvec.length()))
def angle_degrees(avec, bvec):
"""Get the angle between two vectors in degrees.
Args:
avec: vector 1
bvec: vector 2
Returns:
angle in degrees
"""
return math.degrees(angle(avec, bvec))
def signed_angle_around_axis(avec, bvec, axis):
"""Get signed angle of two vectors around axis in radians.
# methods working on vectors
Args:
avec: vector 1
bvec: vector 2
axis: axis
Returns:
angle in radians
"""
norma = avec**axis
normb = bvec**axis
ang = angle(norma, normb)
dot_ = bvec*(avec**axis)
if dot_ < 0:
ang = -ang
return ang
def angle(a, b):
dot = a * b
return math.acos(dot / (a.length() * b.length()))
def rotate_vector_around_an_axis(theta, axis, vec):
"""Rotate vector around an axis.
def angle_degrees(a,b):
return math.degrees(angle(a, b))
def signed_angle_around_axis(a,b, axis):
na = a**axis
nb = b**axis
v = angle(na,nb)
d = b*(a**axis)
if d < 0:
v =-v
return v
def signed_angle_degrees(a,b):
return 180/math.pi * signed_angle(a, b)
def rotate_vector_around_an_axis(theta, axis, v):
#print "# 1. rotate space about the z-axis so that the rotation axis lies in the xz-plane"
gamma = 0.0
if axis.y != 0:
if axis.x != 0:
gamma = -axis.x/abs(axis.x)*math.asin(axis.y/(math.sqrt(axis.x*axis.x + axis.y*axis.y)))
else:
gamma = math.pi/2.0
Rz = rotate_atoms_around_z_axis(gamma)
v = Rz * v
axis = Rz * axis
#print "# 2. rotate space about the y-axis so that the rotation axis lies along the z-axis"
beta = 0.0
Args:
theta: rotation angle (in radians)
axis: axis for rotation
vec: vector to rotate
Returns:
rotated vector
"""
gamma = 0.0
if axis.y != 0:
if axis.x != 0:
beta = -axis.x/abs(axis.x)*math.acos(axis.z/math.sqrt(axis.x*axis.x + axis.z*axis.z))
Ry = rotate_atoms_around_y_axis(beta)
v = Ry * v
axis = Ry *axis
gamma = -axis.x/abs(axis.x)*math.asin(
axis.y/(math.sqrt(axis.x*axis.x + axis.y*axis.y)))
else:
gamma = math.pi/2.0
rot_z = rotate_atoms_around_z_axis(gamma)
vec = rot_z * vec
axis = rot_z * axis
beta = 0.0
if axis.x != 0:
beta = -axis.x/abs(axis.x)*math.acos(
axis.z/math.sqrt(axis.x*axis.x + axis.z*axis.z))
rot_y = rotate_atoms_around_y_axis(beta)
vec = rot_y * vec
axis = rot_y * axis
rot_z = rotate_atoms_around_z_axis(theta)
vec = rot_z * vec
rot_y = rotate_atoms_around_y_axis(-beta)
vec = rot_y * vec
rot_z = rotate_atoms_around_z_axis(-gamma)
vec = rot_z * vec
return vec
#print "# 3. perform the desired rotation by theta about the z-axis"
Rz = rotate_atoms_around_z_axis(theta)
v = Rz * v
#print "# 4. apply the inverse of step 2."
Ry = rotate_atoms_around_y_axis(-beta)
v = Ry * v
def rotate_atoms_around_z_axis(theta):
"""Get rotation matrix for z-axis.
#print "# 5. apply the inverse of step 1."
Rz = rotate_atoms_around_z_axis(-gamma)
v = Rz * v
return v
def rotate_atoms_around_z_axis(angle):
Rz = matrix4x4(
a11i = math.cos(angle), a12i = -math.sin(angle), a13i = 0.0, a14i = 0.0,
a21i = math.sin(angle), a22i = math.cos(angle), a23i = 0.0, a24i = 0.0,
a31i = 0.0 , a32i = 0.0 , a33i = 1.0, a34i = 0.0,
a41i = 0.0 , a42i = 0.0 , a43i = 0.0, a44i = 1.0
Args:
theta: angle of rotation (radians)
Returns:
rotation matrix
"""
return Matrix4x4(
a11i=math.cos(theta),
a12i=-math.sin(theta),
a13i=0.0,
a14i=0.0,
a21i=math.sin(theta),
a22i=math.cos(theta),
a23i=0.0,
a24i=0.0,
a31i=0.0,
a32i=0.0,
a33i=1.0,
a34i=0.0,
a41i=0.0,
a42i=0.0,
a43i=0.0,
a44i=1.0
)
return Rz
def rotate_atoms_around_y_axis(theta):
"""Get rotation matrix for y-axis.
def rotate_atoms_around_y_axis(angle):
Ry = matrix4x4(
a11i = math.cos(angle), a12i = 0.0, a13i = math.sin(angle), a14i = 0.0,
a21i = 0.0 , a22i = 1.0, a23i = 0.0 , a24i = 0.0,
a31i = -math.sin(angle), a32i = 0.0, a33i = math.cos(angle), a34i = 0.0,
a41i = 0.0 , a42i = 0.0, a43i = 0.0 , a44i = 1.0
Args:
theta: angle of rotation (radians)
Returns:
rotation matrix
"""
return Matrix4x4(
a11i=math.cos(theta),
a12i=0.0,
a13i=math.sin(theta),
a14i=0.0,
a21i=0.0,
a22i=1.0,
a23i=0.0,
a24i=0.0,
a31i=-math.sin(theta),
a32i=0.0,
a33i=math.cos(theta),
a34i=0.0,
a41i=0.0,
a42i=0.0,
a43i=0.0,
a44i=1.0
)
return Ry
class MultiVector:
"""Collection of vectors for multiple configurations of atoms.
TODO - this class does not appear to be used or covered by tests
"""
class multi_vector:
def __init__(self, atom1=0, atom2=0):
def __init__(self, atom1=None, atom2=None):
"""Initialize with atom configurations.
Args:
atom1: first atom to define vector
atom2: second atom to define vector
"""
self.vectors = []
self.keys = []
self.result = None
# store vectors for all configurations of atoms
if atom1!=0:
self.keys = lib.get_sorted_configurations(atom1.configurations.keys())
if atom2!=0:
keys2 = lib.get_sorted_configurations(atom2.configurations.keys())
if atom1 is not None:
self.keys = get_sorted_configurations(atom1.configurations.keys())
if atom2 is not None:
keys2 = get_sorted_configurations(atom2.configurations.keys())
if self.keys != keys2:
raise 'Cannot make multi vector: Atomic configurations mismatch for\n %s\n %s\n'%(atom1,atom2)
str_ = ('Cannot make multi vector: Atomic configurations '
'mismatch for\n {0:s}\n {1:s}\n'.format(
atom1, atom2))
raise KeyError(str_)
for key in self.keys:
atom1.setConfiguration(key)
if atom2!=0:
if atom2 != 0:
atom2.setConfiguration(key)
v = vector(atom1=atom1, atom2=atom2)
self.vectors.append(v)
#info(key,v)
return
vec = Vector(atom1=atom1, atom2=atom2)
self.vectors.append(vec)
def __getattribute__(self,name):
def __getattribute__(self, name):
try:
return object.__getattribute__(self, name)
except AttributeError:
@@ -247,72 +313,103 @@ class multi_vector:
def __str__(self):
res = ''
for i in range(len(self.keys)):
res += '%s %s\n'%(self.keys[i], self.vectors[i])
for i, key in enumerate(self.keys):
res += '{0:s} {1:s}\n'.format(key, self.vectors[i])
return res
def do_job(self, job):
#info(job)
self.res = multi_vector()
for i in range(len(self.vectors)):
self.res.vectors.append(eval('self.vectors[%d].%s()'%(i,job)))
self.res.keys.append(self.keys[i])
"""Append vectors to configuration.
Args:
job: name of function to apply to vectors
Returns:
TODO - figure out what this is
"""
self.result = MultiVector()
for i, vector in enumerate(self.vectors):
func = getattr(vector, job)
self.result.vectors.append(func())
self.result.keys.append(self.keys[i])
return self.get_result
@property
def get_result(self):
return self.res
"""Return the latest result."""
return self.result
def generic_operation(self, operation, other):
if self.keys != other.keys:
raise 'Incompatable keys'
"""Perform a generic operation between two MultiVector objects.
self.res = multi_vector()
Args:
operation: operation to perform (string)
other: other MultiVector object
"""
if self.keys != other.keys:
raise 'Incompatible keys'
self.result = MultiVector()
for i in range(len(self.vectors)):
self.res.vectors.append(eval('self.vectors[%d] %s other.vectors[%d]'%(i,operation,i)))
self.res.keys.append(self.keys[i])
return
self.result.vectors.append(
# TODO - eliminate eval() or entire class
eval(
'self.vectors[{0:d}] {1:s} other.vectors[{2:d}]'.format(
i, operation, i)))
self.result.keys.append(self.keys[i])
def __add__(self, other):
self.generic_operation('+',other)
return self.res
self.generic_operation('+', other)
return self.result
def __sub__(self, other):
self.generic_operation('-',other)
return self.res
self.generic_operation('-', other)
return self.result
def __mul__(self, other):
self.generic_operation('*',other)
return self.res
self.generic_operation('*', other)
return self.result
def __pow__(self, other):
self.generic_operation('**',other)
return self.res
self.generic_operation('**', other)
return self.result
def generic_self_operation(self, operation):
@staticmethod
def generic_self_operation(_):
"""TODO - delete this."""
return
def __neg__(self):
self.generic_operation('*',-1.0)
return self.res
self.generic_operation('*', -1.0)
return self.result
def rescale(self, new_length):
self.res = multi_vector()
for i in range(len(self.vectors)):
self.res.vectors.append(self.vectors[i].rescale(new_length))
self.res.keys.append(self.keys[i])
"""Rescale multi-vector to new length.
Args:
new_length: new length for multi-vector
Result:
MultiVector object
"""
self.result = MultiVector()
for i, vector in enumerate(self.vectors):
self.result.vectors.append(vector.rescale(new_length))
self.result.keys.append(self.keys[i])
return self.res
def rotate_multi_vector_around_an_axis(theta, axis, v):
""" both axis ans v must be multi_vectors """
def rotate_multi_vector_around_an_axis(theta, axis, vec):
"""Rotate a multi-vector around an axis.
if axis.keys != v.keys:
raise 'Incompatible keys in rotate multi_vector'
res = multi_vector()
for i in range(len(v.keys)):
res.vectors.append(rotate_vector_around_an_axis(theta, axis.vectors[i], v.vectors[i]))
res.keys.append(v.keys[i])
NOTE - both axis ans v must be MultiVectors.
Args:
theta: angle (in radians)
axis: multi-vector axis
vec: multi-vector vector
"""
if axis.keys != vec.keys:
raise 'Incompatible keys in rotate MultiVector'
res = MultiVector()
for i, key in enumerate(vec.keys):
res.vectors.append(rotate_vector_around_an_axis(
theta, axis.vectors[i], vec.vectors[i]))
res.keys.append(key)
return res

View File

@@ -1,216 +1,290 @@
from __future__ import division
from __future__ import print_function
import math
import sys, os
"""Contains version-specific methods and parameters.
import propka.lib as lib
from propka.lib import info, warning
import propka.calculations as calculations
import propka.parameters
TODO - this module unnecessarily confuses the code. Can we eliminate it?
"""
from propka.lib import info
import propka.calculations as calcs
class version:
def __init__(self,parameters):
class Version:
"""Store version-specific methods and parameters."""
def __init__(self, parameters):
self.parameters = parameters
return
self.desolvation_model = self.empty_function
self.weight_pair_method = self.empty_function
self.hydrogen_bond_interaction_model = self.empty_function
self.sidechain_interaction_model = self.empty_function
self.electrostatic_interaction_model = self.empty_function
self.coulomb_interaction_model = self.empty_function
self.check_coulomb_pair_method = self.empty_function
self.backbone_reorganisation_method = self.empty_function
self.exception_check_method = self.empty_function
self.molecular_preparation_method = self.empty_function
self.prepare_bonds = self.empty_function
@staticmethod
def empty_function(*args):
"""Placeholder function so we don't use uninitialized variables.
Args:
args: whatever arguments would have been passed to the function
Raises:
NotImplementedError
"""
err = "Called an empty Version function with args {0:s}".format(args)
raise NotImplementedError(err)
# desolvation
def calculate_desolvation(self, group):
"""Calculate desolvation energy using assigned model."""
return self.desolvation_model(self.parameters, group)
def calculatePairWeight(self, Nmass1, Nmass2):
return self.weight_pair_method(self.parameters, Nmass1, Nmass2)
def calculate_pair_weight(self, num_volume1, num_volume2):
"""Calculate pair weight using assigned model."""
return self.weight_pair_method(
self.parameters, num_volume1, num_volume2)
# side chains
def hydrogen_bond_interaction(self, group1, group2):
"""Calculate H-bond energy using assigned model."""
return self.hydrogen_bond_interaction_model(group1, group2, self)
def calculateSideChainEnergy(self, distance, dpka_max, cutoff, weight, f_angle):
return self.sidechain_interaction_model(distance, dpka_max, cutoff, f_angle) # weight is ignored in 3.0 Sep07
def calculate_side_chain_energy(self, distance, dpka_max, cutoff, _,
f_angle):
"""Calculate sidechain energy using assigned model."""
return self.sidechain_interaction_model(
distance, dpka_max, cutoff, f_angle)
# coulomb
def electrostatic_interaction(self, group1, group2, distance):
return self.electrostatic_interaction_model(group1, group2, distance, self)
"""Calculate electrostatic energy using assigned model."""
return self.electrostatic_interaction_model(
group1, group2, distance, self)
def calculateCoulombEnergy(self, distance, weight):
return self.coulomb_interaction_model(distance, weight, self.parameters)
def calculate_coulomb_energy(self, distance, weight):
"""Calculate Coulomb energy using assigned model."""
return self.coulomb_interaction_model(
distance, weight, self.parameters)
def checkCoulombPair(self, group1, group2, distance):
return self.check_coulomb_pair_method(self.parameters, group1, group2, distance)
def check_coulomb_pair(self, group1, group2, distance):
"""Check Coulomb pair using assigned model."""
return self.check_coulomb_pair_method(
self.parameters, group1, group2, distance)
# backbone re-organisation
def calculateBackBoneReorganization(self, conformation):
return self.backbone_reorganisation_method(self.parameters, conformation)
def calculate_backbone_reorganization(self, conformation):
"""Calculate backbone reorganization using assigned model."""
return self.backbone_reorganisation_method(
self.parameters, conformation)
# exceptions
def checkExceptions(self, group1, group2):
def check_exceptions(self, group1, group2):
"""Calculate exceptions using assigned model."""
return self.exception_check_method(self, group1, group2)
def setup_bonding_and_protonation(self, molecular_container):
return self.molecular_preparation_method(self.parameters, molecular_container)
"""Setup bonding and protonation using assigned model."""
return self.molecular_preparation_method(
self.parameters, molecular_container)
def setup_bonding(self, molecular_container):
"""Setup bonding using assigned model."""
return self.prepare_bonds(self.parameters, molecular_container)
class VersionA(Version):
"""TODO - figure out what this is."""
class version_A(version):
def __init__(self, parameters):
"""Initialize object with parameters."""
# set the calculation rutines used in this version
version.__init__(self, parameters)
# atom naming, bonding, and protonation
self.molecular_preparation_method = propka.calculations.setup_bonding_and_protonation
self.prepare_bonds = propka.calculations.setup_bonding
# desolvation related methods
self.desolvation_model = calculations.radial_volume_desolvation
self.weight_pair_method = calculations.calculatePairWeight
# side chain methods
self.sidechain_interaction_model = propka.calculations.HydrogenBondEnergy
self.hydrogen_bond_interaction_model = propka.calculations.hydrogen_bond_interaction
# colomb methods
self.electrostatic_interaction_model = propka.calculations.electrostatic_interaction
self.check_coulomb_pair_method = propka.calculations.checkCoulombPair
self.coulomb_interaction_model = propka.calculations.CoulombEnergy
#backbone
self.backbone_interaction_model = propka.calculations.HydrogenBondEnergy
self.backbone_reorganisation_method = propka.calculations.BackBoneReorganization
# exception methods
self.exception_check_method = propka.calculations.checkExceptions
return
super().__init__(parameters)
self.molecular_preparation_method = calcs.setup_bonding_and_protonation
self.prepare_bonds = calcs.setup_bonding
self.desolvation_model = calcs.radial_volume_desolvation
self.weight_pair_method = calcs.calculate_pair_weight
self.sidechain_interaction_model = calcs.hydrogen_bond_energy
self.hydrogen_bond_interaction_model = calcs.hydrogen_bond_interaction
self.electrostatic_interaction_model = calcs.electrostatic_interaction
self.check_coulomb_pair_method = calcs.check_coulomb_pair
self.coulomb_interaction_model = calcs.coulomb_energy
self.backbone_interaction_model = calcs.hydrogen_bond_energy
self.backbone_reorganisation_method = calcs.backbone_reorganization
self.exception_check_method = calcs.check_exceptions
def get_hydrogen_bond_parameters(self, atom1, atom2):
"""Get hydrogen bond parameters for two atoms.
Args:
atom1: first atom
atom2: second atom
Returns:
[dpka_max, cutoff]
"""
dpka_max = self.parameters.sidechain_interaction
cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type)
cutoff = self.parameters.sidechain_cutoffs.get_value(
atom1.group_type, atom2.group_type)
return [dpka_max, cutoff]
def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom):
"""Get hydrogen bond parameters between backbone atom and other atom.
Args:
backbone_atom: backbone atom
atom: other atom
Returns
[v, [c1, c3]] TODO - figure out what this is
"""
if backbone_atom.group_type == 'BBC':
if atom.group_type in self.parameters.backbone_CO_hydrogen_bond.keys():
[v,c1,c2] = self.parameters.backbone_CO_hydrogen_bond[atom.group_type]
return [v,[c1,c2]]
[v, c1, c2] = self.parameters.backbone_CO_hydrogen_bond[
atom.group_type]
return [v, [c1, c2]]
if backbone_atom.group_type == 'BBN':
if atom.group_type in self.parameters.backbone_NH_hydrogen_bond.keys():
[v,c1,c2] = self.parameters.backbone_NH_hydrogen_bond[atom.group_type]
return [v,[c1,c2]]
[v, c1, c2] = self.parameters.backbone_NH_hydrogen_bond[
atom.group_type]
return [v, [c1, c2]]
return None
class SimpleHB(VersionA):
"""A simple hydrogen bond version."""
class simple_hb(version_A):
def __init__(self, parameters):
"""Initialize object with parameters."""
# set the calculation rutines used in this version
version_A.__init__(self, parameters)
super().__init__(parameters)
info('Using simple hb model')
return
def get_hydrogen_bond_parameters(self, atom1, atom2):
return self.parameters.hydrogen_bonds.get_value(atom1.element, atom2.element)
"""Get hydrogen bond parameters for two atoms.
Args:
atom1: first atom
atom2: second atom
Returns:
[dpka_max, cutoff]
"""
return self.parameters.hydrogen_bonds.get_value(
atom1.element, atom2.element)
def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom):
return self.parameters.hydrogen_bonds.get_value(backbone_atom.element, atom.element)
"""Get hydrogen bond parameters between backbone atom and other atom.
Args:
backbone_atom: backbone atom
atom: other atom
Returns
[v, [c1, c3]] TODO - figure out what this is
"""
return self.parameters.hydrogen_bonds.get_value(
backbone_atom.element, atom.element)
class ElementBasedLigandInteractions(VersionA):
"""TODO - figure out what this is."""
class element_based_ligand_interactions(version_A):
def __init__(self, parameters):
"""Initialize object with parameters."""
# set the calculation rutines used in this version
version_A.__init__(self, parameters)
super().__init__(parameters)
info('Using detailed SC model!')
return
def get_hydrogen_bond_parameters(self, atom1, atom2):
if not 'hetatm' in [atom1.type, atom2.type]:
# this is a protein-protein interaction
dpka_max = self.parameters.sidechain_interaction.get_value(atom1.group_type, atom2.group_type)
cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type)
return [dpka_max, cutoff]
"""Get hydrogen bond parameters for two atoms.
Args:
atom1: first atom
atom2: second atom
Returns:
[dpka_max, cutoff]
"""
if 'hetatm' not in [atom1.type, atom2.type]:
# this is a protein-protein interaction
dpka_max = self.parameters.sidechain_interaction.get_value(
atom1.group_type, atom2.group_type)
cutoff = self.parameters.sidechain_cutoffs.get_value(
atom1.group_type, atom2.group_type)
return [dpka_max, cutoff]
# at least one ligand atom is involved in this interaction
# make sure that we are using the heavy atoms for finding paramters
elements = []
for a in [atom1, atom2]:
if a.element == 'H': elements.append(a.bonded_atoms[0].element)
else: elements.append(a.element)
return self.parameters.hydrogen_bonds.get_value(elements[0], elements[1])
for atom in [atom1, atom2]:
if atom.element == 'H':
elements.append(atom.bonded_atoms[0].element)
else:
elements.append(atom.element)
return self.parameters.hydrogen_bonds.get_value(
elements[0], elements[1])
def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom):
"""Get hydrogen bond parameters between backbone atom and other atom.
Args:
backbone_atom: backbone atom
atom: other atom
Returns
[v, [c1, c3]] TODO - figure out what this is
"""
if atom.type == 'atom':
# this is a backbone-protein interaction
if backbone_atom.group_type == 'BBC' and\
atom.group_type in self.parameters.backbone_CO_hydrogen_bond.keys():
[v,c1,c2] = self.parameters.backbone_CO_hydrogen_bond[atom.group_type]
return [v,[c1,c2]]
if (backbone_atom.group_type == 'BBC'
and atom.group_type
in self.parameters.backbone_CO_hydrogen_bond.keys()):
[v, c1, c2] = self.parameters.backbone_CO_hydrogen_bond[
atom.group_type]
return [v, [c1, c2]]
if backbone_atom.group_type == 'BBN' and\
atom.group_type in self.parameters.backbone_NH_hydrogen_bond.keys():
[v,c1,c2] = self.parameters.backbone_NH_hydrogen_bond[atom.group_type]
return [v,[c1,c2]]
if (backbone_atom.group_type == 'BBN'
and atom.group_type
in self.parameters.backbone_NH_hydrogen_bond.keys()):
[v, c1, c2] = self.parameters.backbone_NH_hydrogen_bond[
atom.group_type]
return [v, [c1, c2]]
else:
# this is a backbone-ligand interaction
# make sure that we are using the heavy atoms for finding paramters
elements = []
for a in [backbone_atom, atom]:
if a.element == 'H': elements.append(a.bonded_atoms[0].element)
else: elements.append(a.element)
res = self.parameters.hydrogen_bonds.get_value(elements[0], elements[1])
for atom2 in [backbone_atom, atom]:
if atom2.element == 'H':
elements.append(atom2.bonded_atoms[0].element)
else:
elements.append(atom2.element)
res = self.parameters.hydrogen_bonds.get_value(
elements[0], elements[1])
if not res:
info('Could not determine backbone interaction parameters for:',
backbone_atom, atom)
return
info(
'Could not determine backbone interaction parameters for:',
backbone_atom, atom)
return None
return None
class Propka30(Version):
"""Version class for PROPKA 3.0."""
class propka30(version):
def __init__(self, parameters):
# set the calculation rutines used in this version
version.__init__(self, parameters)
# atom naming, bonding, and protonation
self.molecular_preparation_method = propka.calculations.setup_bonding_and_protonation_30_style
# desolvation related methods
self.desolvation_model = calculations.radial_volume_desolvation
self.weight_pair_method = calculations.calculatePairWeight
# side chain methods
self.sidechain_interaction_model = propka.calculations.HydrogenBondEnergy
# colomb methods
self.check_coulomb_pair_method = propka.calculations.checkCoulombPair
self.coulomb_interaction_model = propka.calculations.CoulombEnergy
#backbone
self.backbone_reorganisation_method = propka.calculations.BackBoneReorganization
# exception methods
self.exception_check_method = propka.calculations.checkExceptions
return
"""Initialize object with parameters."""
# set the calculation routines used in this version
super().__init__(parameters)
self.molecular_preparation_method = (
calcs.setup_bonding_and_protonation_30_style)
self.desolvation_model = calcs.radial_volume_desolvation
self.weight_pair_method = calcs.calculate_pair_weight
self.sidechain_interaction_model = calcs.hydrogen_bond_energy
self.check_coulomb_pair_method = calcs.check_coulomb_pair
self.coulomb_interaction_model = calcs.coulomb_energy
self.backbone_reorganisation_method = calcs.backbone_reorganization
self.exception_check_method = calcs.check_exceptions
def get_hydrogen_bond_parameters(self, atom1, atom2):
dpka_max = self.parameters.sidechain_interaction.get_value(atom1.group_type, atom2.group_type)
cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type)
"""Get hydrogen bond parameters for two atoms.
Args:
atom1: first atom
atom2: second atom
Returns:
[dpka_max, cutoff]
"""
dpka_max = self.parameters.sidechain_interaction.get_value(
atom1.group_type, atom2.group_type)
cutoff = self.parameters.sidechain_cutoffs.get_value(
atom1.group_type, atom2.group_type)
return [dpka_max, cutoff]

View File

@@ -1,32 +1,30 @@
#!/usr/bin/env python
"""PROPKA script.
# This is the original propka script. However, this distribute-based
# installation moved the main() function into propka.run.main and just
# generates a script called propka31 from the setup.py installation
# script. You should not need to use this script.
#
# (Also note that there can be import problems because the script name
# is the same as the module name; that's why the new script is called
# propka31.)
This is the original propka script. However, this distribute-based
installation moved the main() function into propka.run.main and just
generates a script called propka31 from the setup.py installation
script. You should not need to use this script.
(Also note that there can be import problems because the script name
is the same as the module name; that's why the new script is called
propka31.)
"""
from propka.lib import loadOptions
from propka.molecular_container import Molecular_container
import propka.lib, propka.molecular_container
def main():
"""
Reads in structure files, calculates pKa values, and prints pKa files
"""
"""Read in structure files, calculates pKa values, and prints pKa files."""
# loading options, flaggs and arguments
options = propka.lib.loadOptions()
options = loadOptions([])
pdbfiles = options.filenames
for pdbfile in pdbfiles:
my_molecule = propka.molecular_container.Molecular_container(pdbfile, options)
my_molecule = Molecular_container(pdbfile, options)
my_molecule.calculate_pka()
my_molecule.write_pka()
if __name__ == '__main__':
#import cProfile
#cProfile.run('main()',sort=1)
main()

View File

@@ -22,7 +22,8 @@ MAX_ERR_DECIMALS = 2
TEST_DIR = Path("tests")
# Location for test PDBs
PDB_DIR = Path("pdb")
# Location for results for comparing output (allow running from tests/ and ../tests/)
# Location for results for comparing output (allow running from tests/ and
# ../tests/)
RESULTS_DIR = Path("tests/results")
if not RESULTS_DIR.is_dir():
_LOGGER.warning("Switching to sub-directory")
@@ -47,7 +48,9 @@ def get_test_dirs():
if test_path.is_dir():
path_dict[key] = test_path
else:
errstr = "Can't find %s test files in %s" % (key, [TEST_DIR / path, path])
errstr = (
"Can't find {0:s} test files in {1:s}".format(
key, [TEST_DIR / path, path]))
raise FileNotFoundError(errstr)
return path_dict
@@ -63,11 +66,13 @@ def run_propka(options, pdb_path, tmp_path):
options += [str(pdb_path)]
args = propka.lib.loadOptions(options)
try:
_LOGGER.warning("Working in tmpdir %s because of PROPKA file output; need to fix this.",
tmp_path)
_LOGGER.warning(
"Working in tmpdir {0:s} because of PROPKA file output; "
"need to fix this.".format(str(tmp_path)))
cwd = Path.cwd()
os.chdir(tmp_path)
molecule = propka.molecular_container.Molecular_container(str(pdb_path), args)
molecule = propka.molecular_container.Molecular_container(
str(pdb_path), args)
molecule.calculate_pka()
molecule.write_pka()
finally:
@@ -90,7 +95,7 @@ def compare_output(pdb, tmp_path, ref_path):
ref_data.append(float(line))
test_data = []
pka_path = Path(tmp_path) / ("%s.pka" % pdb)
pka_path = Path(tmp_path) / ("{0:s}.pka".format(pdb))
with open(pka_path, "rt") as pka_file:
at_pka = False
for line in pka_file:
@@ -100,12 +105,15 @@ def compare_output(pdb, tmp_path, ref_path):
elif line.startswith("---"):
at_pka = False
else:
m = re.search(r'([0-9]+\.[0-9]+)', line)
value = float(m.group(0))
match = re.search(r'([0-9]+\.[0-9]+)', line)
value = float(match.group(0))
test_data.append(value)
errstr = "Error exceeds maximum allowed value (%d decimal places)" % MAX_ERR_DECIMALS
assert_almost_equal(test_data, ref_data, decimal=MAX_ERR_DECIMALS,
err_msg=errstr, verbose=True)
errstr = (
"Error exceeds maximum allowed value ({0:d} decimal places)".format(
MAX_ERR_DECIMALS))
assert_almost_equal(
test_data, ref_data, decimal=MAX_ERR_DECIMALS, err_msg=errstr,
verbose=True)
@pytest.mark.parametrize("pdb, options", [
@@ -113,24 +121,26 @@ def compare_output(pdb, tmp_path, ref_path):
pytest.param('1HPX', [], id="1HPX: no options"),
pytest.param('4DFR', [], id="4DFR: no options"),
pytest.param('3SGB', [], id="3SGB: no options"),
pytest.param('3SGB-subset', ["--titrate_only",
pytest.param('3SGB-subset', [
"--titrate_only",
"E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
id="3SGB: --titrate_only"),
id="3SGB: --titrate_only"),
pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet")])
def test_regression(pdb, options, tmp_path):
"""Basic regression test of PROPKA functionality."""
path_dict = get_test_dirs()
ref_path = path_dict["results"] / ("%s.dat" % pdb)
ref_path = path_dict["results"] / ("{0:s}.dat".format(pdb))
if ref_path.is_file():
ref_path = ref_path.resolve()
else:
_LOGGER.warning("Missing results file for comparison: %s", ref_path)
_LOGGER.warning("Missing results file for comparison: {0:s}".format(
str(ref_path)))
ref_path = None
pdb_path = path_dict["pdbs"] / ("%s.pdb" % pdb)
pdb_path = path_dict["pdbs"] / ("{0:s}.pdb".format(pdb))
if pdb_path.is_file():
pdb_path = pdb_path.resolve()
else:
errstr = "Missing PDB file: %s" % pdb_path
errstr = "Missing PDB file: {0:s}".format(pdb_path)
raise FileNotFoundError(errstr)
tmp_path = Path(tmp_path).resolve()

60
tests/test_hybrid36.py Normal file
View File

@@ -0,0 +1,60 @@
"""Test the hybrid36 module."""
import unittest
import propka.hybrid36 as hybrid36
class Hybrid36Test(unittest.TestCase):
"""Test class for hybrid36."""
def test_decode(self):
"""Test decoding functions."""
test_values = {
"99999": 99999,
"A0000": 100000,
"0": 0,
"9": 9,
"A": 10,
" ZZZZY": 43770014,
"ZZZZZ": 43770015, # ZZZZZ - A0000 + 100000
"a0000": 43770016,
"zzzzz": 87440031,
"zzzzy": 87440030,
"99": 99,
"A0": 100,
"ZZ": 1035,
"zz": 1971,
"-99999": -99999,
"-A0000": -100000,
"-0": 0,
"-9": -9,
"-A": -10,
"-ZZZZY": -43770014,
"-ZZZZZ": -43770015, # ZZZZZ - A0000 + 100000
"-a0000": -43770016,
"-zzzzz": -87440031,
"-zzzzy": -87440030,
"-99": -99,
"-A0": -100,
"-ZZ": -1035,
"-zz": -1971,
"PROPKA": 954495146,
"A001Z": 100071,
"B0000": 1779616,
}
for key, value in test_values.items():
self.assertEqual(hybrid36.decode(key), value)
def test_errors(self):
"""Test values that should raise errors."""
test_values = [
"99X99",
"X9-99",
"XYZa",
"",
"-",
"!NotOk",
]
for value in test_values:
with self.assertRaises(ValueError) as err:
hybrid36.decode(value)
self.assertTrue(value in str(err.exception))