Merge pull request #40 from Electrostatics/nathan/delint

De-lint PROPKA
This commit is contained in:
Nathan Baker
2020-05-30 08:14:45 -07:00
committed by GitHub
26 changed files with 5238 additions and 4327 deletions

2
.pylintrc Normal file
View File

@@ -0,0 +1,2 @@
[MESSAGES CONTROL]
disable = no-else-return

View File

@@ -5,15 +5,35 @@ import propka.group
from . import hybrid36 from . import hybrid36
# Format strings that get used in multiple places (or are very complex)
PKA_FMT = "{:6.2f}"
INPUT_LINE_FMT = (
"{type:6s}{r.numb:>5d} {atom_label} {r.res_name}{r.chain_id:>2s}"
"{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{group:>6s}{pka:>6s} \n")
PDB_LINE_FMT1 = (
"{type:6s}{r.numb:>5d} {atom_label} {r.res_name}{r.chain_id:>2s}"
"{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{r.occ:>6s}"
"{r.beta:>6s}\n")
MOL2_LINE_FMT = (
"{id:<4d} {atom_label:4s} "
"{r.x:>10.4f} {r.y:>10.4f} {r.z:>10.4f} "
"{r.sybyl_type:>6s} {r.res_num:>6d} {r.res_name:>10s} 0.0000\n")
PDB_LINE_FMT2 = (
"ATOM {numb:>6d} {atom_label} {res_name}{chain_id:>2s}{res_num:>4d}"
"{x:>12.3f}{y:>8.3f}{z:>8.3f}{occ:>6.2f}{beta:>6.2f}\n")
STR_FMT = (
"{r.numb:>5d}-{r.name:>4s} {r.res_num:>5d}-{r.res_name:>3s} "
"({r.chain_id:1s}) [{r.x:>8.3f} {r.y:>8.3f} {r.z:>8.3f}] {r.element:s}")
class Atom(object): class Atom(object):
"""Atom class - contains all atom information found in the PDB file""" """Atom class - contains all atom information found in the PDB file"""
def __init__(self, line=None, verbose=False): def __init__(self, line=None):
"""Initialize Atom object. """Initialize Atom object.
Args: Args:
line: Line from a PDB file to set properties of atom. line: Line from a PDB file to set properties of atom.
verbose: TODO - this does not appear to be used. Can we remove it?
""" """
self.occ = None self.occ = None
self.numb = None self.numb = None
@@ -48,7 +68,8 @@ class Atom(object):
self.num_pi_elec_conj_2_3_bonds = 0 self.num_pi_elec_conj_2_3_bonds = 0
self.groups_extracted = 0 self.groups_extracted = 0
self.set_properties(line) self.set_properties(line)
self.residue_label = "%-3s%4d%2s" % (self.name, self.res_num, self.chain_id) fmt = "{r.name:3s}{r.res_num:>4d}{r.chain_id:>2s}"
self.residue_label = fmt.format(r=self)
# ligand atom types # ligand atom types
self.sybyl_type = '' self.sybyl_type = ''
@@ -82,13 +103,14 @@ class Atom(object):
self.y = float(line[38:46].strip()) self.y = float(line[38:46].strip())
self.z = float(line[46:54].strip()) self.z = float(line[46:54].strip())
self.res_num = int(line[22:26].strip()) self.res_num = int(line[22:26].strip())
self.res_name = "%-3s" % (line[17:20].strip()) self.res_name = "{0:<3s}".format(line[17:20].strip())
self.chain_id = line[21] self.chain_id = line[21]
# Set chain id to "_" if it is just white space. # Set chain id to "_" if it is just white space.
if not self.chain_id.strip(): if not self.chain_id.strip():
self.chain_id = '_' self.chain_id = '_'
self.type = line[:6].strip().lower() self.type = line[:6].strip().lower()
# TODO - define nucleic acid residue names elsewhere
if self.res_name in ['DA ', 'DC ', 'DG ', 'DT ']: if self.res_name in ['DA ', 'DC ', 'DG ', 'DT ']:
self.type = 'hetatm' self.type = 'hetatm'
@@ -101,7 +123,8 @@ class Atom(object):
if len(self.name) == 4: if len(self.name) == 4:
self.element = self.element[0] self.element = self.element[0]
if len(self.element) == 2: if len(self.element) == 2:
self.element = '%1s%1s' % (self.element[0], self.element[1].lower()) self.element = '{0:1s}{1:1s}'.format(
self.element[0], self.element[1].lower())
def set_group_type(self, type_): def set_group_type(self, type_):
"""Set group type of atom. """Set group type of atom.
@@ -130,9 +153,9 @@ class Atom(object):
array of bonded atoms. array of bonded atoms.
""" """
res = [] res = []
for ba in self.bonded_atoms: for bond_atom in self.bonded_atoms:
if ba.element == element: if bond_atom.element == element:
res.append(ba) res.append(bond_atom)
return res return res
def get_bonded_heavy_atoms(self): def get_bonded_heavy_atoms(self):
@@ -156,12 +179,14 @@ class Atom(object):
if ba == other_atom: if ba == other_atom:
return True return True
if max_bonds > cur_bond: if max_bonds > cur_bond:
if ba.is_atom_within_bond_distance(other_atom, max_bonds, cur_bond+1): if ba.is_atom_within_bond_distance(other_atom, max_bonds,
cur_bond+1):
return True return True
return False return False
def set_property(self, numb=None, name=None, res_name=None, chain_id=None, def set_property(self, numb=None, name=None, res_name=None, chain_id=None,
res_num=None, x=None, y=None, z=None, occ=None, beta=None): res_num=None, x=None, y=None, z=None, occ=None,
beta=None):
"""Set properties of the atom object. """Set properties of the atom object.
Args: Args:
@@ -225,7 +250,8 @@ class Atom(object):
"""PDB line for this atom. """PDB line for this atom.
TODO - Could be @property method/attribute TODO - Could be @property method/attribute
TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2 TODO - figure out difference between make_pdb_line, make_input_line,
and make_pdb_line2
Returns: Returns:
String with PDB-format line. String with PDB-format line.
@@ -238,12 +264,11 @@ class Atom(object):
group = 'C-' ## circumventing C-/COO parameter unification group = 'C-' ## circumventing C-/COO parameter unification
if self.group.titratable: if self.group.titratable:
model_pka = '%6.2f'%self.group.model_pka model_pka = PKA_FMT.format(self.group.model_pka)
str_ = "%-6s%5d %s " % (self.type.upper(), self.numb, str_ = INPUT_LINE_FMT.format(
propka.lib.makeTidyAtomLabel(self.name, self.element)) type=self.type.upper(), r=self,
str_ += "%s%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s \n" % (self.res_name, self.chain_id, atom_label=propka.lib.make_tidy_atom_label(self.name, self.element),
self.res_num, self.x, self.y, group=group, pka=model_pka)
self.z, group, model_pka)
return str_ return str_
def make_conect_line(self): def make_conect_line(self):
@@ -252,15 +277,15 @@ class Atom(object):
Returns: Returns:
String with PDB line. String with PDB line.
""" """
res = 'CONECT%5d' % self.numb res = 'CONECT{0:5d}'.format(self.numb)
bonded = [] bonded = []
for atom in self.bonded_atoms: for atom in self.bonded_atoms:
bonded.append(atom.numb) bonded.append(atom.numb)
bonded.sort() bonded.sort()
for b in bonded: for bond in bonded:
res += '%5d'%b res += '{0:5d}'.format(bond)
res += '\n' res += '\n'
return res return res
@@ -290,10 +315,15 @@ class Atom(object):
self.occ = self.occ.replace('LG', 'non_titratable_ligand') self.occ = self.occ.replace('LG', 'non_titratable_ligand')
# try to initialise the group # try to initialise the group
try: try:
# TODO - get rid of this exec() statement for security reasons group_attr = "{0:s}_group".format(self.occ)
exec('self.group = propka.group.%s_group(self)' % self.occ) group_attr = getattr(propka.group, group_attr)
self.group = group_attr(self)
except: except:
raise Exception('%s in input_file is not recognized as a group' % self.occ) # TODO - be more specific with expection handling here
str_ = (
'{0:s} in input_file is not recognized as a group'.format(
self.occ))
raise Exception(str_)
# set the model pKa value # set the model pKa value
if self.beta != '-': if self.beta != '-':
self.group.model_pka = float(self.beta) self.group.model_pka = float(self.beta)
@@ -306,17 +336,15 @@ class Atom(object):
"""Create PDB line. """Create PDB line.
TODO - this could/should be a @property method/attribute TODO - this could/should be a @property method/attribute
TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2 TODO - figure out difference between make_pdb_line, make_input_line,
and make_pdb_line2
Returns: Returns:
String with PDB line. String with PDB line.
""" """
str_ = "%-6s%5d " % (self.type.upper(), self.numb) str_ = PDB_LINE_FMT1.format(
str_ += "%s %s" % (propka.lib.makeTidyAtomLabel(self.name, self.element), type=self.type.upper(), r=self,
self.res_name) atom_label=propka.lib.make_tidy_atom_label(self.name, self.element))
str_ += "%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s\n" % (self.chain_id, self.res_num,
self.x, self.y, self.z,
self.occ, self.beta)
return str_ return str_
def make_mol2_line(self, id_): def make_mol2_line(self, id_):
@@ -329,19 +357,19 @@ class Atom(object):
Returns: Returns:
String with MOL2 line. String with MOL2 line.
""" """
str_ = "%-4d %-4s " % (id_, propka.lib.makeTidyAtomLabel(self.name, str_ = MOL2_LINE_FMT.format(
self.element)) id=id_, r=self,
str_ += "%10.4f %10.4f %10.4f " % (self.x, self.y, self.z) atom_label=propka.lib.make_tidy_atom_label(self.name, self.element))
str_ += "%6s %6d %10s %10.4f\n" % (self.sybyl_type.replace('-', ''),
self.res_num, self.res_name, 0.0)
return str_ return str_
def make_pdb_line2(self, numb=None, name=None, res_name=None, chain_id=None, def make_pdb_line2(self, numb=None, name=None, res_name=None, chain_id=None,
res_num=None, x=None, y=None, z=None, occ=None, beta=None): res_num=None, x=None, y=None, z=None, occ=None,
beta=None):
"""Create a PDB line. """Create a PDB line.
TODO - this could/should be a @property method/attribute TODO - this could/should be a @property method/attribute
TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2 TODO - figure out difference between make_pdb_line, make_input_line,
and make_pdb_line2
Returns: Returns:
String with PDB line. String with PDB line.
@@ -366,18 +394,11 @@ class Atom(object):
occ = self.occ occ = self.occ
if beta is None: if beta is None:
beta = self.beta beta = self.beta
str_ = "ATOM " str_ = PDB_LINE_FMT2.format(
str_ += "%6d" % (numb) numb=numb, res_name=res_name, chain_id=chain_id, res_num=res_num,
str_ += " %s" % (propka.lib.makeTidyAtomLabel(name, self.element)) x=x, y=y, z=z, occ=occ, beta=beta,
str_ += " %s" % (res_name) atom_label=propka.lib.make_tidy_atom_label(name, self.element)
str_ += "%2s" % (chain_id) )
str_ += "%4d" % (res_num)
str_ += "%12.3lf" % (x)
str_ += "%8.3lf" % (y)
str_ += "%8.3lf" % (z)
str_ += "%6.2lf" % (occ)
str_ += "%6.2lf" % (beta)
str_ += '\n'
return str_ return str_
def get_tidy_label(self): def get_tidy_label(self):
@@ -387,14 +408,11 @@ class Atom(object):
Returns: Returns:
String with label""" String with label"""
return propka.lib.makeTidyAtomLabel(self.name, self.element) return propka.lib.make_tidy_atom_label(self.name, self.element)
def __str__(self): def __str__(self):
"""Return an undefined-format string version of this atom.""" """Return an undefined-format string version of this atom."""
return '%5d-%4s %5d-%3s (%1s) [%8.3f %8.3f %8.3f] %s' % (self.numb, self.name, return STR_FMT.format(r=self)
self.res_num, self.res_name,
self.chain_id, self.x, self.y,
self.z, self.element)
def set_residue(self, residue): def set_residue(self, residue):
""" Makes a reference to the parent residue """ Makes a reference to the parent residue

View File

@@ -1,16 +1,10 @@
"""PROPKA representation of bonds.""" """PROPKA representation of bonds."""
# TODO - is pickle still used?
import pickle
# TODO - eliminate use of sys
import sys
# TODO - eliminate use of os
import os
import math import math
import json import json
import pkg_resources import pkg_resources
import propka.calculations import propka.calculations
# TODO - replace the info/warning imports with logging functionality # TODO - replace the info/warning imports with logging functionality
from propka.lib import info, warning from propka.lib import info
# TODO - should these constants be defined higher up in the module? # TODO - should these constants be defined higher up in the module?
@@ -30,18 +24,23 @@ class BondMaker:
""" """
def __init__(self): def __init__(self):
# predefined bonding distances # predefined bonding distances
self.distances = {'S-S' : DISULFIDE_DISTANCE, 'F-F' : FLUORIDE_DISTANCE} self.distances = {'S-S' : DISULFIDE_DISTANCE,
'F-F' : FLUORIDE_DISTANCE}
self.distances_squared = {} self.distances_squared = {}
for k in self.distances.keys(): for key in self.distances:
self.distances_squared[k] = self.distances[k] * self.distances[k] self.distances_squared[key] = (
self.H_dist = HYDROGEN_DISTANCE self.distances[key] * self.distances[key])
h_dist = HYDROGEN_DISTANCE
self.default_dist = DEFAULT_DISTANCE self.default_dist = DEFAULT_DISTANCE
self.H_dist_squared = self.H_dist * self.H_dist self.h_dist_squared = h_dist * h_dist
self.default_dist_squared = self.default_dist * self.default_dist self.default_dist_squared = self.default_dist * self.default_dist
distances = list(self.distances_squared.values()) + [self.default_dist_squared] distances = (
list(self.distances_squared.values())
+ [self.default_dist_squared])
self.max_sq_distance = max(distances) self.max_sq_distance = max(distances)
# protein bonding data # protein bonding data
self.data_file_name = pkg_resources.resource_filename(__name__, 'protein_bonds.json') self.data_file_name = (
pkg_resources.resource_filename(__name__, 'protein_bonds.json'))
with open(self.data_file_name, 'rt') as json_file: with open(self.data_file_name, 'rt') as json_file:
self.protein_bonds = json.load(json_file) self.protein_bonds = json.load(json_file)
self.intra_residue_backbone_bonds = {'N': ['CA'], 'CA': ['N', 'C'], self.intra_residue_backbone_bonds = {'N': ['CA'], 'CA': ['N', 'C'],
@@ -74,9 +73,13 @@ class BondMaker:
self.num_pi_elec_conj_bonds_ligands = {'N.am': 1, 'N.pl3': 1} self.num_pi_elec_conj_bonds_ligands = {'N.am': 1, 'N.pl3': 1}
self.backbone_atoms = list(self.intra_residue_backbone_bonds.keys()) self.backbone_atoms = list(self.intra_residue_backbone_bonds.keys())
self.terminal_oxygen_names = ['OXT', 'O\'\''] self.terminal_oxygen_names = ['OXT', 'O\'\'']
self.boxes = {}
self.num_box_x = None
self.num_box_y = None
self.num_box_z = None
def find_bonds_for_protein(self, protein): def find_bonds_for_protein(self, protein):
"""Finds bonds proteins based on the way atoms normally bond in proteins. """Bonds proteins based on the way atoms normally bond.
Args: Args:
protein: the protein to search for bonds protein: the protein to search for bonds
@@ -92,9 +95,12 @@ class BondMaker:
last_residues = [] last_residues = []
for chain in protein.chains: for chain in protein.chains:
for i in range(1, len(chain.residues)): for i in range(1, len(chain.residues)):
if chain.residues[i-1].res_name.replace(' ', '') not in ['N+', 'C-']: if (chain.residues[i-1].res_name.replace(' ', '')
if chain.residues[i].res_name.replace(' ', '') not in ['N+', 'C-']: not in ['N+', 'C-']):
self.connect_backbone(chain.residues[i-1], chain.residues[i]) if (chain.residues[i].res_name.replace(' ', '')
not in ['N+', 'C-']):
self.connect_backbone(chain.residues[i-1],
chain.residues[i])
last_residues.append(chain.residues[i]) last_residues.append(chain.residues[i])
info('++++ terminal oxygen ++++') info('++++ terminal oxygen ++++')
# terminal OXT # terminal OXT
@@ -121,7 +127,8 @@ class BondMaker:
if atom1.name == 'SG': if atom1.name == 'SG':
for atom2 in cys2.atoms: for atom2 in cys2.atoms:
if atom2.name == 'SG': if atom2.name == 'SG':
dist = propka.calculations.squared_distance(atom1, atom2) dist = propka.calculations.squared_distance(atom1,
atom2)
# TODO - is SS_dist_squared an attribute of this class? # TODO - is SS_dist_squared an attribute of this class?
if dist < self.SS_dist_squared: if dist < self.SS_dist_squared:
self.make_bond(atom1, atom2) self.make_bond(atom1, atom2)
@@ -138,7 +145,6 @@ class BondMaker:
if atom2.name == 'C': if atom2.name == 'C':
self.make_bond(atom1, atom2) self.make_bond(atom1, atom2)
# TODO - stopped here.
def connect_backbone(self, residue1, residue2): def connect_backbone(self, residue1, residue2):
"""Sets up bonds in the backbone """Sets up bonds in the backbone
@@ -152,8 +158,8 @@ class BondMaker:
if atom1.name == 'C': if atom1.name == 'C':
for atom2 in residue2.atoms: for atom2 in residue2.atoms:
if atom2.name == 'N': if atom2.name == 'N':
if propka.calculations.squared_distance(atom1, atom2) \ if (propka.calculations.squared_distance(atom1, atom2)
< self.default_dist_squared: < self.default_dist_squared):
self.make_bond(atom1, atom2) self.make_bond(atom1, atom2)
def find_bonds_for_residue_backbone(self, residue): def find_bonds_for_residue_backbone(self, residue):
@@ -164,17 +170,18 @@ class BondMaker:
""" """
for atom1 in residue.atoms: for atom1 in residue.atoms:
if atom1.name in list(self.num_pi_elec_bonds_backbone.keys()): if atom1.name in list(self.num_pi_elec_bonds_backbone.keys()):
atom1.num_pi_elec_2_3_bonds \ atom1.num_pi_elec_2_3_bonds = (
= self.num_pi_elec_bonds_backbone[atom1.name] self.num_pi_elec_bonds_backbone[atom1.name])
if atom1.name in \ if atom1.name in (
list(self.num_pi_elec_conj_bonds_backbone.keys()) \ list(self.num_pi_elec_conj_bonds_backbone.keys())
and len(atom1.bonded_atoms) > 1: # last part to avoid including N-term and len(atom1.bonded_atoms) > 1): # avoid N-term
atom1.num_pi_elec_conj_2_3_bonds \ atom1.num_pi_elec_conj_2_3_bonds = (
= self.num_pi_elec_conj_bonds_backbone[atom1.name] self.num_pi_elec_conj_bonds_backbone[atom1.name])
if atom1.name in self.backbone_atoms: if atom1.name in self.backbone_atoms:
for atom2 in residue.atoms: for atom2 in residue.atoms:
if atom2.name in self.intra_residue_backbone_bonds[atom1.name]: if atom2.name in (
self.intra_residue_backbone_bonds[atom1.name]):
self.make_bond(atom1, atom2) self.make_bond(atom1, atom2)
def find_bonds_for_side_chain(self, atoms): def find_bonds_for_side_chain(self, atoms):
@@ -184,18 +191,19 @@ class BondMaker:
atoms: list of atoms to check for bonds atoms: list of atoms to check for bonds
""" """
for atom1 in atoms: for atom1 in atoms:
key = '%s-%s' % (atom1.res_name, atom1.name) key = '{0:s}-{1:s}'.format(atom1.res_name, atom1.name)
if key in list(self.num_pi_elec_bonds_sidechains.keys()): if key in list(self.num_pi_elec_bonds_sidechains.keys()):
atom1.num_pi_elec_2_3_bonds \ atom1.num_pi_elec_2_3_bonds = (
= self.num_pi_elec_bonds_sidechains[key] self.num_pi_elec_bonds_sidechains[key])
if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()): if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()):
atom1.num_pi_elec_conj_2_3_bonds \ atom1.num_pi_elec_conj_2_3_bonds = (
= self.num_pi_elec_conj_bonds_sidechains[key] self.num_pi_elec_conj_bonds_sidechains[key])
if not atom1.name in self.backbone_atoms: if not atom1.name in self.backbone_atoms:
if not atom1.name in self.terminal_oxygen_names: if not atom1.name in self.terminal_oxygen_names:
for atom2 in atoms: for atom2 in atoms:
if atom2.name in self.protein_bonds[atom1.res_name][atom1.name]: if atom2.name in (
self
.protein_bonds[atom1.res_name][atom1.name]):
self.make_bond(atom1, atom2) self.make_bond(atom1, atom2)
def find_bonds_for_ligand(self, ligand): def find_bonds_for_ligand(self, ligand):
@@ -219,25 +227,30 @@ class BondMaker:
# for ligands # for ligands
if atom.type == 'hetatm': if atom.type == 'hetatm':
if atom.sybyl_type in self.num_pi_elec_bonds_ligands.keys(): if atom.sybyl_type in self.num_pi_elec_bonds_ligands.keys():
atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_ligands[atom.sybyl_type] atom.num_pi_elec_2_3_bonds = (
if atom.sybyl_type in self.num_pi_elec_conj_bonds_ligands.keys(): self.num_pi_elec_bonds_ligands[atom.sybyl_type])
atom.num_pi_elec_conj_2_3_bonds \ if atom.sybyl_type in (
= self.num_pi_elec_conj_bonds_ligands[atom.sybyl_type] self.num_pi_elec_conj_bonds_ligands.keys()):
atom.num_pi_elec_conj_2_3_bonds = (
self.num_pi_elec_conj_bonds_ligands[atom.sybyl_type])
# for protein # for protein
if atom.type == 'atom': if atom.type == 'atom':
key = '%s-%s' % (atom.res_name, atom.name) key = '{0:s}-{1:s}'.format(atom.res_name, atom.name)
if key in list(self.num_pi_elec_bonds_sidechains.keys()): if key in list(self.num_pi_elec_bonds_sidechains.keys()):
atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_sidechains[key] atom.num_pi_elec_2_3_bonds = (
self.num_pi_elec_bonds_sidechains[key])
if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()): if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()):
atom.num_pi_elec_conj_2_3_bonds = self.num_pi_elec_conj_bonds_sidechains[key] atom.num_pi_elec_conj_2_3_bonds = (
self.num_pi_elec_conj_bonds_sidechains[key])
if atom.name in list(self.num_pi_elec_bonds_backbone.keys()): if atom.name in list(self.num_pi_elec_bonds_backbone.keys()):
atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_backbone[atom.name] atom.num_pi_elec_2_3_bonds = (
if atom.name in list(self.num_pi_elec_conj_bonds_backbone.keys()) \ self.num_pi_elec_bonds_backbone[atom.name])
and len(atom.bonded_atoms) > 1: if atom.name in list(
self.num_pi_elec_conj_bonds_backbone.keys()) and (
len(atom.bonded_atoms) > 1):
# last part to avoid including N-term # last part to avoid including N-term
atom.num_pi_elec_conj_2_3_bonds \ atom.num_pi_elec_conj_2_3_bonds = (
= self.num_pi_elec_conj_bonds_backbone[atom.name] self.num_pi_elec_conj_bonds_backbone[atom.name])
def find_bonds_for_protein_by_distance(self, molecule): def find_bonds_for_protein_by_distance(self, molecule):
"""Finds bonds for all atoms in the molecule. """Finds bonds for all atoms in the molecule.
@@ -287,9 +300,9 @@ class BondMaker:
sq_dist = propka.calculations.squared_distance(atom1, atom2) sq_dist = propka.calculations.squared_distance(atom1, atom2)
if sq_dist > self.max_sq_distance: if sq_dist > self.max_sq_distance:
return False return False
key = '%s-%s' % (atom1.element, atom2.element) key = '{0:s}-{1:s}'.format(atom1.element, atom2.element)
h_count = key.count('H') h_count = key.count('H')
if sq_dist < self.H_dist_squared and h_count == 1: if sq_dist < self.h_dist_squared and h_count == 1:
return True return True
if sq_dist < self.default_dist_squared and h_count == 0: if sq_dist < self.default_dist_squared and h_count == 0:
return True return True
@@ -305,7 +318,8 @@ class BondMaker:
molecules: list of molecules for finding bonds. molecules: list of molecules for finding bonds.
""" """
for name in molecules.conformation_names: for name in molecules.conformation_names:
self.find_bonds_for_atoms_using_boxes(molecules.conformations[name].atoms) self.find_bonds_for_atoms_using_boxes(
molecules.conformations[name].atoms)
def add_pi_electron_information(self, molecules): def add_pi_electron_information(self, molecules):
"""Add pi electron information to a molecule. """Add pi electron information to a molecule.
@@ -314,7 +328,8 @@ class BondMaker:
molecules: list of molecules for adding pi electron information. molecules: list of molecules for adding pi electron information.
""" """
for name in molecules.conformation_names: for name in molecules.conformation_names:
self.add_pi_electron_table_info(molecules.conformations[name].atoms) self.add_pi_electron_table_info(
molecules.conformations[name].atoms)
def find_bonds_for_atoms_using_boxes(self, atoms): def find_bonds_for_atoms_using_boxes(self, atoms):
"""Finds all bonds for a list of atoms. """Finds all bonds for a list of atoms.
@@ -370,16 +385,17 @@ class BondMaker:
z: box z-coordinates z: box z-coordinates
atom: the atom to place in a box atom: the atom to place in a box
""" """
for bx in [x, x+1]: for box_x in [x, x+1]:
for by in [y, y+1]: for box_y in [y, y+1]:
for bz in [z, z+1]: for box_z in [z, z+1]:
key = (bx, by, bz) key = (box_x, box_y, box_z)
try: try:
self.boxes[key].append(atom) self.boxes[key].append(atom)
except KeyError: except KeyError:
pass pass
def has_bond(self, atom1, atom2): @staticmethod
def has_bond(atom1, atom2):
"""Look for bond between two atoms. """Look for bond between two atoms.
Args: Args:
@@ -392,7 +408,8 @@ class BondMaker:
return True return True
return False return False
def make_bond(self, atom1, atom2): @staticmethod
def make_bond(atom1, atom2):
"""Makes a bond between atom1 and atom2 """Makes a bond between atom1 and atom2
Args: Args:
@@ -418,10 +435,12 @@ class BondMaker:
name_i = atom.name name_i = atom.name
resi_j = bonded_atom.res_name resi_j = bonded_atom.res_name
name_j = bonded_atom.name name_j = bonded_atom.name
if not name_i in self.backbone_atoms or\ if not name_i in (
not name_j in self.backbone_atoms: self.backbone_atoms
if not name_i in self.terminal_oxygen_names and\ or not name_j in self.backbone_atoms):
not name_j in self.terminal_oxygen_names: if not name_i in (
self.terminal_oxygen_names
and not name_j in self.terminal_oxygen_names):
if not resi_i in list(self.protein_bonds.keys()): if not resi_i in list(self.protein_bonds.keys()):
self.protein_bonds[resi_i] = {} self.protein_bonds[resi_i] = {}
if not name_i in self.protein_bonds[resi_i]: if not name_i in self.protein_bonds[resi_i]:

File diff suppressed because it is too large Load Diff

View File

@@ -1,451 +1,610 @@
# """Container for molecular conformations"""
# Container for molecular conformations import functools
# import propka.ligand
from propka.output import make_interaction_map
from propka.determinant import Determinant
from propka.coupled_groups import NCCG
from propka.determinants import set_backbone_determinants, set_ion_determinants
from propka.determinants import set_determinants
from propka.group import Group, is_group
from propka.lib import info
from __future__ import division
from __future__ import print_function
import propka.group, propka.determinants, propka.determinant, propka.ligand, propka.output, propka.coupled_groups, functools # A large number that gets multipled with the integer obtained from applying
from propka.lib import info, warning # ord() to the atom chain ID. Used in calculating atom keys for sorting.
UNICODE_MULTIPLIER = 1e7
# A number that gets mutiplied with an atom's residue number. Used in
# calculating keys for atom sorting.
RESIDUE_MULTIPLIER = 1000
class ConformationContainer:
"""Container for molecular conformations"""
class Conformation_container:
def __init__(self, name='', parameters=None, molecular_container=None): def __init__(self, name='', parameters=None, molecular_container=None):
"""Initialize conformation container.
Args:
name: name for conformation
parameters: parmameters for conformation
molecular_container: container for molecule
"""
self.molecular_container = molecular_container self.molecular_container = molecular_container
self.name=name self.name = name
self.parameters=parameters self.parameters = parameters
self.atoms = [] self.atoms = []
self.groups = [] self.groups = []
self.chains = [] self.chains = []
self.current_iter_item = 0 self.current_iter_item = 0
# TODO - what is marvin_pkas_calculated?
self.marvin_pkas_calculated = False self.marvin_pkas_calculated = False
self.non_covalently_coupled_groups = False self.non_covalently_coupled_groups = False
return
#
# Group related methods
#
def extract_groups(self): def extract_groups(self):
""" Generates at list of molecular groups needed for calculating pKa values """ """Generate molecular groups needed for calculating pKa values."""
for atom in self.get_non_hydrogen_atoms(): for atom in self.get_non_hydrogen_atoms():
# has this atom been checked for groups? # has this atom been checked for groups?
if atom.groups_extracted == 0: if atom.groups_extracted == 0:
group = propka.group.is_group(self.parameters, atom) group = is_group(self.parameters, atom)
else: else:
group = atom.group group = atom.group
# if the atom has been checked in a another conformation, check if it has a # if the atom has been checked in a another conformation, check
# group that should be used in this conformation as well # if it has a group that should be used in this conformation as well
if group: if group:
self.setup_and_add_group(group) self.setup_and_add_group(group)
return
def additional_setup_when_reading_input_file(self): def additional_setup_when_reading_input_file(self):
"""Generate interaction map and charge centers."""
# if a group is coupled and we are reading a .propka_input file, # if a group is coupled and we are reading a .propka_input file, then
# some more configuration might be needed # some more configuration might be needed
map_ = make_interaction_map(
# print coupling map 'Covalent coupling map for {0:s}'.format(str(self)),
map = propka.output.make_interaction_map('Covalent coupling map for %s'%self, self.get_covalently_coupled_groups(),
self.get_covalently_coupled_groups(), lambda g1, g2: g1 in g2.covalently_coupled_groups)
lambda g1,g2: g1 in g2.covalently_coupled_groups) info(map_)
info(map)
# check if we should set a common charge centre as well # check if we should set a common charge centre as well
if self.parameters.common_charge_centre: if self.parameters.common_charge_centre:
self.set_common_charge_centres() self.set_common_charge_centres()
return
def set_common_charge_centres(self): def set_common_charge_centres(self):
for system in self.get_coupled_systems(self.get_covalently_coupled_groups(), propka.group.Group.get_covalently_coupled_groups): """Assign charge centers to groups."""
for system in self.get_coupled_systems(
self.get_covalently_coupled_groups(),
Group.get_covalently_coupled_groups):
# make a list of the charge centre coordinates # make a list of the charge centre coordinates
all_coordinates = list(map(lambda g: [g.x, g.y, g.z], system)) all_coordinates = list(map(lambda g: [g.x, g.y, g.z], system))
# find the common charge center # find the common charge center
ccc = functools.reduce(lambda g1,g2: [g1[0]+g2[0], g1[1]+g2[1], g1[2]+g2[2]], all_coordinates) ccc = functools.reduce(
lambda g1, g2: [g1[0]+g2[0], g1[1]+g2[1], g1[2]+g2[2]],
all_coordinates)
ccc = list(map(lambda c: c/len(system), ccc)) ccc = list(map(lambda c: c/len(system), ccc))
# set the ccc for all coupled groups in this system # set the ccc for all coupled groups in this system
for g in system: for group in system:
[g.x, g.y, g.z] = ccc [group.x, group.y, group.z] = ccc
g.common_charge_centre = True group.common_charge_centre = True
return
def find_covalently_coupled_groups(self): def find_covalently_coupled_groups(self):
""" Finds covalently coupled groups and sets common charge centres if needed """ """Find covalently coupled groups and set common charge centres."""
for group in self.get_titratable_groups(): for group in self.get_titratable_groups():
# Find covalently bonded groups # Find covalently bonded groups
bonded_groups = self.find_bonded_titratable_groups(group.atom, 1, group.atom) bonded_groups = self.find_bonded_titratable_groups(
group.atom, 1, group.atom)
# couple groups # coupled groups
for cg in bonded_groups: for bond_group in bonded_groups:
if cg in group.covalently_coupled_groups: if bond_group in group.covalently_coupled_groups:
continue continue
if cg.atom.sybyl_type == group.atom.sybyl_type: if bond_group.atom.sybyl_type == group.atom.sybyl_type:
group.couple_covalently(cg) group.couple_covalently(bond_group)
# check if we should set a common charge centre as well # check if we should set a common charge centre as well
if self.parameters.common_charge_centre: if self.parameters.common_charge_centre:
self.set_common_charge_centres() self.set_common_charge_centres()
# print coupling map # print coupling map
map = propka.output.make_interaction_map('Covalent coupling map for %s'%self, map_ = make_interaction_map(
#self.get_titratable_groups(), 'Covalent coupling map for {0:s}'.format(str(self)),
self.get_covalently_coupled_groups(), self.get_covalently_coupled_groups(),
lambda g1,g2: g1 in g2.covalently_coupled_groups) lambda g1, g2: g1 in g2.covalently_coupled_groups)
info(map) info(map_)
return
def find_non_covalently_coupled_groups(self, verbose=False): def find_non_covalently_coupled_groups(self, verbose=False):
"""Find non-covalently coupled groups and set common charge centres.
Args:
verbose: verbose output
"""
# check if non-covalent coupling has already been set up in an input file # check if non-covalent coupling has already been set up in an input file
if len(list(filter(lambda g: len(g.non_covalently_coupled_groups)>0, self.get_titratable_groups())))>0: if len(list(filter(lambda g: len(g.non_covalently_coupled_groups) > 0,
self.get_titratable_groups()))) > 0:
self.non_covalently_coupled_groups = True self.non_covalently_coupled_groups = True
NCCG.identify_non_covalently_coupled_groups(self, verbose=verbose)
propka.coupled_groups.nccg.identify_non_covalently_coupled_groups(self,verbose=verbose)
# re-do the check # re-do the check
if len(list(filter(lambda g: len(g.non_covalently_coupled_groups)>0, self.get_titratable_groups())))>0: if len(list(filter(lambda g: len(g.non_covalently_coupled_groups) > 0,
self.get_titratable_groups()))) > 0:
self.non_covalently_coupled_groups = True self.non_covalently_coupled_groups = True
return def find_bonded_titratable_groups(self, atom, num_bonds, original_atom):
"""Find bonded titrable groups.
Args:
def find_bonded_titratable_groups(self, atom, no_bonds, original_atom): atom: atom to check for bonds
num_bonds: number of bonds for coupling
original_atom: another atom to check for bonds
Returns:
a set of bonded atom groups
"""
res = set() res = set()
for ba in atom.bonded_atoms: for bond_atom in atom.bonded_atoms:
# skip the original atom # skip the original atom
if ba == original_atom: if bond_atom == original_atom:
continue continue
# check if this atom has a titratable group # check if this atom has a titratable group
if ba.group and ba.group.titratable and no_bonds <= self.parameters.coupling_max_number_of_bonds: if (bond_atom.group and bond_atom.group.titratable
res.add(ba.group) and num_bonds
<= self.parameters.coupling_max_number_of_bonds):
res.add(bond_atom.group)
# check for titratable groups bonded to this atom # check for titratable groups bonded to this atom
if no_bonds < self.parameters.coupling_max_number_of_bonds: if num_bonds < self.parameters.coupling_max_number_of_bonds:
res |= self.find_bonded_titratable_groups(ba,no_bonds+1, original_atom) res |= self.find_bonded_titratable_groups(
bond_atom, num_bonds+1, original_atom)
return res return res
def setup_and_add_group(self, group): def setup_and_add_group(self, group):
""" Checks if we want to include this group in the calculations """ """Check if we want to include this group in the calculations.
Args:
group: group to check
"""
# Is it recognized as a group at all? # Is it recognized as a group at all?
if not group: if not group:
return return
# Other checks (include ligands, which chains etc.) # Other checks (include ligands, which chains etc.)
# if all ok, accept the group # if all ok, accept the group
self.init_group(group) self.init_group(group)
self.groups.append(group) self.groups.append(group)
def init_group(self, group): def init_group(self, group):
""" """Initialize the given Group object.
Initialize the given Group object.
Args:
group: group object to initialize
""" """
# set up the group # set up the group
group.parameters=self.parameters group.parameters = self.parameters
group.setup() group.setup()
# If --titrate_only option is set, make non-specified residues un-titratable: # If --titrate_only option is set, make non-specified residues
# un-titratable:
titrate_only = self.molecular_container.options.titrate_only titrate_only = self.molecular_container.options.titrate_only
if titrate_only is not None: if titrate_only is not None:
at = group.atom atom = group.atom
if not (at.chain_id, at.res_num, at.icode) in titrate_only: if not (atom.chain_id, atom.res_num, atom.icode) in titrate_only:
group.titratable = False group.titratable = False
if group.residue_type == 'CYS': if group.residue_type == 'CYS':
group.exclude_cys_from_results = True group.exclude_cys_from_results = True
#
# pka calculation methods
#
def calculate_pka(self, version, options): def calculate_pka(self, version, options):
"""Calculate pKas for conformation container.
Args:
version: version object
options: option object
"""
info('\nCalculating pKas for', self) info('\nCalculating pKas for', self)
# calculate desolvation # calculate desolvation
for group in self.get_titratable_groups()+self.get_ions(): for group in self.get_titratable_groups() + self.get_ions():
version.calculate_desolvation(group) version.calculate_desolvation(group)
# calculate backbone interactions # calculate backbone interactions
propka.determinants.setBackBoneDeterminants(self.get_titratable_groups(), self.get_backbone_groups(), version) set_backbone_determinants(
self.get_titratable_groups(), self.get_backbone_groups(), version)
# setting ion determinants # setting ion determinants
propka.determinants.setIonDeterminants(self, version) set_ion_determinants(self, version)
# calculating the back-bone reorganization/desolvation term # calculating the back-bone reorganization/desolvation term
version.calculateBackBoneReorganization(self) version.calculate_backbone_reorganization(self)
# setting remaining non-iterative and iterative side-chain & Coulomb
# setting remaining non-iterative and iterative side-chain & Coulomb interaction determinants # interaction determinants
propka.determinants.setDeterminants(self.get_sidechain_groups(), version=version, options=options) set_determinants(
self.get_sidechain_groups(), version=version, options=options)
# calculating the total pKa values # calculating the total pKa values
for group in self.groups: group.calculate_total_pka() for group in self.groups:
group.calculate_total_pka()
# take coupling effects into account # take coupling effects into account
penalised_labels = self.coupling_effects() penalised_labels = self.coupling_effects()
if (self.parameters.remove_penalised_group
if self.parameters.remove_penalised_group and len(penalised_labels)>0: and len(penalised_labels) > 0):
info('Removing penalised groups!!!') info('Removing penalised groups!!!')
for group in self.get_titratable_groups():
for g in self.get_titratable_groups(): group.remove_determinants(penalised_labels)
g.remove_determinants(penalised_labels)
# re-calculating the total pKa values # re-calculating the total pKa values
for group in self.groups: group.calculate_total_pka() for group in self.groups:
group.calculate_total_pka()
return
def coupling_effects(self): def coupling_effects(self):
# """Penalize groups based on coupling effects.
# Bases: The group with the highest pKa (the most stable one in the
# charged form) will be the first one to adopt a proton as pH Bases: The group with the highest pKa (the most stable one in the
# is lowered and this group is allowed to titrate. charged form) will be the first one to adopt a proton as pH is lowered
# The remaining groups are penalised and this group is allowed to titrate. The remaining groups are
# penalised.
# Acids: The group with the highest pKa (the least stable one in the
# charged form) will be the last group to loose the proton as Acids: The group with the highest pKa (the least stable one in the
# pH is raised and will be penalised. charged form) will be the last group to loose the proton as pH is
# The remaining groups are allowed to titrate. raised and will be penalised. The remaining groups are allowed to
# titrate.
"""
penalised_labels = [] penalised_labels = []
for all_groups in self.get_coupled_systems(
for all_groups in self.get_coupled_systems(self.get_covalently_coupled_groups(), self.get_covalently_coupled_groups(),
propka.group.Group.get_covalently_coupled_groups): Group.get_covalently_coupled_groups):
# check if we should share determinants # check if we should share determinants
if self.parameters.shared_determinants: if self.parameters.shared_determinants:
self.share_determinants(all_groups) self.share_determinants(all_groups)
# find the group that has the highest pKa value # find the group that has the highest pKa value
first_group = max(all_groups, key=lambda g:g.pka_value) first_group = max(all_groups, key=lambda g: g.pka_value)
# In case of acids # In case of acids
if first_group.charge < 0: if first_group.charge < 0:
first_group.coupled_titrating_group = min(all_groups, key=lambda g:g.pka_value) first_group.coupled_titrating_group = min(
penalised_labels.append(first_group.label) # group with the highest pKa is penalised all_groups, key=lambda g: g.pka_value)
# group with the highest pKa is penalised
penalised_labels.append(first_group.label)
# In case of bases # In case of bases
else: else:
for a in all_groups: for group in all_groups:
if a == first_group: if group == first_group:
continue # group with the highest pKa is allowed to titrate... # group with the highest pKa is allowed to titrate...
a.coupled_titrating_group = first_group continue
penalised_labels.append(a.label) #... and the rest is penalised group.coupled_titrating_group = first_group
#... and the rest are penalised
penalised_labels.append(group.label)
return penalised_labels return penalised_labels
@staticmethod
def share_determinants(groups):
"""Share sidechain, backbone, and Coloumb determinants between groups.
def share_determinants(self, groups): Args:
groups: groups to share between
"""
# make a list of the determinants to share # make a list of the determinants to share
types = ['sidechain','backbone','coulomb'] types = ['sidechain', 'backbone', 'coulomb']
for type in types: for type_ in types:
# find maximum value for each determinant # find maximum value for each determinant
max_dets = {} max_dets = {}
for g in groups: for group in groups:
for d in g.determinants[type]: for det in group.determinants[type_]:
# update max dets # update max dets
if d.group not in max_dets.keys(): if det.group not in max_dets.keys():
max_dets[d.group] = d.value max_dets[det.group] = det.value
else: else:
max_dets[d.group] = max(d.value, max_dets[d.group], key= lambda v: abs(v)) max_dets[det.group] = max(det.value,
max_dets[det.group],
key=lambda v: abs(v))
# overwrite/add maximum value for each determinant # overwrite/add maximum value for each determinant
for det_group in max_dets.keys(): for det_group in max_dets:
new_determinant = propka.determinant.Determinant(det_group, max_dets[det_group]) new_determinant = Determinant(det_group, max_dets[det_group])
for g in groups: for group in groups:
g.set_determinant(new_determinant,type) group.set_determinant(new_determinant, type_)
return
def get_coupled_systems(self, groups, get_coupled_groups): def get_coupled_systems(self, groups, get_coupled_groups):
""" This generator will yield one covalently coupled system at the time """ """A generator that yields covalently coupled systems.
Args:
groups: groups for generating coupled systems
get_coupled_groups: TODO - I don't know what this is
Yields:
covalently coupled systems
"""
groups = set(groups) groups = set(groups)
while len(groups)>0: while len(groups) > 0:
# extract a system of coupled groups ... # extract a system of coupled groups ...
system = set() system = set()
self.get_a_coupled_system_of_groups(groups.pop(), system, get_coupled_groups) self.get_a_coupled_system_of_groups(
groups.pop(), system, get_coupled_groups)
# ... and remove them from the list # ... and remove them from the list
groups -= system groups -= system
yield system yield system
return def get_a_coupled_system_of_groups(self, new_group, coupled_groups,
get_coupled_groups):
"""Set up coupled systems of groups.
Args:
def get_a_coupled_system_of_groups(self, new_group, coupled_groups, get_coupled_groups): new_group: added to coupled_groups
coupled_groups: existing coupled groups
get_coupled_groups: TODO - I don't know what this
"""
coupled_groups.add(new_group) coupled_groups.add(new_group)
[self.get_a_coupled_system_of_groups(c, coupled_groups, get_coupled_groups) for c in get_coupled_groups(new_group) if c not in coupled_groups] for coupled_group in get_coupled_groups(new_group):
return if coupled_group not in coupled_groups:
self.get_a_coupled_system_of_groups(coupled_group,
coupled_groups,
get_coupled_groups)
def calculate_folding_energy(self, ph=None, reference=None):
"""Calculate folding energy over all groups in conformation container.
# Args:
# Energy/summary-related methods ph: pH for calculation
# reference: reference state
def calculate_folding_energy(self, pH=None, reference=None): Returns:
folding energy
TODO - need units
"""
ddg = 0.0 ddg = 0.0
for group in self.groups: for group in self.groups:
#info('Folding energy for %s at pH %f: %f'%(group,pH,group.calculate_folding_energy(self.parameters, pH=pH, reference=reference))) ddg += group.calculate_folding_energy(self.parameters, ph=ph,
ddg += group.calculate_folding_energy(self.parameters, pH=pH, reference=reference) reference=reference)
return ddg return ddg
def calculate_charge(self, parmaeters, pH=None): def calculate_charge(self, parameters, ph=None):
"""Calculate charge for folded and unfolded states.
Args:
parameters: parameters for calculation
ph: pH for calculation
Returns:
1. charge for unfolded state
2. charge for folded state
"""
unfolded = folded = 0.0 unfolded = folded = 0.0
for group in self.get_titratable_groups(): for group in self.get_titratable_groups():
unfolded += group.calculate_charge(parmaeters, pH=pH, state='unfolded') unfolded += group.calculate_charge(parameters, ph=ph,
folded += group.calculate_charge(parmaeters, pH=pH, state='folded') state='unfolded')
folded += group.calculate_charge(parameters, ph=ph,
return unfolded,folded state='folded')
return unfolded, folded
#
# conformation/bookkeeping/atom methods
#
def get_backbone_groups(self): def get_backbone_groups(self):
""" returns all backbone groups needed for the pKa calculations """ """Get backbone groups needed for the pKa calculations.
Returns:
list of groups
"""
return [group for group in self.groups if 'BB' in group.type] return [group for group in self.groups if 'BB' in group.type]
def get_sidechain_groups(self): def get_sidechain_groups(self):
""" returns all sidechain groups needed for the pKa calculations """ """Get sidechain groups needed for the pKa calculations.
return [group for group in self.groups if ('BB' not in group.type\
and not group.atom.cysteine_bridge)] Returns:
list of groups
"""
return [
group for group in self.groups
if ('BB' not in group.type and not group.atom.cysteine_bridge)]
def get_covalently_coupled_groups(self): def get_covalently_coupled_groups(self):
return [g for g in self.groups if len(g.covalently_coupled_groups)>0] """Get covalently coupled groups needed for pKa calculations.
Returns:
list of groups
"""
return [
g for g in self.groups
if len(g.covalently_coupled_groups) > 0]
def get_non_covalently_coupled_groups(self): def get_non_covalently_coupled_groups(self):
return [g for g in self.groups if len(g.non_covalently_coupled_groups)>0] """Get non-covalently coupled groups needed for pKa calculations.
def get_backbone_NH_groups(self): Returns:
""" returns all NH backbone groups needed for the pKa calculations """ list of groups
"""
return [
g for g in self.groups
if len(g.non_covalently_coupled_groups) > 0]
def get_backbone_nh_groups(self):
"""Get NH backbone groups needed for pKa calculations.
Returns:
list of groups
"""
return [group for group in self.groups if group.type == 'BBN'] return [group for group in self.groups if group.type == 'BBN']
def get_backbone_CO_groups(self): def get_backbone_co_groups(self):
""" returns all CO backbone groups needed for the pKa calculations """ """Get CO backbone groups needed for pKa calculations.
Returns:
list of groups
"""
return [group for group in self.groups if group.type == 'BBC'] return [group for group in self.groups if group.type == 'BBC']
def get_groups_in_residue(self, residue): def get_groups_in_residue(self, residue):
return [group for group in self.groups if group.residue_type == residue] """Get residue groups needed for pKa calculations.
Args:
residue: specific residue with groups
Returns:
list of groups
"""
return [
group for group in self.groups if group.residue_type == residue]
def get_titratable_groups(self): def get_titratable_groups(self):
"""Get all titratable groups needed for pKa calculations.
Returns:
list of groups
"""
return [group for group in self.groups if group.titratable] return [group for group in self.groups if group.titratable]
def get_groups_for_calculations(self): def get_groups_for_calculations(self):
""" """Get a list of groups that should be included in results report.
Returns a list of groups that should be included in results report.
If --titrate_only option is specified, only residues that are titratable If --titrate_only option is specified, only residues that are titratable
and are in that list are included; otherwise all titratable residues and are in that list are included; otherwise all titratable residues
and CYS residues are included. and CYS residues are included.
Returns:
list of groups
""" """
return [group for group in self.groups if group.use_in_calculations()] return [group for group in self.groups if group.use_in_calculations()]
def get_acids(self): def get_acids(self):
return [group for group in self.groups if (group.residue_type in self.parameters.acid_list """Get acid groups needed for pKa calculations.
and not group.atom.cysteine_bridge)]
Returns:
list of groups
"""
return [
group for group in self.groups
if (group.residue_type in self.parameters.acid_list
and not group.atom.cysteine_bridge)]
def get_backbone_reorganisation_groups(self): def get_backbone_reorganisation_groups(self):
return [group for group in self.groups if (group.residue_type in self.parameters.backbone_reorganisation_list """Get groups involved with backbone reorganization.
and not group.atom.cysteine_bridge)]
Returns:
list of groups
"""
return [
group for group in self.groups
if (group.residue_type
in self.parameters.backbone_reorganisation_list
and not group.atom.cysteine_bridge)]
def get_ions(self): def get_ions(self):
return [group for group in self.groups if group.residue_type in self.parameters.ions.keys()] """Get ion groups.
def get_group_names(self, list): Returns:
return [group for group in self.groups if group.type in list] list of groups
"""
return [
group for group in self.groups
if group.residue_type in self.parameters.ions.keys()]
def get_group_names(self, group_list):
"""Get names of groups in list.
Args:
group_list: list to check
Returns:
list of groups
"""
return [group for group in self.groups if group.type in group_list]
def get_ligand_atoms(self): def get_ligand_atoms(self):
return [atom for atom in self.atoms if atom.type=='hetatm'] """Get atoms associated with ligands.
Returns:
list of atoms
"""
return [atom for atom in self.atoms if atom.type == 'hetatm']
def get_heavy_ligand_atoms(self): def get_heavy_ligand_atoms(self):
return [atom for atom in self.atoms if atom.type=='hetatm' and atom.element != 'H'] """Get heavy atoms associated with ligands.
def get_chain(self,chain): Returns:
list of atoms
"""
return [
atom for atom in self.atoms
if atom.type == 'hetatm' and atom.element != 'H']
def get_chain(self, chain):
"""Get atoms associated with a specific chain.
Args:
chain: chain to select
Returns:
list of atoms
"""
return [atom for atom in self.atoms if atom.chain_id != chain] return [atom for atom in self.atoms if atom.chain_id != chain]
def add_atom(self, atom): def add_atom(self, atom):
#info(self,'adding',atom) """Add atom to container.
Args:
atom: atom to add
"""
self.atoms.append(atom) self.atoms.append(atom)
if not atom.conformation_container: if not atom.conformation_container:
atom.conformation_container = self atom.conformation_container = self
if not atom.molecular_container: if not atom.molecular_container:
atom.molecular_container = self.molecular_container atom.molecular_container = self.molecular_container
# store chain id for bookkeeping # store chain id for bookkeeping
if not atom.chain_id in self.chains: if not atom.chain_id in self.chains:
self.chains.append(atom.chain_id) self.chains.append(atom.chain_id)
return
def copy_atom(self, atom): def copy_atom(self, atom):
new_atom = atom.make_copy() """Add a copy of the atom to container.
Args:
atom: atom to copy and add
"""
new_atom = atom.make_copy()
self.atoms.append(new_atom) self.atoms.append(new_atom)
new_atom.conformation_container = self new_atom.conformation_container = self
return
def get_non_hydrogen_atoms(self): def get_non_hydrogen_atoms(self):
return [atom for atom in self.atoms if atom.element!='H'] """Get atoms that are not hydrogens.
Returns:
list of atoms
"""
return [atom for atom in self.atoms if atom.element != 'H']
def top_up(self, other): def top_up(self, other):
""" Tops up self with all atoms found in other but not in self """ """Adds any atoms found in `other` but not in this container.
my_residue_labels = { a.residue_label for a in self.atoms }
Tops up self with all atoms found in other but not in self.
Args:
other: conformation container with atoms to add
"""
my_residue_labels = {a.residue_label for a in self.atoms}
for atom in other.atoms: for atom in other.atoms:
if not atom.residue_label in my_residue_labels: if not atom.residue_label in my_residue_labels:
self.copy_atom(atom) self.copy_atom(atom)
return
def find_group(self, group): def find_group(self, group):
for g in self.groups: """Find a group in the container.
if g.atom.residue_label == group.atom.residue_label:
if g.type == group.type: Args:
return g group: group to find
Returns:
False (if group not found) or group
"""
for group_ in self.groups:
if group_.atom.residue_label == group.atom.residue_label:
if group_.type == group.type:
return group_
return False return False
def set_ligand_atom_names(self): def set_ligand_atom_names(self):
"""Set names for atoms in ligands."""
for atom in self.get_ligand_atoms(): for atom in self.get_ligand_atoms():
propka.ligand.assign_sybyl_type(atom) propka.ligand.assign_sybyl_type(atom)
return
def __str__(self): def __str__(self):
return'Conformation container %s with %d atoms and %d groups'%(self.name,len(self),len(self.groups)) """String that lists statistics of atoms and groups."""
fmt = (
"Conformation container {name} with {natoms:d} atoms and "
"{ngroups:d} groups")
str_ = fmt.format(
name=self.name, natoms=len(self), ngroups=len(self.groups))
return str_
def __len__(self): def __len__(self):
"""Number of atoms in container."""
return len(self.atoms) return len(self.atoms)
def sort_atoms(self): def sort_atoms(self):
"""Sort atoms by `self.sort_atoms_key()` and renumber."""
# sort the atoms ... # sort the atoms ...
self.atoms.sort(key=self.sort_atoms_key) self.atoms.sort(key=self.sort_atoms_key)
# ... and re-number them # ... and re-number them
for i in range(len(self.atoms)): for i in range(len(self.atoms)):
self.atoms[i].numb = i+1 self.atoms[i].numb = i+1
return @staticmethod
def sort_atoms_key(atom):
"""Generate key for atom sorting.
def sort_atoms_key(self, atom): Args:
key = ord(atom.chain_id)*1e7 atom: atom for key generation.
key += atom.res_num*1000 Returns:
key for atom
"""
key = ord(atom.chain_id) * UNICODE_MULTIPLIER
key += atom.res_num * RESIDUE_MULTIPLIER
if len(atom.name) > len(atom.element): if len(atom.name) > len(atom.element):
key += ord(atom.name[len(atom.element)]) key += ord(atom.name[len(atom.element)])
#info(atom,ord(atom.name[len(atom.element)]), '|%s||%s|'%(atom.name,atom.element))
return key return key

View File

@@ -1,113 +1,118 @@
"""Describe coupling between groups."""
from __future__ import division import itertools
from __future__ import print_function import propka.lib
from propka.group import Group
import math, propka.output, propka.group, propka.lib, itertools from propka.output import make_interaction_map
from propka.lib import info, warning from propka.lib import info
class non_covalently_couple_groups: class NonCovalentlyCoupledGroups:
"""Groups that are coupled without covalent bonding."""
def __init__(self): def __init__(self):
self.parameters = None self.parameters = None
# self.do_intrinsic = False
# self.do_pair_wise = False
self.do_prot_stat = True self.do_prot_stat = True
return def is_coupled_protonation_state_probability(self, group1, group2,
energy_method,
return_on_fail=True):
# """Check whether two groups are energetically coupled.
# Methods for finding coupled groups
#
def is_coupled_protonation_state_probability(self, group1, group2, energy_method, return_on_fail=True):
Args:
group1: first group for interaction
group2: second group for interaction
energy_method: function for calculating energy
return_on_fail: return if part of the calculation fails
Returns:
dictionary describing coupling
"""
# check if the interaction energy is high enough # check if the interaction energy is high enough
interaction_energy = max(self.get_interaction(group1,group2), self.get_interaction(group2,group1)) interaction_energy = max(self.get_interaction(group1, group2),
self.get_interaction(group2, group1))
if interaction_energy<=self.parameters.min_interaction_energy and return_on_fail: if (interaction_energy <= self.parameters.min_interaction_energy
return {'coupling_factor':-1.0} and return_on_fail):
return {'coupling_factor': -1.0}
# calculate intrinsic pKa's, if not already done # calculate intrinsic pKa's, if not already done
for group in [group1, group2]: for group in [group1, group2]:
if not hasattr(group, 'intrinsic_pKa'): if group.intrinsic_pka is None:
group.calculate_intrinsic_pka() group.calculate_intrinsic_pka()
use_ph = self.parameters.pH
use_pH = self.parameters.pH
if self.parameters.pH == 'variable': if self.parameters.pH == 'variable':
use_pH = min(group1.pka_value, group2.pka_value) use_ph = min(group1.pka_value, group2.pka_value)
default_energy = energy_method(ph=use_ph,
default_energy = energy_method(pH=use_pH, reference=self.parameters.reference) reference=self.parameters.reference)
default_pka1 = group1.pka_value default_pka1 = group1.pka_value
default_pka2 = group2.pka_value default_pka2 = group2.pka_value
# check that pka values are within relevant limits # check that pka values are within relevant limits
if max(default_pka1, default_pka2) < self.parameters.min_pka or \ if (max(default_pka1, default_pka2) < self.parameters.min_pka
min(default_pka1, default_pka2) > self.parameters.max_pka: or min(default_pka1, default_pka2) > self.parameters.max_pka):
if return_on_fail: if return_on_fail:
return {'coupling_factor':-1.0} return {'coupling_factor': -1.0}
# Swap interactions and re-calculate pKa values # Swap interactions and re-calculate pKa values
self.swap_interactions([group1], [group2]) self.swap_interactions([group1], [group2])
group1.calculate_total_pka() group1.calculate_total_pka()
group2.calculate_total_pka() group2.calculate_total_pka()
# store swapped energy and pka's # store swapped energy and pka's
swapped_energy = energy_method(pH=use_pH, reference=self.parameters.reference) swapped_energy = energy_method(
ph=use_ph, reference=self.parameters.reference)
swapped_pka1 = group1.pka_value swapped_pka1 = group1.pka_value
swapped_pka2 = group2.pka_value swapped_pka2 = group2.pka_value
pka_shift1 = swapped_pka1 - default_pka1 pka_shift1 = swapped_pka1 - default_pka1
pka_shift2 = swapped_pka2 - default_pka2 pka_shift2 = swapped_pka2 - default_pka2
# Swap back to original protonation state # Swap back to original protonation state
self.swap_interactions([group1], [group2]) self.swap_interactions([group1], [group2])
group1.calculate_total_pka() group1.calculate_total_pka()
group2.calculate_total_pka() group2.calculate_total_pka()
# check difference in free energy # check difference in free energy
if abs(default_energy - swapped_energy) > self.parameters.max_free_energy_diff and return_on_fail: if (abs(default_energy - swapped_energy)
return {'coupling_factor':-1.0} > self.parameters.max_free_energy_diff and return_on_fail):
return {'coupling_factor': -1.0}
# check pka shift # check pka shift
if max(abs(pka_shift1), abs(pka_shift2)) < self.parameters.min_swap_pka_shift and return_on_fail: if (max(abs(pka_shift1), abs(pka_shift2))
return {'coupling_factor':-1.0} < self.parameters.min_swap_pka_shift and return_on_fail):
return {'coupling_factor': -1.0}
# check intrinsic pka diff # check intrinsic pka diff
if abs(group1.intrinsic_pKa - group2.intrinsic_pKa) > self.parameters.max_intrinsic_pKa_diff and return_on_fail: if (abs(group1.intrinsic_pka - group2.intrinsic_pka)
return {'coupling_factor':-1.0} > self.parameters.max_intrinsic_pka_diff and return_on_fail):
return {'coupling_factor': -1.0}
# if everything is OK, calculate the coupling factor and return all info # if everything is OK, calculate the coupling factor and return all info
factor = self.get_free_energy_diff_factor(default_energy, swapped_energy)*\ factor = (
self.get_pka_diff_factor(group1.intrinsic_pKa, group2.intrinsic_pKa)*\ self.get_free_energy_diff_factor(default_energy, swapped_energy)
self.get_interaction_factor(interaction_energy) * self.get_pka_diff_factor(group1.intrinsic_pka,
group2.intrinsic_pka)
return {'coupling_factor':factor, * self.get_interaction_factor(interaction_energy))
'default_energy':default_energy, return {'coupling_factor': factor, 'default_energy': default_energy,
'swapped_energy':swapped_energy, 'swapped_energy': swapped_energy,
'interaction_energy':interaction_energy, 'interaction_energy': interaction_energy,
'swapped_pka1':swapped_pka1, 'swapped_pka1': swapped_pka1, 'swapped_pka2': swapped_pka2,
'swapped_pka2':swapped_pka2, 'pka_shift1': pka_shift1, 'pka_shift2': pka_shift2,
'pka_shift1':pka_shift1, 'pH': use_ph}
'pka_shift2':pka_shift2,
'pH':use_pH}
#
# Methods for calculating the coupling factor
#
def get_pka_diff_factor(self, pka1, pka2): def get_pka_diff_factor(self, pka1, pka2):
"""Get scaling factor for difference between intrinsic pKa values.
Args:
pka1: first pKa to compare
pka2: second pKa to compare
Returns:
float value of scaling factor
"""
intrinsic_pka_diff = abs(pka1-pka2) intrinsic_pka_diff = abs(pka1-pka2)
res = 0.0 res = 0.0
if intrinsic_pka_diff <= self.parameters.max_intrinsic_pKa_diff: if intrinsic_pka_diff <= self.parameters.max_intrinsic_pka_diff:
res = 1-(intrinsic_pka_diff/self.parameters.max_intrinsic_pKa_diff)**2 res = (
1-(intrinsic_pka_diff
/self.parameters.max_intrinsic_pka_diff)**2)
return res return res
def get_free_energy_diff_factor(self, energy1, energy2): def get_free_energy_diff_factor(self, energy1, energy2):
"""Get scaling factor for difference between free energies.
Args:
energy1: first energy to compare
energy2: second energy to compare
Returns:
float value of scaling factor
"""
free_energy_diff = abs(energy1-energy2) free_energy_diff = abs(energy1-energy2)
res = 0.0 res = 0.0
if free_energy_diff <= self.parameters.max_free_energy_diff: if free_energy_diff <= self.parameters.max_free_energy_diff:
@@ -115,208 +120,267 @@ class non_covalently_couple_groups:
return res return res
def get_interaction_factor(self, interaction_energy): def get_interaction_factor(self, interaction_energy):
"""Get scaling factor related to interaction energy.
Args:
interaction_energy: interaction energy
Returns:
float value of scaling factor
"""
res = 0.0 res = 0.0
interaction_energy = abs(interaction_energy) interaction_energy = abs(interaction_energy)
if interaction_energy >= self.parameters.min_interaction_energy: if interaction_energy >= self.parameters.min_interaction_energy:
res = (interaction_energy-self.parameters.min_interaction_energy)/(1.0+interaction_energy-self.parameters.min_interaction_energy) res = (
(interaction_energy-self.parameters.min_interaction_energy)
/ (1.0+interaction_energy
-self.parameters.min_interaction_energy))
return res return res
def identify_non_covalently_coupled_groups(self, conformation,
verbose=True):
"""Find coupled residues in protein.
Args:
conformation: protein conformation to test
def identify_non_covalently_coupled_groups(self, conformation, verbose=True): verbose: verbose output (boolean)
""" Finds coupled residues in protein """ """
self.parameters = conformation.parameters self.parameters = conformation.parameters
if verbose: if verbose:
info('') info_fmt = (
info(' Warning: When using the -d option, pKa values based on \'swapped\' interactions') '\n'
info(' will be writting to the output .pka file') ' Warning: When using the -d option, pKa values based on \n'
info('') '\'swapped\' interactions\n'
info('-' * 103) ' will be writting to the output .pka file\n'
info(' Detecting non-covalently coupled residues') '\n'
info('-' * 103) '{sep}\n'
info(' Maximum pKa difference: %4.2f pKa units' % self.parameters.max_intrinsic_pKa_diff) '\n'
info(' Minimum interaction energy: %4.2f pKa units' % self.parameters.min_interaction_energy) ' Detecting non-covalently coupled residues\n'
info(' Maximum free energy diff.: %4.2f pKa units' % self.parameters.max_free_energy_diff) '{sep}\n'
info(' Minimum swap pKa shift: %4.2f pKa units' % self.parameters.min_swap_pka_shift) ' Maximum pKa difference: {c.max_intrinsic_pka_diff:>4.2f} pKa units\n'
info(' pH: %6s ' % str(self.parameters.pH)) ' Minimum interaction energy: {c.min_interaction_energy:>4.2f} pKa units\n'
info(' Reference: %s' % self.parameters.reference) ' Maximum free energy diff.: {c.max_free_energy_diff:>4.2f} pKa units\n'
info(' Min pKa: %4.2f' % self.parameters.min_pka) ' Minimum swap pKa shift: {c.min_swap_pka_shift:>4.2f} pKa units\n'
info(' Max pKa: %4.2f' % self.parameters.max_pka) ' pH: {c.pH:>6} \n'
info('') ' Reference: {c.reference}\n'
' Min pKa: {c.min_pka:>4.2f}\n'
' Max pKa: {c.max_pka:>4.2f}\n'
'\n')
sep = "-" * 103
info(info_fmt.format(sep=sep, c=self))
# find coupled residues # find coupled residues
titratable_groups = conformation.get_titratable_groups() titratable_groups = conformation.get_titratable_groups()
if not conformation.non_covalently_coupled_groups: if not conformation.non_covalently_coupled_groups:
for g1 in titratable_groups: for group1 in titratable_groups:
for g2 in titratable_groups: for group2 in titratable_groups:
if g1==g2: if group1 == group2:
break break
if (group1 not in group2.non_covalently_coupled_groups
if not g1 in g2.non_covalently_coupled_groups and self.do_prot_stat: and self.do_prot_stat):
data = self.is_coupled_protonation_state_probability(g1, g2,conformation.calculate_folding_energy) data = (
if data['coupling_factor'] >0.0: self
g1.couple_non_covalently(g2) .is_coupled_protonation_state_probability(
group1, group2,
conformation.calculate_folding_energy))
if data['coupling_factor'] > 0.0:
group1.couple_non_covalently(group2)
if verbose: if verbose:
self.print_out_swaps(conformation) self.print_out_swaps(conformation)
return def print_out_swaps(self, conformation):
"""Print out something having to do with coupling interactions.
def print_out_swaps(self, conformation, verbose=True): Args:
map = propka.output.make_interaction_map('Non-covalent coupling map for %s'%conformation, conformation: conformation to print
conformation.get_non_covalently_coupled_groups(), """
lambda g1,g2: g1 in g2.non_covalently_coupled_groups) map_ = make_interaction_map(
info(map) 'Non-covalent coupling map for {0:s}'.format(str(conformation)),
conformation.get_non_covalently_coupled_groups(),
for system in conformation.get_coupled_systems(conformation.get_non_covalently_coupled_groups(),propka.group.Group.get_non_covalently_coupled_groups): lambda g1, g2: g1 in g2.non_covalently_coupled_groups)
info(map_)
for system in conformation.get_coupled_systems(
conformation.get_non_covalently_coupled_groups(),
Group.get_non_covalently_coupled_groups):
self.print_system(conformation, list(system)) self.print_system(conformation, list(system))
return
def print_system(self, conformation, system): def print_system(self, conformation, system):
"""Print out something about the system.
info('System containing %d groups:' % len(system)) Args:
conformation: conformation to print
# make list of interactions withi this system system: system to print
interactions = list(itertools.combinations(system,2)) """
info(
'System containing {0:d} groups:'.format(len(system)))
# make list of interactions within this system
interactions = list(itertools.combinations(system, 2))
# print out coupling info for each interaction # print out coupling info for each interaction
coup_info = '' coup_info = ''
for interaction in interactions: for interaction in interactions:
data = self.is_coupled_protonation_state_probability(interaction[0], interaction[1],conformation.calculate_folding_energy, return_on_fail=False) data = (
coup_info += self.make_data_to_string(data,interaction[0],interaction[1])+'\n\n' self.is_coupled_protonation_state_probability(
interaction[0], interaction[1],
conformation.calculate_folding_energy,
return_on_fail=False))
coup_info += (
self.make_data_to_string(data, interaction[0], interaction[1])
+ '\n\n')
info(coup_info) info(coup_info)
# make list of possible combinations of swap to try out # make list of possible combinations of swap to try out
combinations = propka.lib.generate_combinations(interactions) combinations = propka.lib.generate_combinations(interactions)
# Make possible swap combinations # Make possible swap combinations
swap_info = '' swap_info = ''
swap_info += self.print_determinants_section(system,'Original') swap_info += self.print_determinants_section(system, 'Original')
for combination in combinations: for combination in combinations:
# Tell the user what is swap in this combination # Tell the user what is swap in this combination
swap_info += 'Swapping the following interactions:\n' swap_info += 'Swapping the following interactions:\n'
for interaction in combination: for interaction in combination:
swap_info += ' %s %s\n'%(interaction[0].label, interaction[1].label) swap_info += ' {0:s} {1:s}\n'.format(
interaction[0].label, interaction[1].label)
# swap... # swap...
for interaction in combination: for interaction in combination:
self.swap_interactions([interaction[0]],[interaction[1]]) self.swap_interactions([interaction[0]], [interaction[1]])
swap_info += self.print_determinants_section(system, 'Swapped')
swap_info += self.print_determinants_section(system,'Swapped')
# ...and swap back
#for interaction in combination:
# self.swap_interactions([interaction[0]], [interaction[1]])
info(swap_info) info(swap_info)
return @staticmethod
# def get_interaction(group1, group2, include_side_chain_hbs=True):
# Interaction and swapping methods """Get interaction energy between two groups.
#
def get_interaction(self, group1, group2, include_side_chain_hbs = True): Args:
group1: first group for interaction
group2: second group for interaction
include_side_chain_hbs: include sidechain hydrogen bonds in energy
Returns:
interaction energy (float)
"""
determinants = group1.determinants['coulomb'] determinants = group1.determinants['coulomb']
if include_side_chain_hbs: if include_side_chain_hbs:
determinants = group1.determinants['sidechain'] + group1.determinants['coulomb'] determinants = (
group1.determinants['sidechain']
+ group1.determinants['coulomb'])
interaction_energy = 0.0 interaction_energy = 0.0
for det in determinants: for det in determinants:
if group2 == det.group: if group2 == det.group:
interaction_energy += det.value interaction_energy += det.value
return interaction_energy return interaction_energy
def print_determinants_section(self, system, tag): def print_determinants_section(self, system, tag):
"""Print determinants of system.
Args:
system: set of groups
tag: something to add to output
Returns:
string with summary
"""
all_labels = [g.label for g in system] all_labels = [g.label for g in system]
s = ' '+'-'*113+'\n' str_ = ' ' + '-' * 113 + '\n'
for group in system: for group in system:
s += self.tagged_format(' %-8s|' % tag, group.getDeterminantString(), all_labels) str_ += self.tagged_format(
' {0:<8s}|'.format(tag), group.get_determinant_string(),
all_labels)
return str_ + '\n'
return s+'\n' def swap_interactions(self, groups1, groups2, include_side_chain_hbs=True):
"""Swap interactions between two groups.
def swap_interactions(self, groups1, groups2, include_side_chain_hbs = True): Args:
group1: first group to swap
for i in range(len(groups1)): group2: second group to swap
group1 = groups1[i] """
for i, group1 in enumerate(groups1):
group2 = groups2[i] group2 = groups2[i]
# swap the interactions! # swap the interactions!
self.transfer_determinant(group1.determinants['coulomb'], group2.determinants['coulomb'], group1.label, group2.label) self.transfer_determinant(group1.determinants['coulomb'],
group2.determinants['coulomb'],
group1.label, group2.label)
if include_side_chain_hbs: if include_side_chain_hbs:
self.transfer_determinant(group1.determinants['sidechain'], group2.determinants['sidechain'], group1.label, group2.label) self.transfer_determinant(group1.determinants['sidechain'],
group2.determinants['sidechain'],
# re-calculate pKa values group1.label, group2.label)
# re-calculate pKa values
group1.calculate_total_pka() group1.calculate_total_pka()
group2.calculate_total_pka() group2.calculate_total_pka()
return @staticmethod
def transfer_determinant(determinants1, determinants2,
label1, label2):
"""Transfer information between two sets of determinants.
Args:
def transfer_determinant(self, determinants1, determinants2, label1, label2): determinants1: determinant list
determinants2: determinant list
label1: label for list 1
label2: label for list 2
"""
# find out what to transfer... # find out what to transfer...
from1to2 = [] from1to2 = []
from2to1 = [] from2to1 = []
for det in determinants1: for det in determinants1:
if det.label == label2: if det.label == label2:
from1to2.append(det) from1to2.append(det)
for det in determinants2: for det in determinants2:
if det.label == label1: if det.label == label1:
from2to1.append(det) from2to1.append(det)
# ...and transfer it! # ...and transfer it!
for det in from1to2: for det in from1to2:
det.label = label1 det.label = label1
determinants2.append(det) determinants2.append(det)
determinants1.remove(det) determinants1.remove(det)
for det in from2to1: for det in from2to1:
det.label = label2 det.label = label2
determinants1.append(det) determinants1.append(det)
determinants2.remove(det) determinants2.remove(det)
return @staticmethod
def tagged_format(tag, str_, labels):
"""Tag a string.
# Args:
# Output methods tag: tag to add
# str_: string to tag
labels: labels to replace
def tagged_format(self, tag, s, labels): Returns:
s = "%s %s"%(tag,s) tagged string
s = s.replace('\n','\n%s '%tag) """
str_ = "{0:s} {1:s}".format(tag, str_)
str_ = str_.replace('\n', '\n{0:s} '.format(tag))
for label in labels: for label in labels:
s = s.replace(label, '\033[31m%s\033[30m'%label) str_ = str_.replace(label, '\033[31m{0:s}\033[30m'.format(label))
return s+'\n' return str_ + '\n'
@staticmethod
def make_data_to_string(data, group1, group2):
"""Describe interaction between groups.
Args:
data: data about interactions
group1: first group
group2: second group
Returns:
formatted string with information.
"""
str_ = (
" {label1} and {label2} coupled (prot.state): {coupl_fact:>5.2f}\n"
" Energy levels: {def_energy:>6.2f}, {swap_energy:>6.2f} "
"(difference: {diff_energy:>6.2f}) at pH {ph:>6.2f}\n"
" Interaction energy: {int_energy:>6.2f}\n"
" Intrinsic pka's: {pka1:>6.2f}, {pka2:>6.2f} "
"(difference: {diff_pka:>6.2f})\n"
" Swapped pKa's: {swap1:>6.2f}, {swap2:>6.2f} "
"(difference: {shift1:>6.2f}, {shift2:>6.2f})"
).format(
label1=group1.label, label2=group2.label,
coupl_fact=data['coupling_factor'],
def_energy=data['default_energy'],
swap_energy=data['swapped_energy'],
diff_energy=data['default_energy']-data['swapped_energy'],
ph=data['pH'], int_energy=data['interaction_energy'],
pka1=group1.intrinsic_pka, pka2=group2.intrinsic_pka,
diff_pka=group1.intrinsic_pka-group2.intrinsic_pka,
swap1=data['swapped_pka1'], swap2=data['swapped_pka2'],
shift1=data['pka_shift1'], shift2=data['pka_shift2'])
return str_
def make_data_to_string(self, data, group1, group2): NCCG = NonCovalentlyCoupledGroups()
s = """ %s and %s coupled (prot.state): %5.2f
Energy levels: %6.2f, %6.2f (difference: %6.2f) at pH %6.2f
Interaction energy: %6.2f
Intrinsic pka's: %6.2f, %6.2f (difference: %6.2f)
Swapped pKa's: %6.2f, %6.2f (difference: %6.2f, %6.2f)"""%(group1.label,
group2.label,
data['coupling_factor'],
data['default_energy'], data['swapped_energy'],
data['default_energy'] - data['swapped_energy'],
data['pH'],
data['interaction_energy'],
group1.intrinsic_pKa,
group2.intrinsic_pKa,
group1.intrinsic_pKa-group2.intrinsic_pKa,
data['swapped_pka1'],
data['swapped_pka2'],
data['pka_shift1'],
data['pka_shift2'])
return s
nccg = non_covalently_couple_groups()

View File

@@ -1,29 +1,37 @@
"""Holds the Determinant class
TODO - it is confusing to have both `determinant.py` and `determinants.py`.
Should these be merged?
"""
from __future__ import division
from __future__ import print_function
class Determinant: class Determinant:
""" """Determinant class.
Determinant class - set up for later structurization
Appears to be a container for storing information and values about
groups that interact to influence titration states.
TODO - figure out what this class does.
""" """
def __init__(self, group, value): def __init__(self, group, value):
""" """Initialize the object.
Contructer of determinant object - simple, but helps in creating structure!
Args:
group: group associated with Determinant object
value: value to assign to group
""" """
self.group = group self.group = group
self.label = group.label self.label = group.label
self.value = value self.value = value
return
def add(self, value): def add(self, value):
""" """Increment determinant value.
adding a value to determinant
Args:
value: value to add to determinant
""" """
self.value += value self.value += value
return
def __str__(self): def __str__(self):
return '%s: %8.2f'%(self.label,self.value) return '{0:s}: {1:8.2f}'.format(self.label, self.value)

View File

@@ -1,19 +1,33 @@
"""Functions to manipulate Determinant objects.
from __future__ import division TODO - it is confusing to have both `determinant.py` and `determinants.py`.
from __future__ import print_function Should these be merged?
"""
import math, time import math
import propka.iterative
import propka.iterative, propka.lib, propka.vector_algebra import propka.lib
import propka.calculations import propka.vector_algebra
from propka.calculations import squared_distance, get_smallest_distance
from propka.calculations import angle_distance_factors, hydrogen_bond_energy
from propka.determinant import Determinant from propka.determinant import Determinant
def setDeterminants(propka_groups, version=None, options=None): # Cutoff for angle factor
""" # TODO - this constant appears elsewhere in the package.
adding side-chain and coulomb determinants/perturbations to all residues - note, backbone determinants are set separately # It should be moved to a configuration file.
""" FANGLE_MIN = 0.001
def set_determinants(propka_groups, version=None, options=None):
"""Add side-chain and coulomb determinants/perturbations to all residues.
NOTE - backbone determinants are set separately
Args:
propka_groups: groups to adjust
version: version object
options: options object
"""
iterative_interactions = [] iterative_interactions = []
# --- NonIterative section ---# # --- NonIterative section ---#
for group1 in propka_groups: for group1 in propka_groups:
@@ -23,172 +37,202 @@ def setDeterminants(propka_groups, version=None, options=None):
# do not calculate interactions for coupled groups # do not calculate interactions for coupled groups
if group2 in group1.covalently_coupled_groups: if group2 in group1.covalently_coupled_groups:
break break
distance = propka.calculations.distance(group1, group2) distance = propka.calculations.distance(group1, group2)
if distance < version.parameters.coulomb_cutoff2: if distance < version.parameters.coulomb_cutoff2:
interaction_type = version.parameters.interaction_matrix.get_value(group1.type,group2.type) interaction_type = (
version.parameters.interaction_matrix.get_value(
group1.type, group2.type))
if interaction_type == 'I': if interaction_type == 'I':
propka.iterative.addtoDeterminantList(group1, group2, distance, iterative_interactions, version=version) propka.iterative.add_to_determinant_list(
group1, group2, distance, iterative_interactions,
version=version)
elif interaction_type == 'N': elif interaction_type == 'N':
addDeterminants(group1, group2, distance, version) add_determinants(group1, group2, distance, version)
# --- Iterative section ---# # --- Iterative section ---#
propka.iterative.addDeterminants(iterative_interactions, version, options=options) propka.iterative.add_determinants(iterative_interactions, version)
def addDeterminants(group1, group2, distance, version): def add_determinants(group1, group2, distance, version):
"""Add determinants and perturbations for distance(R1,R2) < coulomb_cutoff.
Args:
group1: first group to add
group2: second group to add
distance: distance between groups
version: version object
""" """
adding determinants/perturbations, distance(R1, R2) < coulomb_cutoff always
"""
# side-chain determinant # side-chain determinant
addSidechainDeterminants(group1, group2, version) add_sidechain_determinants(group1, group2, version)
# Coulomb determinant # Coulomb determinant
addCoulombDeterminants(group1, group2, distance, version) add_coulomb_determinants(group1, group2, distance, version)
return
def addSidechainDeterminants(group1, group2, version=None): def add_sidechain_determinants(group1, group2, version=None):
"""Add side-chain determinants and perturbations.
NOTE - res_num1 > res_num2
Args:
group1: first group to add
group2: second group to add
version: version object
""" """
adding side-chain determinants/perturbations
Note, res_num1 > res_num2
"""
hbond_interaction = version.hydrogen_bond_interaction(group1, group2) hbond_interaction = version.hydrogen_bond_interaction(group1, group2)
if hbond_interaction: if hbond_interaction:
if group1.charge == group2.charge: if group1.charge == group2.charge:
# acid pair or base pair # acid pair or base pair
if group1.model_pka < group2.model_pka: if group1.model_pka < group2.model_pka:
newDeterminant1 = Determinant(group2, -hbond_interaction) new_determinant1 = Determinant(group2, -hbond_interaction)
newDeterminant2 = Determinant(group1, hbond_interaction) new_determinant2 = Determinant(group1, hbond_interaction)
else: else:
newDeterminant1 = Determinant(group2, hbond_interaction) new_determinant1 = Determinant(group2, hbond_interaction)
newDeterminant2 = Determinant(group1, -hbond_interaction) new_determinant2 = Determinant(group1, -hbond_interaction)
else: else:
newDeterminant1 = Determinant(group2, hbond_interaction*group1.charge) new_determinant1 = Determinant(
newDeterminant2 = Determinant(group1, hbond_interaction*group2.charge) group2, hbond_interaction*group1.charge)
new_determinant2 = Determinant(
group1, hbond_interaction*group2.charge)
group1.determinants['sidechain'].append(new_determinant1)
group2.determinants['sidechain'].append(new_determinant2)
group1.determinants['sidechain'].append(newDeterminant1)
group2.determinants['sidechain'].append(newDeterminant2)
return def add_coulomb_determinants(group1, group2, distance, version):
"""Add non-iterative Coulomb determinants and perturbations.
def addCoulombDeterminants(group1, group2, distance, version): Args:
group1: first group to add
group2: second group to add
distance: distance between groups
version: version object
""" """
adding NonIterative Coulomb determinants/perturbations coulomb_interaction = version.electrostatic_interaction(
""" group1, group2, distance)
coulomb_interaction = version.electrostatic_interaction(group1, group2, distance)
if coulomb_interaction: if coulomb_interaction:
Q1 = group1.charge q1 = group1.charge
Q2 = group2.charge q2 = group2.charge
# assigning the Coulombic interaction # assigning the Coulombic interaction
if Q1 < 0.0 and Q2 < 0.0: if q1 < 0.0 and q2 < 0.0:
""" both are acids """ # both are acids
addCoulombAcidPair(group1, group2, coulomb_interaction) add_coulomb_acid_pair(group1, group2, coulomb_interaction)
elif Q1 > 0.0 and Q2 > 0.0: elif q1 > 0.0 and q2 > 0.0:
""" both are bases """ # both are bases
addCoulombBasePair(group1, group2, coulomb_interaction) add_coulomb_base_pair(group1, group2, coulomb_interaction)
else: else:
""" one of each """ # one of each
addCoulombIonPair(group1, group2, coulomb_interaction) add_coulomb_ion_pair(group1, group2, coulomb_interaction)
return
def addCoulombAcidPair(object1, object2, value): def add_coulomb_acid_pair(object1, object2, value):
"""Add the Coulomb interaction (an acid pair).
The higher pKa is raised.
Args:
object1: first part of pair
object2: second part of pair
value: determinant value
""" """
Adding the Coulomb interaction (an acid pair):
the higher pKa is raised
"""
if object1.model_pka > object2.model_pka: if object1.model_pka > object2.model_pka:
newDeterminant = Determinant(object2, value) new_determinant = Determinant(object2, value)
object1.determinants['coulomb'].append(newDeterminant) object1.determinants['coulomb'].append(new_determinant)
else: else:
newDeterminant = Determinant(object1, value) new_determinant = Determinant(object1, value)
object2.determinants['coulomb'].append(newDeterminant) object2.determinants['coulomb'].append(new_determinant)
def addCoulombBasePair(object1, object2, value): def add_coulomb_base_pair(object1, object2, value):
""" """Add the Coulomb interaction (a base pair).
Adding the Coulomb interaction (a base pair):
the lower pKa is lowered The lower pKa is lowered.
Args:
object1: first part of pair
object2: second part of pair
value: determinant value
""" """
if object1.model_pka < object2.model_pka: if object1.model_pka < object2.model_pka:
newDeterminant = Determinant(object2, -value) new_determinant = Determinant(object2, -value)
object1.determinants['coulomb'].append(newDeterminant) object1.determinants['coulomb'].append(new_determinant)
else: else:
newDeterminant = Determinant(object1, -value) new_determinant = Determinant(object1, -value)
object2.determinants['coulomb'].append(newDeterminant) object2.determinants['coulomb'].append(new_determinant)
def addCoulombIonPair(object1, object2, value): def add_coulomb_ion_pair(object1, object2, value):
"""Add the Coulomb interaction (an acid-base pair).
The pKa of the acid is lowered & the pKa of the base is raised.
Args:
object1: first part of pair
object2: second part of pair
value: determinant value
""" """
Adding the Coulomb interaction (an acid-base pair):
the pKa of the acid is lowered & the pKa of the base is raised
"""
# residue1 # residue1
Q1 = object1.charge q1 = object1.charge
newDeterminant = Determinant(object2, Q1*value) new_determinant = Determinant(object2, q1*value)
object1.determinants['coulomb'].append(newDeterminant) object1.determinants['coulomb'].append(new_determinant)
# residue2 # residue2
Q2 = object2.charge q2 = object2.charge
newDeterminant = Determinant(object1, Q2*value) new_determinant = Determinant(object1, q2*value)
object2.determinants['coulomb'].append(newDeterminant) object2.determinants['coulomb'].append(new_determinant)
def set_ion_determinants(conformation_container, version):
"""Add ion determinants and perturbations.
Args:
def setIonDeterminants(conformation_container, version): conformation_container: conformation to set
""" version: version object
adding ion determinants/perturbations
""" """
for titratable_group in conformation_container.get_titratable_groups(): for titratable_group in conformation_container.get_titratable_groups():
for ion_group in conformation_container.get_ions(): for ion_group in conformation_container.get_ions():
squared_distance = propka.calculations.squared_distance(titratable_group, ion_group) dist_sq = squared_distance(titratable_group, ion_group)
if squared_distance < version.parameters.coulomb_cutoff2_squared: if dist_sq < version.parameters.coulomb_cutoff2_squared:
weight = version.calculatePairWeight(titratable_group.Nmass, ion_group.Nmass) weight = version.calculate_pair_weight(
# the pKa of both acids and bases are shifted up by negative ions (and vice versa) titratable_group.num_volume, ion_group.num_volume)
value = (-ion_group.charge) * version.calculateCoulombEnergy(math.sqrt(squared_distance), weight) # the pKa of both acids and bases are shifted up by negative
newDeterminant = Determinant(ion_group, value) # ions (and vice versa)
titratable_group.determinants['coulomb'].append(newDeterminant) value = (
-ion_group.charge
* version.calculate_coulomb_energy(
math.sqrt(dist_sq), weight))
new_det = Determinant(ion_group, value)
titratable_group.determinants['coulomb'].append(new_det)
return
def setBackBoneDeterminants(titratable_groups, backbone_groups, version): def set_backbone_determinants(titratable_groups, backbone_groups, version):
"""Set determinants between titrable and backbone groups.
Args:
titratable_groups: list of titratable groups
backbone_groups: list of backbone groups
version: version object
"""
for titratable_group in titratable_groups: for titratable_group in titratable_groups:
titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids titratable_group_interaction_atoms = (
titratable_group.interaction_atoms_for_acids)
if not titratable_group_interaction_atoms: if not titratable_group_interaction_atoms:
continue continue
# find out which backbone groups this titratable is interacting with # find out which backbone groups this titratable is interacting with
for backbone_group in backbone_groups: for backbone_group in backbone_groups:
# find the interacting atoms # find the interacting atoms
backbone_interaction_atoms = backbone_group.get_interaction_atoms(titratable_group) backbone_interaction_atoms = (
backbone_group.get_interaction_atoms(titratable_group))
if not backbone_interaction_atoms: if not backbone_interaction_atoms:
continue continue
# find the smallest distance # find the smallest distance
[backbone_atom, distance, titratable_atom] = propka.calculations.get_smallest_distance(backbone_interaction_atoms, [backbone_atom, distance, titratable_atom] = (
titratable_group_interaction_atoms) get_smallest_distance(
backbone_interaction_atoms,
titratable_group_interaction_atoms))
# get the parameters # get the parameters
parameters = version.get_backbone_hydrogen_bond_parameters(backbone_atom, titratable_atom) parameters = (
version.get_backbone_hydrogen_bond_parameters(
backbone_atom, titratable_atom))
if not parameters: if not parameters:
continue continue
[dpKa_max, [cutoff1, cutoff2]] = parameters [dpka_max, [cutoff1, cutoff2]] = parameters
if distance < cutoff2: if distance < cutoff2:
# calculate angle factor # calculate angle factor
f_angle = 1.0 f_angle = 1.0
@@ -202,19 +246,20 @@ def setBackBoneDeterminants(titratable_groups, backbone_groups, version):
# || # ||
# C # C
if backbone_group.type == 'BBC': if backbone_group.type == 'BBC':
if titratable_group.type in version.parameters.angular_dependent_sidechain_interactions: if (titratable_group.type
in version.parameters.angular_dependent_sidechain_interactions):
if titratable_atom.element == 'H': if titratable_atom.element == 'H':
heavy_atom = titratable_atom.bonded_atoms[0] heavy_atom = titratable_atom.bonded_atoms[0]
hydrogen_atom = titratable_atom hydrogen_atom = titratable_atom
[d1, f_angle, d2] = propka.calculations.AngleFactorX(atom1=heavy_atom, [_, f_angle, _] = angle_distance_factors(
atom2=hydrogen_atom, atom1=heavy_atom, atom2=hydrogen_atom,
atom3=backbone_atom) atom3=backbone_atom)
else: else:
# Either the structure is corrupt (no hydrogen), or the heavy atom is closer to # Either the structure is corrupt (no hydrogen),
# the titratable atom than the hydrogen. In either case we set the angle factor # or the heavy atom is closer to the titratable
# to 0 # atom than the hydrogen. In either case we set the
# angle factor to 0
f_angle = 0.0 f_angle = 0.0
# for BBN groups, the hydrogen is on the backbone group # for BBN groups, the hydrogen is on the backbone group
# #
# Titra. # Titra.
@@ -225,23 +270,22 @@ def setBackBoneDeterminants(titratable_groups, backbone_groups, version):
# / \ # / \
if backbone_group.type == 'BBN': if backbone_group.type == 'BBN':
if backbone_atom.element == 'H': if backbone_atom.element == 'H':
backbone_N = backbone_atom.bonded_atoms[0] backbone_n = backbone_atom.bonded_atoms[0]
backbone_H = backbone_atom backbone_h = backbone_atom
[d1, f_angle, d2] = propka.calculations.AngleFactorX(atom1=titratable_atom, [_, f_angle, _] = (
atom2=backbone_H, angle_distance_factors(
atom3=backbone_N) atom1=titratable_atom, atom2=backbone_h,
atom3=backbone_n))
else: else:
# Either the structure is corrupt (no hydrogen), or the heavy atom is closer to # Either the structure is corrupt (no hydrogen), or the
# the titratable atom than the hydrogen. In either case we set the angle factor # heavy atom is closer to the titratable atom than the
# to 0 # hydrogen. In either case we set the angle factor to 0
f_angle = 0.0 f_angle = 0.0
if f_angle > FANGLE_MIN:
value = (
if f_angle > 0.001: titratable_group.charge
value = titratable_group.charge * propka.calculations.HydrogenBondEnergy(distance, dpKa_max, [cutoff1,cutoff2], f_angle) * hydrogen_bond_energy(
distance, dpka_max, [cutoff1, cutoff2], f_angle))
newDeterminant = Determinant(backbone_group, value) new_determinant = Determinant(backbone_group, value)
titratable_group.determinants['backbone'].append(newDeterminant) titratable_group.determinants['backbone'].append(
new_determinant)
return

File diff suppressed because it is too large Load Diff

View File

@@ -1,21 +1,26 @@
"""Provides an alternative PDB format that can transparently encode larger atom numbers. """Provides alternative PDB format that can encode larger atom numbers.
http://cci.lbl.gov/hybrid_36/ http://cci.lbl.gov/hybrid_36/
""" """
import string import string
_hybrid36_upper_chars = set(string.ascii_uppercase)
_hybrid36_lower_chars = set(string.ascii_lowercase) _HYBRID36_UPPER_CHARS = set(string.ascii_uppercase)
_hybrid36_digits = set(string.digits) _HYBRID36_LOWER_CHARS = set(string.ascii_lowercase)
_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits _HYBRID36_DIGITS = set(string.digits)
_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits _HYBRID36_UPPER_SET = _HYBRID36_UPPER_CHARS | _HYBRID36_DIGITS
_HYBRID36_LOWER_SET = _HYBRID36_LOWER_CHARS | _HYBRID36_DIGITS
def decode(input_string): def decode(input_string):
""" """Convert an input string of a number in hybrid-36 format to an integer.
Convert an input string of a number in hybrid-36 format to an integer.
Args:
input_string: input string
Returns:
integer
""" """
value_error_message = "invalid literal for hybrid-36 conversion: '%s'" value_error_message = "invalid literal for hybrid-36 conversion: '{0:s}'"
original_input_string = input_string original_input_string = input_string
input_string = input_string.strip() input_string = input_string.strip()
@@ -27,30 +32,30 @@ def decode(input_string):
else: else:
sign = 1 sign = 1
if not len(input_string): if len(input_string) == 0:
raise ValueError(value_error_message % input_string) raise ValueError(value_error_message.format(input_string))
# See http://cci.lbl.gov/hybrid_36/ for documentation on the format. # See http://cci.lbl.gov/hybrid_36/ for documentation on the format.
num_chars = len(input_string) num_chars = len(input_string)
first_char = input_string[0] first_char = input_string[0]
if first_char in _hybrid36_digits: if first_char in _HYBRID36_DIGITS:
return sign * int(input_string) return sign * int(input_string)
elif first_char in _hybrid36_upper_chars: elif first_char in _HYBRID36_UPPER_CHARS:
reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars) reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars)
_hybrid36_set = _hybrid36_upper_set _hybrid36_set = _HYBRID36_UPPER_SET
elif first_char in _hybrid36_lower_chars: elif first_char in _HYBRID36_LOWER_CHARS:
reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars) reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars)
_hybrid36_set = _hybrid36_lower_set _hybrid36_set = _HYBRID36_LOWER_SET
else: else:
raise ValueError(value_error_message % original_input_string) raise ValueError(value_error_message.format(original_input_string))
# Check the validity of the input string: ASCII characters should be # Check the validity of the input string: ASCII characters should be
# either all uppercase or all lowercase. # either all uppercase or all lowercase.
for c in input_string[1:]: for char in input_string[1:]:
if c not in _hybrid36_set: if char not in _hybrid36_set:
raise ValueError(value_error_message % original_input_string) raise ValueError(value_error_message.format(original_input_string))
# Convert with the int function. # Convert with the int function.
return sign * (int(input_string, 36) + reference) return sign * (int(input_string, 36) + reference)

View File

@@ -1,60 +1,68 @@
"""Iterative functions for pKa calculations.
from __future__ import division These appear to mostly involve determinants.
from __future__ import print_function """
import math, time
import propka.lib as lib
from propka.determinant import Determinant from propka.determinant import Determinant
import propka.calculations from propka.lib import info, debug
from propka.lib import info, warning, debug
# Some library functions for the interative pKa determinants
def addtoDeterminantList(group1, group2, distance, iterative_interactions, version): # TODO - these are undocumented constants
UNK_MIN_VALUE = 0.005
def add_to_determinant_list(group1, group2, distance, iterative_interactions,
version):
"""Add iterative determinantes to the list.
[[R1, R2], [side-chain, coulomb], [A1, A2]], ...
NOTE - sign is determined when the interaction is added to the iterative
object!
NOTE - distance < coulomb_cutoff here
Args:
group1: first group in pair
group2: second group in pair
distance: distance between groups
iterative_interactions: interaction list to modify
version: version object
""" """
Adds 'iterative determinants' to list ..., [[R1, R2], [side-chain, coulomb], [A1, A2]], ... hbond_value = version.hydrogen_bond_interaction(group1, group2)
Note, the sign is determined when the interaction is added to the iterative object!
Note, distance < coulomb_cutoff here
"""
hbond_value = version.hydrogen_bond_interaction(group1, group2)
coulomb_value = version.electrostatic_interaction(group1, group2, distance) coulomb_value = version.electrostatic_interaction(group1, group2, distance)
# adding the interaction to 'iterative_interactions' # adding the interaction to 'iterative_interactions'
if hbond_value or coulomb_value: if hbond_value or coulomb_value:
pair = [group1, group2] pair = [group1, group2]
values = [hbond_value, coulomb_value]
values = [hbond_value, coulomb_value]
while None in values: while None in values:
values[values.index(None)] = 0.0 values[values.index(None)] = 0.0
annihilation = [0., 0.] annihilation = [0., 0.]
interaction = [pair, values, annihilation] interaction = [pair, values, annihilation]
iterative_interactions.append(interaction) iterative_interactions.append(interaction)
return
def add_iterative_acid_pair(object1, object2, interaction):
"""Add the Coulomb 'iterative' interaction (an acid pair).
def addIterativeAcidPair(object1, object2, interaction): The higher pKa is raised with QQ+HB
The lower pKa is lowered with HB
Args:
object1: first object in pair
object2: second object in pair
interaction: list with [values, annihilation]
""" """
Adding the Coulomb 'iterative' interaction (an acid pair): values = interaction[1]
the higher pKa is raised with QQ+HB
the lower pKa is lowered with HB
"""
values = interaction[1]
annihilation = interaction[2] annihilation = interaction[2]
hbond_value = values[0] hbond_value = values[0]
coulomb_value = values[1] coulomb_value = values[1]
diff = coulomb_value + 2*hbond_value diff = coulomb_value + 2*hbond_value
comp1 = object1.pKa_old + annihilation[0] + diff comp1 = object1.pka_old + annihilation[0] + diff
comp2 = object2.pKa_old + annihilation[1] + diff comp2 = object2.pka_old + annihilation[1] + diff
annihilation[0] = 0. annihilation[0] = 0.0
annihilation[1] = 0. annihilation[1] = 0.0
if comp1 > comp2: if comp1 > comp2:
# side-chain # side-chain
determinant = [object2, hbond_value] determinant = [object2, hbond_value]
object1.determinants['sidechain'].append(determinant) object1.determinants['sidechain'].append(determinant)
determinant = [object1, -hbond_value] determinant = [object1, -hbond_value]
object2.determinants['sidechain'].append(determinant) object2.determinants['sidechain'].append(determinant)
@@ -64,7 +72,7 @@ def addIterativeAcidPair(object1, object2, interaction):
annihilation[0] = -diff annihilation[0] = -diff
else: else:
# side-chain # side-chain
determinant = [object1, hbond_value] determinant = [object1, hbond_value]
object2.determinants['sidechain'].append(determinant) object2.determinants['sidechain'].append(determinant)
determinant = [object2, -hbond_value] determinant = [object2, -hbond_value]
object1.determinants['sidechain'].append(determinant) object1.determinants['sidechain'].append(determinant)
@@ -74,26 +82,31 @@ def addIterativeAcidPair(object1, object2, interaction):
annihilation[1] = -diff annihilation[1] = -diff
def addIterativeBasePair(object1, object2, interaction): def add_iterative_base_pair(object1, object2, interaction):
"""Add the Coulomb 'iterative' interaction (a base pair).
The lower pKa is lowered
Args:
object1: first object in pair
object2: second object in pair
interaction: list with [values, annihilation]
""" """
Adding the Coulomb 'iterative' interaction (a base pair): values = interaction[1]
the lower pKa is lowered
"""
values = interaction[1]
annihilation = interaction[2] annihilation = interaction[2]
hbond_value = values[0] hbond_value = values[0]
coulomb_value = values[1] coulomb_value = values[1]
diff = coulomb_value + 2*hbond_value diff = coulomb_value + 2*hbond_value
diff = -diff diff = -diff
comp1 = object1.pKa_old + annihilation[0] + diff comp1 = object1.pka_old + annihilation[0] + diff
comp2 = object2.pKa_old + annihilation[1] + diff comp2 = object2.pka_old + annihilation[1] + diff
annihilation[0] = 0. annihilation[0] = 0.0
annihilation[1] = 0. annihilation[1] = 0.0
if comp1 < comp2: if comp1 < comp2:
# side-chain # side-chain
determinant = [object2, -hbond_value] determinant = [object2, -hbond_value]
object1.determinants['sidechain'].append(determinant) object1.determinants['sidechain'].append(determinant)
determinant = [object1, hbond_value] determinant = [object1, hbond_value]
object2.determinants['sidechain'].append(determinant) object2.determinants['sidechain'].append(determinant)
# Coulomb # Coulomb
determinant = [object2, -coulomb_value] determinant = [object2, -coulomb_value]
@@ -103,7 +116,7 @@ def addIterativeBasePair(object1, object2, interaction):
# side-chain # side-chain
determinant = [object1, -hbond_value] determinant = [object1, -hbond_value]
object2.determinants['sidechain'].append(determinant) object2.determinants['sidechain'].append(determinant)
determinant = [object2, hbond_value] determinant = [object2, hbond_value]
object1.determinants['sidechain'].append(determinant) object1.determinants['sidechain'].append(determinant)
# Coulomb # Coulomb
determinant = [object1, -coulomb_value] determinant = [object1, -coulomb_value]
@@ -111,254 +124,249 @@ def addIterativeBasePair(object1, object2, interaction):
annihilation[1] = -diff annihilation[1] = -diff
def addIterativeIonPair(object1, object2, interaction, version): def add_iterative_ion_pair(object1, object2, interaction, version):
""" """Add the Coulomb 'iterative' interaction (an acid-base pair)
Adding the Coulomb 'iterative' interaction (an acid-base pair):
the pKa of the acid is lowered & the pKa of the base is raised the pKa of the acid is lowered & the pKa of the base is raised
Args:
object1: first object in pair
object2: second object in pair
interaction: list with [values, annihilation]
version: version object
""" """
values = interaction[1] values = interaction[1]
annihilation = interaction[2] annihilation = interaction[2]
hbond_value = values[0] hbond_value = values[0]
coulomb_value = values[1] coulomb_value = values[1]
Q1 = object1.Q q1 = object1.q
Q2 = object2.Q q2 = object2.q
comp1 = object1.pKa_old + annihilation[0] + Q1*coulomb_value comp1 = object1.pka_old + annihilation[0] + q1*coulomb_value
comp2 = object2.pKa_old + annihilation[1] + Q2*coulomb_value comp2 = object2.pka_old + annihilation[1] + q2*coulomb_value
if object1.res_name not in version.parameters.exclude_sidechain_interactions: if (object1.res_name
comp1 += Q1*hbond_value not in version.parameters.exclude_sidechain_interactions):
if object2.res_name not in version.parameters.exclude_sidechain_interactions: comp1 += q1*hbond_value
comp2 += Q2*hbond_value if (object2.res_name
not in version.parameters.exclude_sidechain_interactions):
if Q1 == -1.0 and comp1 < comp2: comp2 += q2*hbond_value
add_term = True # pKa(acid) < pKa(base) if q1 == -1.0 and comp1 < comp2:
elif Q1 == 1.0 and comp1 > comp2: # pKa(acid) < pKa(base)
add_term = True # pKa(base) > pKa(acid) add_term = True
elif q1 == 1.0 and comp1 > comp2:
# pKa(base) > pKa(acid)
add_term = True
else: else:
add_term = False add_term = False
annihilation[0] = 0.00 annihilation[0] = 0.00
annihilation[1] = 0.00 annihilation[1] = 0.00
if add_term:
if add_term == True: # Coulomb
if coulomb_value > UNK_MIN_VALUE:
# Coulomb # residue1
if coulomb_value > 0.005: interaction = [object2, q1*coulomb_value]
# residue1 annihilation[0] += -q1*coulomb_value
interaction = [object2, Q1*coulomb_value] object1.determinants['coulomb'].append(interaction)
annihilation[0] += -Q1*coulomb_value # residue2
object1.determinants['coulomb'].append(interaction) interaction = [object1, q2*coulomb_value]
# residue2 annihilation[1] += -q2*coulomb_value
interaction = [object1, Q2*coulomb_value] object2.determinants['coulomb'].append(interaction)
annihilation[1] += -Q2*coulomb_value # Side-chain
object2.determinants['coulomb'].append(interaction) if hbond_value > UNK_MIN_VALUE:
# residue1
# Side-chain if (object1.res_name
if hbond_value > 0.005: not in version.parameters.exclude_sidechain_interactions):
# residue1 interaction = [object2, q1*hbond_value]
if object1.res_name not in version.parameters.exclude_sidechain_interactions: annihilation[0] += -q1*hbond_value
interaction = [object2, Q1*hbond_value] object1.determinants['sidechain'].append(interaction)
annihilation[0] += -Q1*hbond_value # residue2
object1.determinants['sidechain'].append(interaction) if (object2.res_name
# residue2 not in version.parameters.exclude_sidechain_interactions):
if object2.res_name not in version.parameters.exclude_sidechain_interactions: interaction = [object1, q2*hbond_value]
interaction = [object1, Q2*hbond_value] annihilation[1] += -q2*hbond_value
annihilation[1] += -Q2*hbond_value object2.determinants['sidechain'].append(interaction)
object2.determinants['sidechain'].append(interaction)
def addDeterminants(iterative_interactions, version, options=None): def add_determinants(iterative_interactions, version, _=None):
""" """Add determinants iteratively.
The iterative pKa scheme. Later it is all added in 'calculateTotalPKA' The iterative pKa scheme. Later it is all added in 'calculateTotalPKA'
Args:
iterative_interactions: list of iterative interactions
version: version object
_: options object
""" """
# --- setup --- # --- setup ---
iteratives = [] iteratives = []
done_group = [] done_group = []
# create iterative objects with references to their real group counterparts
# creating iterative objects with references to their real group counterparts
for interaction in iterative_interactions: for interaction in iterative_interactions:
pair = interaction[0] pair = interaction[0]
for group in pair: for group in pair:
if group in done_group: if group in done_group:
#print "done already" # do nothing - already have an iterative object for this group
""" do nothing - already have an iterative object for this group """ pass
else: else:
newIterative = Iterative(group) new_iterative = Iterative(group)
iteratives.append(newIterative) iteratives.append(new_iterative)
done_group.append(group) done_group.append(group)
# Initialize iterative scheme # Initialize iterative scheme
debug("\n --- pKa iterations (%d groups, %d interactions) ---" % debug(
(len(iteratives), len(iterative_interactions))) "\n --- pKa iterations ({0:d} groups, {1:d} interactions) ---".format(
len(iteratives), len(iterative_interactions)))
converged = False converged = False
iteration = 0 iteration = 0
# set non-iterative pka values as first step # set non-iterative pka values as first step
for itres in iteratives: for iter_ in iteratives:
itres.pKa_iter.append(itres.pKa_NonIterative) iter_.pka_iter.append(iter_.pka_noniterative)
# --- starting pKa iterations --- # --- starting pKa iterations ---
while converged == False: while not converged:
# initialize pka_new
iteration += 1
for itres in iteratives:
itres.determinants = {'sidechain': [], 'backbone': [],
'coulomb': []}
itres.pka_new = itres.pka_noniterative
# Adding interactions to temporary determinant container
for interaction in iterative_interactions:
pair = interaction[0]
object1, object2 = find_iterative(pair, iteratives)
q1 = object1.q
q2 = object2.q
if q1 < 0.0 and q2 < 0.0:
# both are acids
add_iterative_acid_pair(object1, object2, interaction)
elif q1 > 0.0 and q2 > 0.0:
# both are bases
add_iterative_base_pair(object1, object2, interaction)
else:
# one of each
add_iterative_ion_pair(object1, object2, interaction, version)
# Calculating pka_new values
for itres in iteratives:
for type_ in ['sidechain', 'backbone', 'coulomb']:
for determinant in itres.determinants[type_]:
itres.pka_new += determinant[1]
# initialize pKa_new # Check convergence
iteration += 1 converged = True
for itres in iteratives: for itres in iteratives:
itres.determinants = {'sidechain':[],'backbone':[],'coulomb':[]} if itres.pka_new == itres.pka_old:
itres.pKa_new = itres.pKa_NonIterative itres.converged = True
else:
itres.converged = False
converged = False
# reset pka_old & storing pka_new in pka_iter
for itres in iteratives:
itres.pka_old = itres.pka_new
itres.pka_iter.append(itres.pka_new)
# Adding interactions to temporary determinant container if iteration == 10:
for interaction in iterative_interactions: info("did not converge in {0:d} iterations".format(iteration))
pair = interaction[0] break
values = interaction[1]
annihilation = interaction[2]
#print "len(interaction) = %d" % (len(interaction))
object1, object2 = findIterative(pair, iteratives)
Q1 = object1.Q
Q2 = object2.Q
if Q1 < 0.0 and Q2 < 0.0:
""" both are acids """
addIterativeAcidPair(object1, object2, interaction)
elif Q1 > 0.0 and Q2 > 0.0:
""" both are bases """
addIterativeBasePair(object1, object2, interaction)
else:
""" one of each """
addIterativeIonPair(object1, object2, interaction, version)
# Calculating pKa_new values
for itres in iteratives:
for type in ['sidechain','backbone','coulomb']:
for determinant in itres.determinants[type]:
itres.pKa_new += determinant[1]
# Check convergence
converged = True
for itres in iteratives:
if itres.pKa_new == itres.pKa_old:
itres.converged = True
else:
itres.converged = False
converged = False
# reset pKa_old & storing pKa_new in pKa_iter
for itres in iteratives:
itres.pKa_old = itres.pKa_new
itres.pKa_iter.append(itres.pKa_new)
if iteration == 10:
info("did not converge in %d iterations" % (iteration))
break
# --- Iterations finished ---
# printing pKa iterations # printing pKa iterations
# formerly was conditioned on if options.verbosity >= 2 - now unnecessary # formerly was conditioned on if options.verbosity >= 2 - now unnecessary
str = "%12s" % (" ") str_ = ' '
for index in range(0, iteration+1 ): for index in range(iteration+1):
str += "%8d" % (index) str_ += "{0:>8d}".format(index)
debug(str) debug(str_)
for itres in iteratives: for itres in iteratives:
str = "%s " % (itres.label) str_ = "{0:s} ".format(itres.label)
for pKa in itres.pKa_iter: for pka in itres.pka_iter:
str += "%8.2lf" % (pKa) str_ += "{0:>8.2f}".format(pka)
if itres.converged == False: if not itres.converged:
str += " *" str_ += " *"
debug(str) debug(str_)
# creating real determinants and adding them to group object # creating real determinants and adding them to group object
for itres in iteratives: for itres in iteratives:
for type in ['sidechain','backbone','coulomb']: for type_ in ['sidechain', 'backbone', 'coulomb']:
for interaction in itres.determinants[type]: for interaction in itres.determinants[type_]:
#info('done',itres.group.label,interaction[0],interaction[1]) #info('done',itres.group.label,interaction[0],interaction[1])
value = interaction[1] value = interaction[1]
if value > 0.005 or value < -0.005: if value > UNK_MIN_VALUE or value < -UNK_MIN_VALUE:
g = interaction[0] group = interaction[0]
newDeterminant = Determinant(g, value) new_det = Determinant(group, value)
itres.group.determinants[type].append(newDeterminant) itres.group.determinants[type_].append(new_det)
def find_iterative(pair, iteratives):
"""Find the 'iteratives' that correspond to the groups in 'pair'.
def findIterative(pair, iteratives): Args:
""" pair: groups to match
Function to find the two 'iteratives' that corresponds to the groups in 'pair' iteratives: list of iteratives to search
Returns:
1. first matched iterative
2. second matched iterative
""" """
for iterative in iteratives: for iterative in iteratives:
if iterative.group == pair[0]: if iterative.group == pair[0]:
iterative0 = iterative iterative0 = iterative
elif iterative.group == pair[1]: elif iterative.group == pair[1]:
iterative1 = iterative iterative1 = iterative
return iterative0, iterative1 return iterative0, iterative1
class Iterative: class Iterative:
""" """Iterative class - pKa values and references of iterative groups.
Iterative class - pKa values and references of iterative groups
Note, this class has a fake determinant list, true determinants are NOTE - this class has a fake determinant list, true determinants are made
made after the iterations are finished. after the iterations are finished.
""" """
def __init__(self, group): def __init__(self, group):
""" """Initialize object with group.
Contructer of the iterative object
"""
#print "creating 'iterative object' for %s" % (group.label) Args:
group: group to use for initialization.
self.label = group.label """
self.atom = group.atom self.label = group.label
self.res_name = group.residue_type self.atom = group.atom
self.Q = group.charge self.res_name = group.residue_type
self.pKa_old = None self.q = group.charge
self.pKa_new = None self.pka_old = None
self.pKa_iter = [] self.pka_new = None
self.pKa_NonIterative = 0.00 self.pka_iter = []
self.determinants = {'sidechain':[],'backbone':[],'coulomb':[]} self.pka_noniterative = 0.00
self.determinants = {'sidechain': [], 'backbone': [], 'coulomb': []}
self.group = group self.group = group
self.converged = True self.converged = True
# Calculate the Non-Iterative part of pKa from the group object # Calculate the Non-Iterative part of pKa from the group object
# Side chain # Side chain
side_chain = 0.00 side_chain = 0.00
for determinant in group.determinants['sidechain']: for determinant in group.determinants['sidechain']:
value = determinant.value value = determinant.value
side_chain += value side_chain += value
# Back bone # Back bone
back_bone = 0.00 back_bone = 0.00
for determinant in group.determinants['backbone']: for determinant in group.determinants['backbone']:
value = determinant.value value = determinant.value
back_bone += value back_bone += value
# Coulomb # Coulomb
coulomb = 0.00 coulomb = 0.00
for determinant in group.determinants['coulomb']: for determinant in group.determinants['coulomb']:
value = determinant.value value = determinant.value
coulomb += value coulomb += value
self.pka_noniterative = group.model_pka
self.pKa_NonIterative = group.model_pka self.pka_noniterative += group.energy_volume
self.pKa_NonIterative += group.Emass self.pka_noniterative += group.energy_local
self.pKa_NonIterative += group.Elocl self.pka_noniterative += side_chain
self.pKa_NonIterative += side_chain self.pka_noniterative += back_bone
self.pKa_NonIterative += back_bone self.pka_noniterative += coulomb
self.pKa_NonIterative += coulomb self.pka_old = self.pka_noniterative
self.pKa_old = self.pKa_NonIterative
def __eq__(self, other): def __eq__(self, other):
""" """Needed to use objects in sets."""
Check if two groups should be considered identical
"""
if self.atom.type == 'atom': if self.atom.type == 'atom':
# In case of protein atoms we trust the labels # In case of protein atoms we trust the labels
return self.label==other.label return self.label == other.label
else: else:
# For heterogene atoms we also need to check the residue number # For heterogene atoms we also need to check the residue number
return self.label==other.label and self.atom.res_num == other.atom.res_num return (
self.label == other.label
and self.atom.res_num == other.atom.res_num)
def __hash__(self): def __hash__(self):
""" Needed together with __eq__ - otherwise we can't make sets of groups """ """Needed to use objects in sets."""
return id(self) return id(self)

View File

@@ -1,112 +1,142 @@
from __future__ import division """Implements many of the main functions used to call PROPKA."""
from __future__ import print_function
import sys import sys
import pkg_resources
import logging import logging
import argparse import argparse
import pkg_resources
logger = logging.getLogger("propka") _LOGGER = logging.getLogger("propka")
stdout_handler = logging.StreamHandler(sys.stdout) _STDOUT_HANDLER = logging.StreamHandler(sys.stdout)
stdout_handler.setFormatter(logging.Formatter("%(message)s")) _STDOUT_HANDLER.setFormatter(logging.Formatter("%(message)s"))
logger.addHandler(stdout_handler) _LOGGER.addHandler(_STDOUT_HANDLER)
#
# file I/O
#
def open_file_for_reading(filename):
"""Open file or file-like stream *filename* for reading.
*filename* may be a string and then it is opened but if it is a def open_file_for_reading(input_file):
file-like object (such as an open :class:`file` or """Open file or file-like stream for reading.
:class:`StringIO.StringIO` --- really anything with ``next()``,
``read()``, ``readlines()``, ``readline``, ``close`` methods) then TODO - convert this to a context manager
the object is just passed through (the stream is attempted to be
reset to the beginning with ``fseek(0)``). Args:
input_file: path to file or file-like object. If file-like object,
then will attempt fseek(0).
""" """
if (hasattr(filename, 'next') or hasattr(filename, '__next__')) \ try:
and hasattr(filename, 'read') \ input_file.fseek(0)
and hasattr(filename, 'readline') and hasattr(filename, 'readlines') \ return input_file
and hasattr(filename, 'close'): except AttributeError:
# already a stream pass
try:
filename.fseek(0)
except AttributeError:
pass
return filename
try: try:
f = open(filename,'r') file_ = open(input_file, 'rt')
except: except:
raise IOError('Cannot find file %s' %filename) raise IOError('Cannot find file {0:s}'.format(input_file))
return f return file_
def open_file_for_writing(filename):
"""Open file or file-like stream for writing""" def open_file_for_writing(input_file):
if hasattr(filename, 'write') and hasattr(filename, 'writeline') and hasattr(filename, 'writelines') \ """Open file or file-like stream for writing.
and hasattr(filename, 'close'):
# already a stream TODO - convert this to a context manager.
try:
mode = filename.mode Args:
except AttributeError: input_file: path to file or file-like object. If file-like object,
mode = "w" then will attempt to get file mode.
"""
try:
mode = input_file.mode
if not ("w" in mode or "a" in mode or "+" in mode): if not ("w" in mode or "a" in mode or "+" in mode):
raise IOError("File/stream not open for writing") raise IOError("File/stream not open for writing")
return filename return input_file
except AttributeError:
pass
try: try:
f = open(filename,'w') file_ = open(input_file, 'wt')
except: except FileNotFoundError:
raise Exception('Could not open %s'%filename) raise Exception('Could not open {0:s}'.format(input_file))
return f return file_
#
# bookkeeping etc.
#
def conformation_sorter(conf): def conformation_sorter(conf):
"""TODO - figure out what this function does."""
model = int(conf[:-1]) model = int(conf[:-1])
altloc = conf[-1:] altloc = conf[-1:]
return model*100+ord(altloc) return model*100+ord(altloc)
def split_atoms_into_molecules(atoms): def split_atoms_into_molecules(atoms):
"""Maps atoms into molecules.
Args:
atoms: list of atoms
Returns:
list of molecules
"""
molecules = [] molecules = []
while len(atoms) > 0:
while len(atoms)>0:
initial_atom = atoms.pop() initial_atom = atoms.pop()
molecules.append( make_molecule(initial_atom,atoms)) molecules.append(make_molecule(initial_atom, atoms))
return molecules return molecules
def make_molecule(atom, atoms): def make_molecule(atom, atoms):
"""Make a molecule from atoms.
Args:
atom: one of the atoms
atoms: a list of the remaining atoms
Return:
list of atoms
"""
bonded_atoms = [a for a in atoms if atom in a.bonded_atoms] bonded_atoms = [a for a in atoms if atom in a.bonded_atoms]
res_atoms = [atom,] res_atoms = [atom,]
for bond_atom in bonded_atoms:
for ba in bonded_atoms: if bond_atom in atoms:
if ba in atoms: atoms.remove(bond_atom)
atoms.remove(ba) res_atoms.extend(make_molecule(bond_atom, atoms))
res_atoms.extend(make_molecule(ba, atoms))
return res_atoms return res_atoms
def make_grid(min,max,step): def make_grid(min_, max_, step):
x = min """Make a grid across the specified tange.
while x <= max:
TODO - figure out if this duplicates existing generators like `range` or
numpy function.
Args:
min_: minimum value of grid
max_: maximum value of grid
step: grid step size
"""
x = min_
while x <= max_:
yield x yield x
x += step x += step
return
def generate_combinations(interactions): def generate_combinations(interactions):
"""Generate combinations of interactions.
Args:
interactions: list of interactions
Returns:
list of combinations
"""
res = [[]] res = [[]]
for interaction in interactions: for interaction in interactions:
res = make_combination(res, interaction) res = make_combination(res, interaction)
res.remove([]) res.remove([])
return res return res
def make_combination(combis, interaction): def make_combination(combis, interaction):
"""Make a specific set of combinations.
Args:
combis: list of combinations
interaction: interaction to add to combinations
Returns:
list of combinations
"""
res = [] res = []
for combi in combis: for combi in combis:
res.append(combi+[interaction]) res.append(combi+[interaction])
@@ -115,15 +145,20 @@ def make_combination(combis, interaction):
def parse_res_string(res_str): def parse_res_string(res_str):
""" """Parse a residue string.
Parse the residue string, in format "chain:resnum[inscode]", and return
a tuple of (chain, resnum, inscode). Raises ValueError if the input Args:
string is invalid. res_string: residue string in format "chain:resnum[inscode]"
Returns:
a tuple of (chain, resnum, inscode).
Raises:
ValueError if the input string is invalid.
""" """
try: try:
chain, resnum_str = res_str.split(":") chain, resnum_str = res_str.split(":")
except ValueError: except ValueError:
raise ValueError("Invalid residue string (must contain 2 colon-separated values)") raise ValueError("Invalid residue string (must contain 2 "
"colon-separated values)")
try: try:
resnum = int(resnum_str) resnum = int(resnum_str)
except ValueError: except ValueError:
@@ -142,8 +177,8 @@ def build_parser(parser=None):
"""Build an argument parser for PROPKA. """Build an argument parser for PROPKA.
Args: Args:
parser_: existing parser. If this is not None, then the PROPKA parser will parser: existing parser. If this is not None, then the PROPKA parser will
be created as a subparser to this existing parser. Otherwise, a be created as a subparser to this existing parser. Otherwise, a
new parser will be created. new parser will be created.
Returns: Returns:
ArgumentParser object. ArgumentParser object.
@@ -151,74 +186,96 @@ def build_parser(parser=None):
if parser is not None: if parser is not None:
group = parser.add_argument_group(title="PROPKA invoation options") group = parser.add_argument_group(title="PROPKA invoation options")
else: else:
parser = argparse.ArgumentParser(description=("PROPKA predicts the pKa values of ionizable " parser = argparse.ArgumentParser(
"groups in proteins and protein-ligand " description=("PROPKA predicts the pKa values of ionizable "
"complexes based in the 3D structure"), "groups in proteins and protein-ligand "
formatter_class=argparse.ArgumentDefaultsHelpFormatter) "complexes based in the 3D structure"),
formatter_class=argparse.ArgumentDefaultsHelpFormatter)
# This is duck-typing at its finest # This is duck-typing at its finest
group = parser group = parser
group.add_argument("input_pdb", help="read data from <filename>") group.add_argument("input_pdb", help="read data from <filename>")
group.add_argument(
group.add_argument("-f", "--file", action="append", dest="filenames", default=[], "-f", "--file", action="append", dest="filenames", default=[],
help="read data from <filename>, i.e. <filename> is added to arguments") help="read data from <filename>, i.e. <filename> is added to arguments")
group.add_argument("-r", "--reference", dest="reference", default="neutral", group.add_argument(
help=("setting which reference to use for stability calculations " "-r", "--reference", dest="reference", default="neutral",
"[neutral/low-pH]")) help=("setting which reference to use for stability calculations "
group.add_argument("-c", "--chain", action="append", dest="chains", "[neutral/low-pH]"))
help=('creating the protein with only a specified chain. Specify ' group.add_argument(
'" " for chains without ID [all]')) "-c", "--chain", action="append", dest="chains",
group.add_argument("-i", "--titrate_only", dest="titrate_only", help=('creating the protein with only a specified chain. Specify '
help=('Treat only the specified residues as titratable. Value should ' '" " for chains without ID [all]'))
'be a comma-separated list of "chain:resnum" values; for example: ' group.add_argument(
'-i "A:10,A:11"')) "-i", "--titrate_only", dest="titrate_only",
group.add_argument("-t", "--thermophile", action="append", dest="thermophiles", help=('Treat only the specified residues as titratable. Value should '
help=("defining a thermophile filename; usually used in " 'be a comma-separated list of "chain:resnum" values; for example: '
"'alignment-mutations'")) '-i "A:10,A:11"'))
group.add_argument("-a", "--alignment", action="append", dest="alignment", group.add_argument(
help=("alignment file connecting <filename> and <thermophile> " "-t", "--thermophile", action="append", dest="thermophiles",
"[<thermophile>.pir]")) help=("defining a thermophile filename; usually used in "
group.add_argument("-m", "--mutation", action="append", dest="mutations", "'alignment-mutations'"))
help=("specifying mutation labels which is used to modify " group.add_argument(
"<filename> according to, e.g. N25R/N181D")) "-a", "--alignment", action="append", dest="alignment",
group.add_argument("-v", "--version", dest="version_label", default="Jan15", help=("alignment file connecting <filename> and <thermophile> "
help="specifying the sub-version of propka [Jan15/Dec19]") "[<thermophile>.pir]"))
group.add_argument("-p", "--parameters", dest="parameters", group.add_argument(
default=pkg_resources.resource_filename(__name__, "propka.cfg"), "-m", "--mutation", action="append", dest="mutations",
help="set the parameter file [%(default)s]") help=("specifying mutation labels which is used to modify "
"<filename> according to, e.g. N25R/N181D"))
group.add_argument(
"-v", "--version", dest="version_label", default="Jan15",
help="specifying the sub-version of propka [Jan15/Dec19]")
group.add_argument(
"-p", "--parameters", dest="parameters",
default=pkg_resources.resource_filename(__name__, "propka.cfg"),
help="set the parameter file [{default:s}]")
try: try:
group.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], group.add_argument(
help="logging level verbosity", default="INFO") "--log-level",
choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"],
help="logging level verbosity", default="INFO")
except argparse.ArgumentError: except argparse.ArgumentError:
# It is possible that --log-level has already been set by APBS # It is possible that --log-level has already been set by APBS
pass pass
group.add_argument("-o", "--pH", dest="pH", type=float, default=7.0, group.add_argument(
help="setting pH-value used in e.g. stability calculations [7.0]") "-o", "--pH", dest="pH", type=float, default=7.0,
group.add_argument("-w", "--window", dest="window", nargs=3, type=float, help="setting pH-value used in e.g. stability calculations [7.0]")
default=(0.0, 14.0, 1.0), group.add_argument(
help=("setting the pH-window to show e.g. stability profiles " "-w", "--window", dest="window", nargs=3, type=float,
"[0.0, 14.0, 1.0]")) default=(0.0, 14.0, 1.0),
group.add_argument("-g", "--grid", dest="grid", nargs=3, type=float, help=("setting the pH-window to show e.g. stability profiles "
default=(0.0, 14.0, 0.1), "[0.0, 14.0, 1.0]"))
help=("setting the pH-grid to calculate e.g. stability " group.add_argument(
"related properties [0.0, 14.0, 0.1]")) "-g", "--grid", dest="grid", nargs=3, type=float,
group.add_argument("--mutator", dest="mutator", default=(0.0, 14.0, 0.1),
help="setting approach for mutating <filename> [alignment/scwrl/jackal]") help=("setting the pH-grid to calculate e.g. stability "
group.add_argument("--mutator-option", dest="mutator_options", action="append", "related properties [0.0, 14.0, 0.1]"))
help="setting property for mutator [e.g. type=\"side-chain\"]") group.add_argument(
group.add_argument("-d", "--display-coupled-residues", dest="display_coupled_residues", "--mutator", dest="mutator",
action="store_true", help=("Displays alternative pKa values due " help="setting approach for mutating <filename> [alignment/scwrl/jackal]")
"to coupling of titratable groups")) group.add_argument(
group.add_argument("-l", "--reuse-ligand-mol2-files", dest="reuse_ligand_mol2_file", "--mutator-option", dest="mutator_options", action="append",
action="store_true", default=False, help="setting property for mutator [e.g. type=\"side-chain\"]")
help=("Reuses the ligand mol2 files allowing the user to alter " group.add_argument(
"ligand bond orders")) "-d", "--display-coupled-residues", dest="display_coupled_residues",
group.add_argument("-k", "--keep-protons", dest="keep_protons", action="store_true", action="store_true",
help="Keep protons in input file", default=False) help=("Displays alternative pKa values due "
group.add_argument("-q", "--quiet", action="store_const", const="WARNING", "to coupling of titratable groups"))
dest="log_level", help="suppress non-warning messages") group.add_argument(
group.add_argument("--protonate-all", dest="protonate_all", action="store_true", "-l", "--reuse-ligand-mol2-files", dest="reuse_ligand_mol2_file",
help="Protonate all atoms (will not influence pKa calculation)", action="store_true", default=False,
default=False) help=("Reuses the ligand mol2 files allowing the user to alter "
"ligand bond orders"))
group.add_argument(
"-k", "--keep-protons", dest="keep_protons", action="store_true",
help="Keep protons in input file", default=False)
group.add_argument(
"-q", "--quiet", action="store_const", const="WARNING",
dest="log_level", help="suppress non-warning messages")
group.add_argument(
"--protonate-all", dest="protonate_all", action="store_true",
help="Protonate all atoms (will not influence pKa calculation)",
default=False)
return parser return parser
@@ -234,13 +291,11 @@ def loadOptions(args=None):
""" """
# loading the parser # loading the parser
parser = build_parser() parser = build_parser()
# parsing and returning options and arguments # parsing and returning options and arguments
options = parser.parse_args(args) options = parser.parse_args(args)
# adding specified filenames to arguments # adding specified filenames to arguments
options.filenames.append(options.input_pdb) options.filenames.append(options.input_pdb)
# Convert titrate_only string to a list of (chain, resnum) items: # Convert titrate_only string to a list of (chain, resnum) items:
if options.titrate_only is not None: if options.titrate_only is not None:
res_list = [] res_list = []
@@ -248,76 +303,109 @@ def loadOptions(args=None):
try: try:
chain, resnum, inscode = parse_res_string(res_str) chain, resnum, inscode = parse_res_string(res_str)
except ValueError: except ValueError:
logger.critical('Invalid residue string: "%s"' % res_str) _LOGGER.critical(
'Invalid residue string: "{0:s}"'.format(res_str))
sys.exit(1) sys.exit(1)
res_list.append((chain, resnum, inscode)) res_list.append((chain, resnum, inscode))
options.titrate_only = res_list options.titrate_only = res_list
# Set the no-print variable # Set the no-print variable
level = getattr(logging, options.log_level) level = getattr(logging, options.log_level)
logger.setLevel(level) _LOGGER.setLevel(level)
# done! # done!
return options return options
def makeTidyAtomLabel(name,element): def make_tidy_atom_label(name, element):
""" """Returns a 'tidier' atom label for printing to the new PDB file.
Returns a 'tidier' atom label for printing the new pdbfile
"""
if len(name)>4:# if longer than 4, just truncate the name Args:
label=name[0:4] name: atom name
elif len(name)==4:# if lenght is 4, otherwise use the name as it is element: atom element
Returns:
string
"""
if len(name) > 4: # if longer than 4, just truncate the name
label = name[0:4]
elif len(name) == 4: # if length is 4, otherwise use the name as it is
label = name label = name
else: # if less than 4 characters long, insert white space as needed else: # if less than 4 characters long, insert white space as needed
if len(element)==1: if len(element) == 1:
label = ' %-3s'%name label = ' {0:<3s}'.format(name)
else: # The element shoul occupy the two first chars else: # The element should occupy the two first chars
label = '%-4s'%name label = '{0:<4s}'.format(name)
return label return label
def get_sorted_configurations(configuration_keys): def get_sorted_configurations(configuration_keys):
""" """Extract and sort configurations.
extract and sort configurations
Args:
configuration_keys: list of configuration keys
Returns:
list of configurations
""" """
configurations = list(configuration_keys) configurations = list(configuration_keys)
configurations.sort(key=configuration_compare) configurations.sort(key=configuration_compare)
return configurations return configurations
def configuration_compare(conf): def configuration_compare(conf):
"""TODO - figure out what this function does."""
return 100*int(conf[1:-2]) + ord(conf[-1]) return 100*int(conf[1:-2]) + ord(conf[-1])
def write_file(filename, lines):
"""Writes a new file.
Args:
def writeFile(filename, lines): filename: name of file
lines: lines to write to file
""" """
Writes a new file file_ = open_file_for_writing(filename)
"""
f = open_file_for_writing(filename)
for line in lines: for line in lines:
f.write( "%s\n" % (line) ) file_.write("{0:s}\n".format(line))
f.close() file_.close()
def _args_to_str(arg_list): def _args_to_str(arg_list):
"""Summarize list of arguments in string.
Args:
arg_list: list of arguments
Returns:
string
"""
return " ".join(map(str, arg_list)) return " ".join(map(str, arg_list))
def info(*args): def info(*args):
"""Log a message. Level defaults to INFO unless overridden.""" """Log a message to info.
logger.info(_args_to_str(args))
Level defaults to INFO unless overridden.
Args:
args: argument list
"""
_LOGGER.info(_args_to_str(args))
def debug(*args): def debug(*args):
"""Log a message on the DEBUG level.""" """Log a message to debug.
logger.debug(_args_to_str(args))
Level defaults to DEBUG unless overridden.
Args:
args: argument list
"""
_LOGGER.debug(_args_to_str(args))
def warning(*args): def warning(*args):
"""Log a WARN message""" """Log a message to warning.
logger.warning(_args_to_str(args))
Level defaults to WARNING unless overridden.
Args:
args: argument list
"""
_LOGGER.warning(_args_to_str(args))

View File

@@ -1,16 +1,9 @@
#!/usr/bin/python """Ligand classes and functions."""
from propka.calculations import squared_distance
from __future__ import division from propka.vector_algebra import Vector
from __future__ import print_function
import sys
import propka.calculations
from propka.vector_algebra import *
from propka.lib import info, warning
all_sybyl_types = [ ALL_SYBYL_TYPES = [
'C.3', # carbon sp3 'C.3', # carbon sp3
'H', # hydrogen 'H', # hydrogen
'C.2', # carbon sp2 'C.2', # carbon sp2
@@ -66,344 +59,277 @@ all_sybyl_types = [
'Sn'] # tin 'Sn'] # tin
#propka_input_types = ['1P','1N','2P','2N'] PROPKA_INPUT_TYPES = ['1P', '1N', '2P', '2N', 'C3', 'H', 'C2', 'Hsp', 'C1',
#for type in all_sybyl_types: 'Ht3', 'Car', 'LP', 'Cca', 'Du', 'N3', 'DuC', 'N2',
# temp = type.replace('.','') 'Any', 'N1', 'Hal', 'Nar', 'Het', 'Nam', 'Hev', 'Npl',
# if len(temp)>3: 'Li', 'N4', 'Na', 'O3', 'Mg', 'O2', 'Al', 'Oco', 'Si',
# temp = temp[0:3] 'Osp', 'K', 'Ot3', 'Ca', 'S3', 'Crt', 'S2', 'Cro', 'SO',
# propka_input_types.append(temp) 'Mn', 'SO2', 'Fe', 'P3', 'Coo', 'F', 'Cu', 'Cl', 'Zn',
# 'Br', 'Se', 'I', 'Mo', 'Sn']
#for t in propka_input_types:
# print (t)
propka_input_types = [
'1P',
'1N',
'2P',
'2N',
'C3',
'H',
'C2',
'Hsp',
'C1',
'Ht3',
'Car',
'LP',
'Cca',
'Du',
'N3',
'DuC',
'N2',
'Any',
'N1',
'Hal',
'Nar',
'Het',
'Nam',
'Hev',
'Npl',
'Li',
'N4',
'Na',
'O3',
'Mg',
'O2',
'Al',
'Oco',
'Si',
'Osp',
'K',
'Ot3',
'Ca',
'S3',
'Crt',
'S2',
'Cro',
'SO',
'Mn',
'SO2',
'Fe',
'P3',
'Coo',
'F',
'Cu',
'Cl',
'Zn',
'Br',
'Se',
'I',
'Mo',
'Sn']
max_C_double_bond = 1.3
max_C_triple_bond = 1.2
max_C_double_bond_squared = max_C_double_bond*max_C_double_bond
max_C_triple_bond_squared = max_C_triple_bond*max_C_triple_bond
MAX_C_DOUBLE_BOND = 1.3
MAX_C_TRIPLE_BOND = 1.2
MAX_C_DOUBLE_BOND_SQUARED = MAX_C_DOUBLE_BOND*MAX_C_DOUBLE_BOND
MAX_C_TRIPLE_BOND_SQUARED = MAX_C_TRIPLE_BOND*MAX_C_TRIPLE_BOND
PLANARITY_MARGIN = 0.20
def assign_sybyl_type(atom): def assign_sybyl_type(atom):
"""Assign Sybyl type to atom.
Args:
atom: atom to assign
"""
# check if we already have assigned a name to this atom # check if we already have assigned a name to this atom
if atom.sybyl_assigned: if atom.sybyl_assigned:
#info(atom.name,'already assigned')
return return
# find some properties of the atom # find some properties of the atom
ring_atoms = is_ring_member(atom) ring_atoms = is_ring_member(atom)
aromatic = is_aromatic_ring(ring_atoms) aromatic = is_aromatic_ring(ring_atoms)
planar = is_planar(atom) planar = is_planar(atom)
bonded_elements = {} bonded_elements = {}
for i in range(len(atom.bonded_atoms)): for i, bonded_atom in enumerate(atom.bonded_atoms):
bonded_elements[i]=atom.bonded_atoms[i].element bonded_elements[i] = bonded_atom.element
# Aromatic carbon/nitrogen # Aromatic carbon/nitrogen
if aromatic: if aromatic:
for ra in ring_atoms: for ring_atom in ring_atoms:
if ra.element in ['C','N']: if ring_atom.element in ['C', 'N']:
set_type(ra, ra.element+'.ar') set_type(ring_atom, ring_atom.element+'.ar')
return return
# check for amide # check for amide
if atom.element in ['O','N','C']: if atom.element in ['O', 'N', 'C']:
O=None o_atom = None
N=None n_atom = None
C=None c_atom = None
# oxygen, nitrogen # oxygen, nitrogen
if atom.element in ['O','N']: if atom.element in ['O', 'N']:
for b in atom.get_bonded_elements('C'): for bonded_elem in atom.get_bonded_elements('C'):
for bb in b.bonded_atoms: for bonded_atom in bonded_elem.bonded_atoms:
if (bb.element =='N' and atom.element == 'O'): if (bonded_atom.element == 'N' and atom.element == 'O'):
O=atom o_atom = atom
C=b c_atom = bonded_elem
N=bb n_atom = bonded_atom
elif (bb.element =='O' and atom.element == 'N'): elif (bonded_atom.element == 'O' and atom.element == 'N'):
N=atom n_atom = atom
C=b c_atom = bonded_elem
O=bb o_atom = bonded_atom
# carbon # carbon
if atom.element == 'C': if atom.element == 'C':
nitrogens = atom.get_bonded_elements('N') nitrogens = atom.get_bonded_elements('N')
oxygens = atom.get_bonded_elements('O') oxygens = atom.get_bonded_elements('O')
if len(nitrogens)==1 and len(oxygens)==1: if len(nitrogens) == 1 and len(oxygens) == 1:
C = atom c_atom = atom
N = nitrogens[0] n_atom = nitrogens[0]
O = oxygens[0] o_atom = oxygens[0]
if c_atom and n_atom and o_atom:
# make sure that the Nitrogen is not aromatic and that it has two
if C and N and O: # heavy atom bonds
# make sure that the Nitrogen is not aromatic and that it has two heavy atom bonds if (not is_aromatic_ring(is_ring_member(n_atom))
if not is_aromatic_ring(is_ring_member(N)) and len(N.get_bonded_heavy_atoms())==2: and len(n_atom.get_bonded_heavy_atoms()) == 2):
set_type(N,'N.am') set_type(n_atom, 'N.am')
set_type(C,'C.2') set_type(c_atom, 'C.2')
set_type(O,'O.2') set_type(o_atom, 'O.2')
return return
if atom.element == 'C':
if atom.element=='C':
# check for carboxyl # check for carboxyl
if len(atom.bonded_atoms)==3 and list(bonded_elements.values()).count('O')==2: if (len(atom.bonded_atoms) == 3
i1 = list(bonded_elements.values()).index('O') and list(bonded_elements.values()).count('O') == 2):
i2 = list(bonded_elements.values()).index('O',i1+1) index1 = list(bonded_elements.values()).index('O')
if len(atom.bonded_atoms[i1].bonded_atoms)==1 and len(atom.bonded_atoms[i2].bonded_atoms)==1: index2 = list(bonded_elements.values()).index('O', index1+1)
set_type(atom.bonded_atoms[i1],'O.co2-') if (len(atom.bonded_atoms[index1].bonded_atoms) == 1
set_type(atom.bonded_atoms[i2],'O.co2') and len(atom.bonded_atoms[index2].bonded_atoms) == 1):
set_type(atom,'C.2') set_type(atom.bonded_atoms[index1], 'O.co2-')
set_type(atom.bonded_atoms[index2], 'O.co2')
set_type(atom, 'C.2')
return return
# sp carbon # sp carbon
if len(atom.bonded_atoms)<=2: if len(atom.bonded_atoms) <= 2:
for b in atom.bonded_atoms: for bonded_atom in atom.bonded_atoms:
if propka.calculations.squared_distance(atom, b) < max_C_triple_bond_squared: if (squared_distance(atom, bonded_atom)
set_type(atom,'C.1') < MAX_C_TRIPLE_BOND_SQUARED):
set_type(b,b.element+'.1') set_type(atom, 'C.1')
set_type(bonded_atom, bonded_atom.element + '.1')
if atom.sybyl_assigned: if atom.sybyl_assigned:
return return
# sp2 carbon # sp2 carbon
if planar: if planar:
set_type(atom,'C.2') set_type(atom, 'C.2')
# check for N.pl3 # check for N.pl3
for b in atom.bonded_atoms: for bonded_atom in atom.bonded_atoms:
if b.element=='N': if bonded_atom.element == 'N':
if len(b.bonded_atoms)<3 or is_planar(b): if (len(bonded_atom.bonded_atoms) < 3
set_type(b,'N.pl3') or is_planar(bonded_atom)):
set_type(bonded_atom, 'N.pl3')
return return
# sp3 carbon # sp3 carbon
set_type(atom, 'C.3') set_type(atom, 'C.3')
return return
# Nitrogen # Nitrogen
if atom.element == 'N': if atom.element == 'N':
# check for planar N # check for planar N
if len(atom.bonded_atoms)==1: if len(atom.bonded_atoms) == 1:
if is_planar(atom.bonded_atoms[0]): if is_planar(atom.bonded_atoms[0]):
set_type(atom,'N.pl3') set_type(atom, 'N.pl3')
return return
if planar: if planar:
set_type(atom,'N.pl3') set_type(atom, 'N.pl3')
return return
set_type(atom, 'N.3')
set_type(atom,'N.3')
return return
# Oxygen # Oxygen
if atom.element == 'O': if atom.element == 'O':
set_type(atom,'O.3') set_type(atom, 'O.3')
if len(atom.bonded_atoms) == 1: if len(atom.bonded_atoms) == 1:
# check for carboxyl # check for carboxyl
if atom.bonded_atoms[0].element == 'C': if atom.bonded_atoms[0].element == 'C':
the_carbon = atom.bonded_atoms[0] the_carbon = atom.bonded_atoms[0]
if len(the_carbon.bonded_atoms)==3 and the_carbon.count_bonded_elements('O')==2: if (len(the_carbon.bonded_atoms) == 3
[O1,O2] = the_carbon.get_bonded_elements('O') and the_carbon.count_bonded_elements('O') == 2):
[oxy1, oxy2] = the_carbon.get_bonded_elements('O')
if len(O1.bonded_atoms)==1 and len(O2.bonded_atoms)==1: if (len(oxy1.bonded_atoms) == 1
set_type(O1,'O.co2-') and len(oxy2.bonded_atoms) == 1):
set_type(O2,'O.co2') set_type(oxy1, 'O.co2-')
set_type(the_carbon,'C.2') set_type(oxy2, 'O.co2')
set_type(the_carbon, 'C.2')
return return
# check for X=O # check for X=O
if propka.calculations.squared_distance(atom, atom.bonded_atoms[0]) < max_C_double_bond_squared: if (squared_distance(atom, atom.bonded_atoms[0])
set_type(atom,'O.2') < MAX_C_DOUBLE_BOND_SQUARED):
if atom.bonded_atoms[0].element=='C': set_type(atom, 'O.2')
set_type(atom.bonded_atoms[0],'C.2') if atom.bonded_atoms[0].element == 'C':
set_type(atom.bonded_atoms[0], 'C.2')
return return
# Sulphur # Sulphur
if atom.element == 'S': if atom.element == 'S':
# check for SO2 # check for SO2
if list(bonded_elements.values()).count('O')==2: if list(bonded_elements.values()).count('O') == 2:
i1 = list(bonded_elements.values()).index('O') index1 = list(bonded_elements.values()).index('O')
i2 = list(bonded_elements.values()).index('O',i1+1) index2 = list(bonded_elements.values()).index('O', index1+1)
set_type(atom.bonded_atoms[i1],'O.2') set_type(atom.bonded_atoms[index1], 'O.2')
set_type(atom.bonded_atoms[i2],'O.2') set_type(atom.bonded_atoms[index2], 'O.2')
set_type(atom,'S.o2') set_type(atom, 'S.o2')
return return
# check for SO4 # check for SO4
if list(bonded_elements.values()).count('O')==4: if list(bonded_elements.values()).count('O') == 4:
no_O2 = 0 no_o2 = 0
for i in range(len(atom.bonded_atoms)): for i in range(len(atom.bonded_atoms)):
if len(atom.bonded_atoms[i].bonded_atoms)==1 and no_O2<2: if len(atom.bonded_atoms[i].bonded_atoms) == 1 and no_o2 < 2:
set_type(atom.bonded_atoms[i],'O.2') set_type(atom.bonded_atoms[i], 'O.2')
no_O2+=1 no_o2 += 1
else: else:
set_type(atom.bonded_atoms[i],'O.3') set_type(atom.bonded_atoms[i], 'O.3')
set_type(atom, 'S.3')
set_type(atom,'S.3')
return return
# Phosphorus # Phosphorus
if atom.element == 'P': if atom.element == 'P':
set_type(atom,'P.3') set_type(atom, 'P.3')
# check for phosphate group # check for phosphate group
bonded_oxygens = atom.get_bonded_elements('O') bonded_oxygens = atom.get_bonded_elements('O')
for o in bonded_oxygens: set_type(o,'O.3') for o_atom in bonded_oxygens:
# if len(bonded_oxygens)>=3: set_type(o_atom, 'O.3')
# # find oxygens only bonded to current phosphorus
# bonded_oxygens_1 = [o for o in bonded_oxygens if len(o.get_bonded_heavy_atoms())==1]
# # find the closest oxygen ...
# closest_oxygen = min(bonded_oxygens_1,
# key= lambda o:propka.calculations.squared_distance(atom,o))
# # ... and set it to O.2
# set_type(closest_oxygen,'O.2')
return return
element = atom.element.capitalize() element = atom.element.capitalize()
set_type(atom,element) set_type(atom, element)
# info('Using element as type for %s'%atom.element)
return
def is_ring_member(atom): def is_ring_member(atom):
return identify_ring(atom,atom,0,[]) """Determine if atom is a member of a ring.
Args:
atom: atom to test
Returns:
list of atoms
"""
return identify_ring(atom, atom, 0, [])
def identify_ring(this_atom, original_atom, number, past_atoms): def identify_ring(this_atom, original_atom, number, past_atoms):
number+=1 """Identify the atoms in a ring
past_atoms=past_atoms+[this_atom]
Args:
this_atom: atom to test
original_atom: some other atom
number: number of atoms
past_atoms: atoms that have already been found
Returns:
list of atoms
"""
number += 1
past_atoms = past_atoms + [this_atom]
return_atoms = [] return_atoms = []
if number > 10: if number > 10:
return return_atoms return return_atoms
for atom in this_atom.get_bonded_heavy_atoms(): for atom in this_atom.get_bonded_heavy_atoms():
if atom == original_atom and number>2: if atom == original_atom and number > 2:
return past_atoms return past_atoms
if atom not in past_atoms: if atom not in past_atoms:
these_return_atoms = identify_ring(atom, original_atom, number, past_atoms) these_return_atoms = identify_ring(atom, original_atom, number,
past_atoms)
if len(these_return_atoms) > 0: if len(these_return_atoms) > 0:
if len(return_atoms)>len(these_return_atoms) or len(return_atoms)==0: if (len(return_atoms) > len(these_return_atoms)
or len(return_atoms) == 0):
return_atoms = these_return_atoms return_atoms = these_return_atoms
return return_atoms return return_atoms
def set_type(atom, type_):
"""Set atom type..
Args:
def set_type(atom,type): atom: atom to set
#info(atom, '->',type) type_: type value to set
atom.sybyl_type = type """
atom.sybyl_assigned=True atom.sybyl_type = type_
return atom.sybyl_assigned = True
def is_planar(atom): def is_planar(atom):
""" Finds out if atom forms a plane together with its bonded atoms""" """Finds out if atom forms a plane together with its bonded atoms.
atoms = [atom]+atom.bonded_atoms
Args:
atom: atom to test
Returns:
Boolean
"""
atoms = [atom] + atom.bonded_atoms
return are_atoms_planar(atoms) return are_atoms_planar(atoms)
def are_atoms_planar(atoms): def are_atoms_planar(atoms):
if len(atoms)==0: """Test whether a group of atoms are planar.
return False
if len(atoms)<4:
return False
v1 = vector(atom1=atoms[0], atom2=atoms[1])
v2 = vector(atom1=atoms[0], atom2=atoms[2])
n = (v1**v2).rescale(1.0)
margin = 0.20 Args:
for b in atoms[3:]: atoms: list of atoms
v = vector(atom1=atoms[0], atom2=b).rescale(1.0) Returns:
#info(atoms[0],abs(v*n) ) Boolean
if abs(v*n)>margin: """
if len(atoms) == 0:
return False
if len(atoms) < 4:
return False
vec1 = Vector(atom1=atoms[0], atom2=atoms[1])
vec2 = Vector(atom1=atoms[0], atom2=atoms[2])
norm = (vec1**vec2).rescale(1.0)
margin = PLANARITY_MARGIN
for atom in atoms[3:]:
vec = Vector(atom1=atoms[0], atom2=atom).rescale(1.0)
if abs(vec*norm) > margin:
return False return False
return True return True
def is_aromatic_ring(atoms):
if len(atoms)<5:
return False
def is_aromatic_ring(atoms):
"""Determine whether group of atoms form aromatic ring.
Args:
atoms: list of atoms to test
Returns:
Boolean
"""
if len(atoms) < 5:
return False
for i in range(len(atoms)): for i in range(len(atoms)):
if not are_atoms_planar(atoms[i:]+atoms[:i]): if not are_atoms_planar(atoms[i:]+atoms[:i]):
return False return False
return True return True

View File

@@ -1,15 +1,25 @@
#!/usr/bin/env python """Ligand pKa values"""
import os
from __future__ import division import subprocess
from __future__ import print_function import sys
import propka.molecular_container
import propka.calculations
import propka.parameters
import propka.pdb
import propka.lib
from propka.lib import info, warning from propka.lib import info, warning
import propka.molecular_container, propka.calculations, propka.calculations, propka.parameters, propka.pdb, propka.lib, os, subprocess, sys
class ligand_pka_values: class LigandPkaValues:
"""Ligand pKa value class."""
def __init__(self, parameters): def __init__(self, parameters):
self.parameters = parameters """Initialize object with parameters.
Args:
parameters: parameters
"""
self.parameters = parameters
# attempt to find Marvin executables in the path # attempt to find Marvin executables in the path
self.molconvert = self.find_in_path('molconvert') self.molconvert = self.find_in_path('molconvert')
self.cxcalc = self.find_in_path('cxcalc') self.cxcalc = self.find_in_path('cxcalc')
@@ -17,106 +27,173 @@ class ligand_pka_values:
info(self.cxcalc) info(self.cxcalc)
info(self.molconvert) info(self.molconvert)
return @staticmethod
def find_in_path(program):
"""Find a program in the system path.
Args:
def find_in_path(self, program): program: program to find
Returns:
location of program
"""
path = os.environ.get('PATH').split(os.pathsep) path = os.environ.get('PATH').split(os.pathsep)
locs = [
l = [i for i in filter(lambda loc: os.access(loc, os.F_OK), i for i in filter(lambda loc: os.access(loc, os.F_OK),
map(lambda dir: os.path.join(dir, program),path))] map(lambda dir: os.path.join(dir, program),
path))]
if len(l) == 0: if len(locs) == 0:
info('Error: Could not find %s. Please make sure that it is found in the path.' % program) str_ = "'Error: Could not find {0:s}.".format(program)
str_ += ' Please make sure that it is found in the path.'
info(str_)
sys.exit(-1) sys.exit(-1)
return locs[0]
return l[0] def get_marvin_pkas_for_pdb_file(self, pdbfile, num_pkas=10, min_ph=-10,
max_ph=20):
"""Use Marvin executables to get pKas for a PDB file.
def get_marvin_pkas_for_pdb_file(self, file, no_pkas=10, min_pH =-10, max_pH=20): Args:
molecule = propka.molecular_container.Molecular_container(file) pdbfile: PDB file
self.get_marvin_pkas_for_molecular_container(molecule, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) num_pkas: number of pKas to get
return min_ph: minimum pH value
max_ph: maximum pH value
"""
molecule = propka.molecular_container.Molecular_container(pdbfile)
self.get_marvin_pkas_for_molecular_container(
molecule, num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph)
def get_marvin_pkas_for_molecular_container(self, molecule, no_pkas=10, min_pH =-10, max_pH=20): def get_marvin_pkas_for_molecular_container(self, molecule, num_pkas=10,
min_ph=-10, max_ph=20):
"""Use Marvin executables to calculate pKas for a molecular container.
Args:
molecule: molecular container
num_pkas: number of pKas to calculate
min_ph: minimum pH value
max_ph: maximum pH value
"""
for name in molecule.conformation_names: for name in molecule.conformation_names:
filename = '%s_%s'%(molecule.name,name) filename = '{0:s}_{1:s}'.format(molecule.name, name)
self.get_marvin_pkas_for_conformation_container(molecule.conformations[name], name=filename, reuse=molecule.options.reuse_ligand_mol2_file, self.get_marvin_pkas_for_conformation_container(
no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) molecule.conformations[name], name=filename,
reuse=molecule.options.reuse_ligand_mol2_file,
num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph)
return def get_marvin_pkas_for_conformation_container(self, conformation,
name='temp', reuse=False,
num_pkas=10, min_ph=-10,
max_ph=20):
"""Use Marvin executables to calculate pKas for a conformation container.
def get_marvin_pkas_for_conformation_container(self, conformation, name = 'temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): Args:
conformation: conformation container
name: filename
reuse: flag to reuse the structure files
num_pkas: number of pKas to calculate
min_ph: minimum pH value
max_ph: maximum pH value
"""
conformation.marvin_pkas_calculated = True conformation.marvin_pkas_calculated = True
self.get_marvin_pkas_for_atoms(conformation.get_heavy_ligand_atoms(), name=name, reuse=reuse, self.get_marvin_pkas_for_atoms(
no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) conformation.get_heavy_ligand_atoms(), name=name, reuse=reuse,
num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph)
return def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False,
num_pkas=10, min_ph=-10, max_ph=20):
"""Use Marvin executables to calculate pKas for a list of atoms.
def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): Args:
atoms: list of atoms
name: filename
reuse: flag to reuse the structure files
num_pkas: number of pKas to calculate
min_ph: minimum pH value
max_ph: maximum pH value
"""
# do one molecule at the time so we don't confuse marvin # do one molecule at the time so we don't confuse marvin
molecules = propka.lib.split_atoms_into_molecules(atoms) molecules = propka.lib.split_atoms_into_molecules(atoms)
for i in range(len(molecules)): for i, molecule in enumerate(molecules):
filename = '%s_%d.mol2'%(name, i+1) filename = '{0:s}_{1:d}.mol2'.format(name, i+1)
self.get_marvin_pkas_for_molecule(molecules[i], filename=filename, reuse=reuse, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) self.get_marvin_pkas_for_molecule(
molecule, filename=filename, reuse=reuse, num_pkas=num_pkas,
min_ph=min_ph, max_ph=max_ph)
return def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2',
reuse=False, num_pkas=10, min_ph=-10,
max_ph=20):
"""Use Marvin executables to calculate pKas for a molecule.
Args:
def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): molecule: the molecule
name: filename
reuse: flag to reuse the structure files
num_pkas: number of pKas to calculate
min_ph: minimum pH value
max_ph: maximum pH value
"""
# print out structure unless we are using user-modified structure # print out structure unless we are using user-modified structure
if not reuse: if not reuse:
propka.pdb.write_mol2_for_atoms(atoms, filename) propka.pdb.write_mol2_for_atoms(atoms, filename)
# check that we actually have a file to work with # check that we actually have a file to work with
if not os.path.isfile(filename): if not os.path.isfile(filename):
warning('Didn\'t find a user-modified file \'%s\' - generating one' % filename) errstr = (
"Didn't find a user-modified file '{0:s}' "
"- generating one".format(
filename))
warning(errstr)
propka.pdb.write_mol2_for_atoms(atoms, filename) propka.pdb.write_mol2_for_atoms(atoms, filename)
# Marvin calculate pKa values
fmt = (
'pka -a {num1} -b {num2} --min {min_ph} '
# Marvin '--max {max_ph} -d large')
# calculate pKa values options = (
options = 'pka -a %d -b %d --min %f --max %f -d large'%(no_pkas, no_pkas, min_pH, max_pH) fmt.format(
(output,errors) = subprocess.Popen([self.cxcalc, filename]+options.split(), num1=num_pkas, num2=num_pkas, min_ph=min_ph, max_ph=max_ph))
stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() (output, errors) = subprocess.Popen(
[self.cxcalc, filename]+options.split(), stdout=subprocess.PIPE,
if len(errors)>0: stderr=subprocess.PIPE).communicate()
info('********************************************************************************************************') if len(errors) > 0:
info('* Warning: Marvin execution failed: *') info('***********************************************************'
info('* %-100s *' % errors) '*********************************************')
info('* *') info('* Warning: Marvin execution failed: '
info('* Please edit the ligand mol2 file and re-run PropKa with the -l option: %29s *' % filename) ' *')
info('********************************************************************************************************') info('* {0:<100s} *'.format(errors))
info('* '
' *')
info('* Please edit the ligand mol2 file and re-run PropKa with '
'the -l option: {0:>29s} *'.format(filename))
info('***********************************************************'
'*********************************************')
sys.exit(-1) sys.exit(-1)
# extract calculated pkas # extract calculated pkas
indices,pkas,types = self.extract_pkas(output) indices, pkas, types = self.extract_pkas(output)
# store calculated pka values # store calculated pka values
for i in range(len(indices)): for i, index in enumerate(indices):
atoms[indices[i]].marvin_pka = pkas[i] atoms[index].marvin_pka = pkas[i]
atoms[indices[i]].charge = {'a':-1,'b':+1}[types[i]] atoms[index].charge = {'a': -1, 'b': 1}[types[i]]
info('%s model pKa: %.2f' % (atoms[indices[i]], pkas[i])) info('{0:s} model pKa: {1:<.2f}'.format(atoms[index], pkas[i]))
return @staticmethod
def extract_pkas(output):
"""Extract pKa value from output.
def extract_pkas(self, output): Args:
output: output string to parse
Returns:
1. Indices
2. Values
3. Types
"""
# split output # split output
[tags, values,empty_line] = output.decode().split('\n') [tags, values, _] = output.decode().split('\n')
#info(tags)
#info(values)
tags = tags.split('\t') tags = tags.split('\t')
values = values.split('\t') values = values.split('\t')
# format values # format values
types = [tags[i][0] for i in range(1,len(tags)-1) if len(values)>i and values[i] != ''] types = [
indices = [int(a)-1 for a in values[-1].split(',') if a !=''] tags[i][0] for i in range(1, len(tags)-1)
values = [float(v.replace(',','.')) for v in values[1:-1] if v != ''] if len(values) > i and values[i] != '']
indices = [int(a)-1 for a in values[-1].split(',') if a != '']
values = [float(v.replace(',', '.')) for v in values[1:-1] if v != '']
if len(indices) != len(values) != len(types): if len(indices) != len(values) != len(types):
raise Exception('Lengths of atoms and pka values mismatch') raise Exception('Lengths of atoms and pka values mismatch')
return indices, values, types return indices, values, types

View File

@@ -1,145 +1,148 @@
#!/usr/bin/python """Molecular container for storing all contents of PDB files."""
# import os
# Molecular container for storing all contents of pdb files import sys
# import propka.pdb
# import propka.version
from __future__ import division import propka.output
from __future__ import print_function import propka.group
import propka.lib
import os, sys from propka.conformation_container import ConformationContainer
import propka.pdb, propka.version, propka.output, propka.conformation_container, propka.group, propka.lib
from propka.lib import info, warning from propka.lib import info, warning
class Molecular_container:
def __init__(self, input_file, options=None):
# printing out header before parsing input
propka.output.printHeader()
# TODO - these are constants whose origins are a little murky
UNK_PI_CUTOFF = 0.01
# Maximum number of iterations for finding PI
MAX_ITERATION = 4
class Molecular_container:
"""Container for storing molecular contents of PDB files.
TODO - this class name does not conform to PEP8 but has external use.
We should deprecate and change eventually.
"""
def __init__(self, input_file, options=None):
"""Initialize molecular container.
Args:
input_file: molecular input file
options: options object
"""
# printing out header before parsing input
propka.output.print_header()
# set up some values # set up some values
self.options = options self.options = options
self.input_file = input_file self.input_file = input_file
# TODO - replace this indelicate os.path code with pathlib
self.dir = os.path.split(input_file)[0] self.dir = os.path.split(input_file)[0]
self.file = os.path.split(input_file)[1] self.file = os.path.split(input_file)[1]
self.name = self.file[0:self.file.rfind('.')] self.name = self.file[0:self.file.rfind('.')]
input_file_extension = input_file[input_file.rfind('.'):] input_file_extension = input_file[input_file.rfind('.'):]
# set the version # set the version
if options: if options:
parameters = propka.parameters.Parameters(self.options.parameters) parameters = propka.parameters.Parameters(self.options.parameters)
else: else:
parameters = propka.parameters.Parameters('propka.cfg') parameters = propka.parameters.Parameters('propka.cfg')
try: try:
exec('self.version = propka.version.%s(parameters)'%parameters.version) version_class = getattr(propka.version, parameters.version)
except: self.version = version_class(parameters)
raise Exception('Error: Version %s does not exist'%parameters.version) except AttributeError as err:
print(err)
errstr = 'Error: Version {0:s} does not exist'.format(
parameters.version)
raise Exception(errstr)
# read the input file # read the input file
if input_file_extension[0:4] == '.pdb': if input_file_extension[0:4] == '.pdb':
# input is a pdb file # input is a pdb file. read in atoms and top up containers to make
# read in atoms and top up containers to make sure that all atoms are present in all conformations # sure that all atoms are present in all conformations
[self.conformations, self.conformation_names] = propka.pdb.read_pdb(input_file, self.version.parameters,self) [self.conformations, self.conformation_names] = (
if len(self.conformations)==0: propka.pdb.read_pdb(input_file, self.version.parameters, self))
info('Error: The pdb file does not seems to contain any molecular conformations') if len(self.conformations) == 0:
info('Error: The pdb file does not seems to contain any '
'molecular conformations')
sys.exit(-1) sys.exit(-1)
self.top_up_conformations() self.top_up_conformations()
# make a structure precheck # make a structure precheck
propka.pdb.protein_precheck(self.conformations, self.conformation_names) propka.pdb.protein_precheck(self.conformations,
self.conformation_names)
# set up atom bonding and protonation # set up atom bonding and protonation
self.version.setup_bonding_and_protonation(self) self.version.setup_bonding_and_protonation(self)
# Extract groups # Extract groups
self.extract_groups() self.extract_groups()
# sort atoms # sort atoms
for name in self.conformation_names: for name in self.conformation_names:
self.conformations[name].sort_atoms() self.conformations[name].sort_atoms()
# find coupled groups # find coupled groups
self.find_covalently_coupled_groups() self.find_covalently_coupled_groups()
# write out the input file # write out the input file
filename = self.file.replace(input_file_extension,'.propka_input') filename = self.file.replace(input_file_extension, '.propka_input')
propka.pdb.write_input(self, filename) propka.pdb.write_input(self, filename)
elif input_file_extension == '.propka_input': elif input_file_extension == '.propka_input':
#input is a propka_input file #input is a propka_input file
[self.conformations, self.conformation_names] = propka.pdb.read_input(input_file, self.version.parameters, self) [self.conformations, self.conformation_names] = (
propka.pdb.read_input(input_file, self.version.parameters,
# Extract groups - this merely sets up the groups found in the input file self))
# Extract groups - this merely sets up the groups found in the
# input file
self.extract_groups() self.extract_groups()
# do some additional set up # do some additional set up
self.additional_setup_when_reading_input_file() self.additional_setup_when_reading_input_file()
else: else:
info('Unrecognized input file:%s' % input_file) info('Unrecognized input file:{0:s}'.format(input_file))
sys.exit(-1) sys.exit(-1)
return
def top_up_conformations(self): def top_up_conformations(self):
""" Makes sure that all atoms are present in all conformations """ """Makes sure that all atoms are present in all conformations."""
for name in self.conformation_names: for name in self.conformation_names:
if name!='1A' and (len(self.conformations[name]) < len(self.conformations['1A'])): if (name != '1A' and (len(self.conformations[name])
< len(self.conformations['1A']))):
self.conformations[name].top_up(self.conformations['1A']) self.conformations[name].top_up(self.conformations['1A'])
return
def find_covalently_coupled_groups(self): def find_covalently_coupled_groups(self):
"""Find covalently coupled groups."""
info('-' * 103) info('-' * 103)
for name in self.conformation_names: for name in self.conformation_names:
self.conformations[name].find_covalently_coupled_groups() self.conformations[name].find_covalently_coupled_groups()
return
def find_non_covalently_coupled_groups(self): def find_non_covalently_coupled_groups(self):
"""Find non-covalently coupled groups."""
info('-' * 103) info('-' * 103)
verbose = self.options.display_coupled_residues
for name in self.conformation_names: for name in self.conformation_names:
self.conformations[name].find_non_covalently_coupled_groups(verbose=self.options.display_coupled_residues) self.conformations[name].find_non_covalently_coupled_groups(
verbose=verbose)
return
def extract_groups(self): def extract_groups(self):
""" Identify the groups needed for pKa calculation """ """Identify the groups needed for pKa calculation."""
for name in self.conformation_names: for name in self.conformation_names:
self.conformations[name].extract_groups() self.conformations[name].extract_groups()
return
def additional_setup_when_reading_input_file(self): def additional_setup_when_reading_input_file(self):
"""Additional setup."""
for name in self.conformation_names: for name in self.conformation_names:
self.conformations[name].additional_setup_when_reading_input_file() self.conformations[name].additional_setup_when_reading_input_file()
return
def calculate_pka(self): def calculate_pka(self):
"""Calculate pKa values."""
# calculate for each conformation # calculate for each conformation
for name in self.conformation_names: for name in self.conformation_names:
self.conformations[name].calculate_pka(self.version, self.options) self.conformations[name].calculate_pka(
self.version, self.options)
# find non-covalently coupled groups # find non-covalently coupled groups
self.find_non_covalently_coupled_groups() self.find_non_covalently_coupled_groups()
# find the average of the conformations # find the average of the conformations
self.average_of_conformations() self.average_of_conformations()
# print out the conformation-average results # print out the conformation-average results
propka.output.printResult(self, 'AVR', self.version.parameters) propka.output.print_result(self, 'AVR', self.version.parameters)
return
def average_of_conformations(self): def average_of_conformations(self):
"""Generate an average of conformations."""
parameters = self.conformations[self.conformation_names[0]].parameters
# make a new configuration to hold the average values # make a new configuration to hold the average values
avr_conformation = propka.conformation_container.Conformation_container(name='average', avr_conformation = ConformationContainer(
parameters=self.conformations[self.conformation_names[0]].parameters, name='average', parameters=parameters, molecular_container=self)
molecular_container=self)
container = self.conformations[self.conformation_names[0]] container = self.conformations[self.conformation_names[0]]
for group in container.get_groups_for_calculations(): for group in container.get_groups_for_calculations():
# new group to hold average values # new group to hold average values
@@ -150,104 +153,135 @@ class Molecular_container:
if group_to_add: if group_to_add:
avr_group += group_to_add avr_group += group_to_add
else: else:
warning('Group %s could not be found in conformation %s.' % (group.atom.residue_label, name)) str_ = (
'Group {0:s} could not be found in '
'conformation {0:s}.'.format(
group.atom.residue_label, name))
warning(str_)
# ... and store the average value # ... and store the average value
avr_group = avr_group / len(self.conformation_names) avr_group = avr_group / len(self.conformation_names)
avr_conformation.groups.append(avr_group) avr_conformation.groups.append(avr_group)
# store information on coupling in the average container # store information on coupling in the average container
if len(list(filter(lambda c: c.non_covalently_coupled_groups, self.conformations.values()))): if len(list(filter(lambda c: c.non_covalently_coupled_groups,
self.conformations.values()))):
avr_conformation.non_covalently_coupled_groups = True avr_conformation.non_covalently_coupled_groups = True
# store chain info # store chain info
avr_conformation.chains = self.conformations[self.conformation_names[0]].chains avr_conformation.chains = self.conformations[
self.conformation_names[0]].chains
self.conformations['AVR'] = avr_conformation self.conformations['AVR'] = avr_conformation
return
def write_pka(self, filename=None, reference="neutral", direction="folding", options=None): def write_pka(self, filename=None, reference="neutral",
#for name in self.conformation_names: direction="folding", options=None):
# propka.output.writePKA(self, self.version.parameters, filename='%s_3.1_%s.pka'%(self.name, name), """Write pKa information to a file.
# conformation=name,reference=reference,
# direction=direction, options=options)
Args:
filename: file to write to
reference: reference state
direction: folding vs. unfolding
options: options object
"""
# write out the average conformation # write out the average conformation
filename=os.path.join('%s.pka'%(self.name)) filename = os.path.join('{0:s}.pka'.format(self.name))
# if the display_coupled_residues option is true, write the results out
# if the display_coupled_residues option is true, # to an alternative pka file
# write the results out to an alternative pka file
if self.options.display_coupled_residues: if self.options.display_coupled_residues:
filename=os.path.join('%s_alt_state.pka'%(self.name)) filename = os.path.join('{0:s}_alt_state.pka'.format(self.name))
if (hasattr(self.version.parameters, 'output_file_tag')
and len(self.version.parameters.output_file_tag) > 0):
filename = os.path.join(
'{0:s}_{1:s}.pka'.format(
self.name, self.version.parameters.output_file_tag))
propka.output.write_pka(
self, self.version.parameters, filename=filename,
conformation='AVR', reference=reference)
if hasattr(self.version.parameters, 'output_file_tag') and len(self.version.parameters.output_file_tag)>0: def get_folding_profile(self, conformation='AVR', reference="neutral",
filename=os.path.join('%s_%s.pka'%(self.name,self.version.parameters.output_file_tag)) grid=[0., 14., 0.1]):
"""Get a folding profile.
propka.output.writePKA(self, self.version.parameters, filename=filename, Args:
conformation='AVR',reference=reference, conformation: conformation to select
direction=direction, options=options) reference: reference state
direction: folding direction (folding)
return grid: the grid of pH values [min, max, step_size]
options: options object
def getFoldingProfile(self, conformation='AVR',reference="neutral", direction="folding", grid=[0., 14., 0.1], options=None): Returns:
TODO - figure out what these are
1. profile
2. opt
3. range_80pct
4. stability_range
"""
# calculate stability profile # calculate stability profile
profile = [] profile = []
for ph in propka.lib.make_grid(*grid): for ph in propka.lib.make_grid(*grid):
ddg = self.conformations[conformation].calculate_folding_energy( pH=ph, reference=reference) conf = self.conformations[conformation]
#info(ph,ddg) ddg = conf.calculate_folding_energy(ph=ph, reference=reference)
profile.append([ph, ddg]) profile.append([ph, ddg])
# find optimum # find optimum
opt =[None, 1e6] opt = [None, 1e6]
for point in profile: for point in profile:
opt = min(opt, point, key=lambda v:v[1]) opt = min(opt, point, key=lambda v: v[1])
# find values within 80 % of optimum # find values within 80 % of optimum
range_80pct = [None, None] range_80pct = [None, None]
values_within_80pct = [p[0] for p in profile if p[1]< 0.8*opt[1]] values_within_80pct = [p[0] for p in profile if p[1] < 0.8*opt[1]]
if len(values_within_80pct)>0: if len(values_within_80pct) > 0:
range_80pct = [min(values_within_80pct), max(values_within_80pct)] range_80pct = [min(values_within_80pct), max(values_within_80pct)]
# find stability range # find stability range
stability_range = [None, None] stability_range = [None, None]
stable_values = [p[0] for p in profile if p[1]< 0.0] stable_values = [p[0] for p in profile if p[1] < 0.0]
if len(stable_values) > 0:
if len(stable_values)>0:
stability_range = [min(stable_values), max(stable_values)] stability_range = [min(stable_values), max(stable_values)]
return profile, opt, range_80pct, stability_range return profile, opt, range_80pct, stability_range
def get_charge_profile(self, conformation='AVR', grid=[0., 14., .1]):
"""Get charge profile for conformation as function of pH.
def getChargeProfile(self, conformation='AVR', grid=[0., 14., .1]): Args:
conformation: conformation to test
grid: grid of pH values [min, max, step]
Returns:
list of charge state values
"""
charge_profile = [] charge_profile = []
for ph in propka.lib.make_grid(*grid): for ph in propka.lib.make_grid(*grid):
q_unfolded, q_folded = self.conformations[conformation].calculate_charge(self.version.parameters, pH=ph) conf = self.conformations[conformation]
q_unfolded, q_folded = conf.calculate_charge(
self.version.parameters, ph=ph)
charge_profile.append([ph, q_unfolded, q_folded]) charge_profile.append([ph, q_unfolded, q_folded])
return charge_profile return charge_profile
def getPI(self, conformation='AVR', grid=[0., 14., 1], iteration=0): def get_pi(self, conformation='AVR', grid=[0., 14., 1], iteration=0):
#info('staring',grid, iteration) """Get the isoelectric points for folded and unfolded states.
# search
charge_profile = self.getChargeProfile(conformation=conformation, grid=grid)
pi = []
pi_folded = pi_unfolded = [None, 1e6,1e6]
for point in charge_profile:
pi_folded = min(pi_folded, point, key=lambda v:abs(v[2]))
pi_unfolded = min(pi_unfolded, point, key=lambda v:abs(v[1]))
# If results are not good enough, do it again with a higher sampling resolution Args:
pi_folded_value = pi_folded[0] conformation: conformation to test
grid: grid of pH values [min, max, step]
iteration: iteration number of process
Returns:
1. Folded state PI
2. Unfolded state PI
"""
charge_profile = self.get_charge_profile(
conformation=conformation, grid=grid)
pi_folded = pi_unfolded = [None, 1e6, 1e6]
for point in charge_profile:
pi_folded = min(pi_folded, point, key=lambda v: abs(v[2]))
pi_unfolded = min(pi_unfolded, point, key=lambda v: abs(v[1]))
# If results are not good enough, do it again with a higher sampling
# resolution
pi_folded_value = pi_folded[0]
pi_unfolded_value = pi_unfolded[0] pi_unfolded_value = pi_unfolded[0]
step = grid[2] step = grid[2]
if (pi_folded[2] > 0.01 or pi_unfolded[1] > 0.01) and iteration<4: # TODO - need to warn if maximum number of iterations is exceeded
pi_folded_value, x = self.getPI(conformation=conformation, grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0], iteration=iteration+1) if ((pi_folded[2] > UNK_PI_CUTOFF
x, pi_unfolded_value = self.getPI(conformation=conformation, grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0], iteration=iteration+1) or pi_unfolded[1] > UNK_PI_CUTOFF) and iteration < MAX_ITERATION):
pi_folded_value, _ = self.get_pi(
conformation=conformation,
grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0],
iteration=iteration+1)
_, pi_unfolded_value = self.get_pi(
conformation=conformation,
grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0],
iteration=iteration+1)
return pi_folded_value, pi_unfolded_value return pi_folded_value, pi_unfolded_value
if __name__ == '__main__':
input_file = sys.argv[1]
mc = Molecular_container(input_file)

View File

@@ -1,397 +1,463 @@
"""Output routines."""
from __future__ import division from datetime import date
from __future__ import print_function from propka.lib import info
import sys
import propka.lib
from propka.lib import info, warning
def printHeader(): def print_header():
"""Print header section of output."""
str_ = "{0:s}\n".format(get_propka_header())
str_ += "{0:s}\n".format(get_references_header())
str_ += "{0:s}\n".format(get_warning_header())
info(str_)
def write_pdb(protein, pdbfile=None, filename=None, include_hydrogens=False,
_=None):
"""Write a residue to the new PDB file.
Args:
protein: protein object
pdbfile: PDB file
filename: file to write to
include_hydrogens: Boolean indicating whether to include hydrogens
options: options object
""" """
prints the header section if pdbfile is None:
""" # opening file if not given
str = "%s\n" % ( getPropkaHeader() ) if filename is None:
str += "%s\n" % ( getReferencesHeader() ) filename = "{0:s}.pdb".format(protein.name)
str += "%s\n" % ( getWarningHeader() ) # TODO - this would be better as a context manager
pdbfile = open(filename, 'w')
info(str) info("writing pdbfile {0:s}".format(filename))
close_file = True
def writePDB(protein, file=None, filename=None, include_hydrogens=False, options=None):
"""
Write the residue to the new pdbfile
"""
if file == None:
# opening file if not given
if filename == None:
filename = "%s.pdb" % (protein.name)
file = open(filename, 'w')
info("writing pdbfile %s" % (filename))
close_file = True
else: else:
# don't close the file, it was opened in a different place # don't close the file, it was opened in a different place
close_file = False close_file = False
numb = 0 numb = 0
for chain in protein.chains: for chain in protein.chains:
for residue in chain.residues: for residue in chain.residues:
if residue.res_name not in ["N+ ", "C- "]: if residue.res_name not in ["N+ ", "C- "]:
for atom in residue.atoms: for atom in residue.atoms:
if include_hydrogens == False and atom.name[0] == "H": if (not include_hydrogens) and atom.name[0] == "H":
""" don't print """ # don't print
else: pass
numb += 1 else:
line = atom.make_pdb_line2(numb=numb) numb += 1
line += "\n" line = atom.make_pdb_line2(numb=numb)
file.write(line) line += "\n"
pdbfile.write(line)
if close_file == True: if close_file:
file.close() pdbfile.close()
def writePKA(protein, parameters, filename=None, conformation ='1A',reference="neutral", direction="folding", verbose=False, options=None): def write_pka(protein, parameters, filename=None, conformation='1A',
""" reference="neutral", _="folding", verbose=False,
Write the pka-file based on the given protein __=None):
"""Write the pKa-file based on the given protein.
Args:
protein: protein object
filename: output file name
conformation: TODO - figure this out
reference: reference state
_: "folding" or other
verbose: Boolean flag for verbosity
__: options object
""" """
# TODO - the code immediately overrides the verbose argument; why?
verbose = True verbose = True
if filename == None: if filename is None:
filename = "%s.pka" % (protein.name) filename = "{0:s}.pka".format(protein.name)
file = open(filename, 'w') # TODO - this would be much better with a context manager
if verbose == True: file_ = open(filename, 'w')
info("Writing %s" % (filename)) if verbose:
info("Writing {0:s}".format(filename))
# writing propka header # writing propka header
str = "%s\n" % ( getPropkaHeader() ) str_ = "{0:s}\n".format(get_propka_header())
str += "%s\n" % ( getReferencesHeader() ) str_ += "{0:s}\n".format(get_references_header())
str += "%s\n" % ( getWarningHeader() ) str_ += "{0:s}\n".format(get_warning_header())
# writing pKa determinant section # writing pKa determinant section
str += getDeterminantSection(protein,conformation, parameters) str_ += get_determinant_section(protein, conformation, parameters)
# writing pKa summary section # writing pKa summary section
str += getSummarySection(protein,conformation,parameters) str_ += get_summary_section(protein, conformation, parameters)
str += "%s\n" % ( getTheLine() ) str_ += "{0:s}\n".format(get_the_line())
# printing Folding Profile # printing Folding Profile
str += getFoldingProfileSection(protein, conformation=conformation, reference=reference, direction=direction, window=[0., 14., 1.0], options=options) str_ += get_folding_profile_section(
protein, conformation=conformation, reference=reference,
window=[0., 14., 1.0])
# printing Protein Charge Profile # printing Protein Charge Profile
str += getChargeProfileSection(protein, conformation=conformation) str_ += get_charge_profile_section(protein, conformation=conformation)
# now, writing the pka text to file # now, writing the pka text to file
file.write(str) file_.write(str_)
file_.close()
file.close()
def printTmProfile(protein, reference="neutral", window=[0., 14., 1.], Tm=[0.,0.], Tms=None, ref=None, verbose=False, options=None): def print_tm_profile(protein, reference="neutral", window=[0., 14., 1.],
__=[0., 0.], tms=None, ref=None, _=False,
options=None):
"""Print Tm profile.
I think Tm refers to the denaturation temperature.
Args:
protein: protein object
reference: reference state
window: pH window [min, max, step]
__: temperature range [min, max]
tms: TODO - figure this out
ref: TODO - figure this out (probably reference state?)
_: Boolean for verbosity
options: options object
""" """
prints Tm profile profile = protein.getTmProfile(
""" reference=reference, grid=[0., 14., 0.1], tms=tms, ref=ref,
profile = protein.getTmProfile(reference=reference, grid=[0., 14., 0.1], Tms=Tms, ref=ref, options=options) options=options)
if profile == None: if profile is None:
str = "Could not determine Tm-profile\n" str_ = "Could not determine Tm-profile\n"
else: else:
str = " suggested Tm-profile for %s\n" % (protein.name) str_ = " suggested Tm-profile for {0:s}\n".format(protein.name)
for (pH, Tm) in profile: for (ph, tm_) in profile:
if pH >= window[0] and pH <= window[1] and (pH%window[2] < 0.01 or pH%window[2] > 0.99*window[2]): if (ph >= window[0] and ph <= window[1]
str += "%6.2lf%10.2lf\n" % (pH, Tm) and (ph % window[2] < 0.01
info(str) or ph % window[2] > 0.99*window[2])):
str_ += "{0:>6.2f}{1:>10.2f}\n".format(ph, tm_)
info(str_)
def printResult(protein, conformation, parameters): def print_result(protein, conformation, parameters):
"""Prints all resulting output from determinants and down.
Args:
protein: protein object
conformation: specific conformation
parameters: parameters
""" """
prints all resulting output from determinants and down print_pka_section(protein, conformation, parameters)
"""
printPKASection(protein, conformation, parameters)
def printPKASection(protein, conformation, parameters): def print_pka_section(protein, conformation, parameters):
""" """Prints out pKa section of results.
prints out the pka-section of the result
Args:
protein: protein object
conformation: specific conformation
parameters: parameters
""" """
# geting the determinants section # geting the determinants section
str = getDeterminantSection(protein, conformation, parameters) str_ = get_determinant_section(protein, conformation, parameters)
info(str) info(str_)
str_ = get_summary_section(protein, conformation, parameters)
str = getSummarySection(protein,conformation,parameters) info(str_)
info(str)
def getDeterminantSection(protein, conformation, parameters): def get_determinant_section(protein, conformation, parameters):
""" """Returns string with determinant section of results.
prints out the pka-section of the result
Args:
protein: protein object
conformation: specific conformation
parameters: parameters
Returns:
string
""" """
# getting the same order as in propka2.0 # getting the same order as in propka2.0
str = "%s\n" % ( getDeterminantsHeader() ) str_ = "{0:s}\n".format(get_determinants_header())
# printing determinants # printing determinants
for chain in protein.conformations[conformation].chains: for chain in protein.conformations[conformation].chains:
for residue_type in parameters.write_out_order: for residue_type in parameters.write_out_order:
groups = [g for g in protein.conformations[conformation].groups if g.atom.chain_id == chain] groups = [
g for g in protein.conformations[conformation].groups
if g.atom.chain_id == chain]
for group in groups: for group in groups:
if group.residue_type == residue_type: if group.residue_type == residue_type:
str += "%s" % ( group.getDeterminantString(parameters.remove_penalised_group) ) str_ += "{0:s}".format(
group.get_determinant_string(
parameters.remove_penalised_group))
# Add a warning in case of coupled residues # Add a warning in case of coupled residues
if protein.conformations[conformation].non_covalently_coupled_groups and not protein.options.display_coupled_residues: if (protein.conformations[conformation].non_covalently_coupled_groups
str += 'Coupled residues (marked *) were detected. Please rerun PropKa with the --display-coupled-residues \nor -d option for detailed information.\n' and not protein.options.display_coupled_residues):
str_ += 'Coupled residues (marked *) were detected.'
return str str_ += 'Please rerun PropKa with the --display-coupled-residues \n'
str_ += 'or -d option for detailed information.\n'
return str_
def getSummarySection(protein, conformation, parameters): def get_summary_section(protein, conformation, parameters):
"""Returns string with summary section of the results.
Args:
protein: protein object
conformation: specific conformation
parameters: parameters
Returns:
string
""" """
prints out the pka-section of the result str_ = "{0:s}\n".format(get_summary_header())
"""
str = "%s\n" % ( getSummaryHeader() )
# printing pKa summary # printing pKa summary
for residue_type in parameters.write_out_order: for residue_type in parameters.write_out_order:
for group in protein.conformations[conformation].groups: for group in protein.conformations[conformation].groups:
if group.residue_type == residue_type: if group.residue_type == residue_type:
str += "%s" % ( group.getSummaryString(parameters.remove_penalised_group) ) str_ += "{0:s}".format(
group.get_summary_string(
return str parameters.remove_penalised_group))
return str_
def getFoldingProfileSection(protein, conformation='AVR', direction="folding", reference="neutral", window=[0., 14., 1.0], verbose=False, options=None): def get_folding_profile_section(
protein, conformation='AVR', direction="folding", reference="neutral",
window=[0., 14., 1.0], _=False, __=None):
"""Returns string with the folding profile section of the results.
Args:
protein: protein object
conformation: specific conformation
direction: 'folding' or other
reference: reference state
window: pH window [min, max, step]
_: Boolean for verbose output
__: options object
Returns:
string
""" """
returns the protein-folding-profile section str_ = get_the_line()
""" str_ += "\n"
str = getTheLine() str_ += "Free energy of {0:>9s} (kcal/mol) as a function".format(direction)
str += "\n" str_ += " of pH (using {0:s} reference)\n".format(reference)
str += "Free energy of %9s (kcal/mol) as a function of pH (using %s reference)\n" % (direction, reference) profile, [ph_opt, dg_opt], [dg_min, dg_max], [ph_min, ph_max] = (
protein.get_folding_profile(
profile, [pH_opt, dG_opt], [dG_min, dG_max], [pH_min, pH_max] = protein.getFoldingProfile(conformation=conformation, conformation=conformation, reference=reference,
reference=reference, grid=[0., 14., 0.1]))
direction=direction, grid=[0., 14., 0.1], options=options) if profile is None:
if profile == None: str_ += "Could not determine folding profile\n"
str += "Could not determine folding profile\n"
else: else:
for (pH, dG) in profile: for (ph, dg) in profile:
if pH >= window[0] and pH <= window[1] and (pH%window[2] < 0.05 or pH%window[2] > 0.95): if ph >= window[0] and ph <= window[1]:
str += "%6.2lf%10.2lf\n" % (pH, dG) if ph % window[2] < 0.05 or ph % window[2] > 0.95:
str += "\n" str_ += "{0:>6.2f}{1:>10.2f}\n".format(ph, dg)
str_ += "\n"
if pH_opt == None or dG_opt == None: if ph_opt is None or dg_opt is None:
str += "Could not determine pH optimum\n" str_ += "Could not determine pH optimum\n"
else: else:
str += "The pH of optimum stability is %4.1lf for which the free energy is%6.1lf kcal/mol at 298K\n" % (pH_opt, dG_opt) str_ += "The pH of optimum stability is {0:>4.1f}".format(ph_opt)
str_ += (
if dG_min == None or dG_max == None: " for which the free energy is {0:>6.1f} kcal/mol at 298K\n".format(
str += "Could not determine pH values where the free energy is within 80 %s of minimum\n" % ("%") dg_opt))
if dg_min is None or dg_max is None:
str_ += "Could not determine pH values where the free energy"
str_ += " is within 80 % of minimum\n"
else: else:
str += "The free energy is within 80 %s of maximum at pH %4.1lf to %4.1lf\n" % ("%", dG_min, dG_max) str_ += "The free energy is within 80 % of maximum"
str_ += " at pH {0:>4.1f} to {1:>4.1f}\n".format(dg_min, dg_max)
if pH_min == None or pH_max == None: if ph_min is None or ph_max is None:
str += "Could not determine the pH-range where the free energy is negative\n\n" str_ += "Could not determine the pH-range where the free"
str_ += " energy is negative\n\n"
else: else:
str += "The free energy is negative in the range %4.1lf - %4.1lf\n\n" % (pH_min, pH_max) str_ += "The free energy is negative in the range"
str_ += " {0:>4.1f} - {1:>4.1f}\n\n".format(ph_min, ph_max)
return str_
return str def get_charge_profile_section(protein, conformation='AVR', _=None):
"""Returns string with the charge profile section of the results.
Args:
protein: protein object
def getChargeProfileSection(protein, conformation='AVR', options=None): conformation: specific conformation
_: options object
Returns:
string
""" """
returns the protein-folding-profile section str_ = "Protein charge of folded and unfolded state as a function of pH\n"
""" profile = protein.get_charge_profile(conformation=conformation,
str = "Protein charge of folded and unfolded state as a function of pH\n" grid=[0., 14., 1.])
if profile is None:
profile = protein.getChargeProfile(conformation=conformation,grid=[0., 14., 1.]) str_ += "Could not determine charge profile\n"
if profile == None:
str += "Could not determine charge profile\n"
else: else:
str += "%6s%10s%8s\n" % ("pH", "unfolded", "folded") str_ += ' pH unfolded folded\n'
for (pH, Q_mod, Q_pro) in profile: for (ph, q_mod, q_pro) in profile:
str += "%6.2lf%10.2lf%8.2lf\n" % (pH, Q_mod, Q_pro) str_ += "{ph:6.2f}{qm:10.2f}{qp:8.2f}\n".format(
ph=ph, qm=q_mod, qp=q_pro)
pi_pro, pi_mod = protein.get_pi(conformation=conformation)
pI_pro, pI_mod = protein.getPI(conformation=conformation) if pi_pro is None or pi_mod is None:
if pI_pro == None or pI_mod == None: str_ += "Could not determine the pI\n\n"
str += "Could not determine the pI\n\n"
else: else:
str += "The pI is %5.2lf (folded) and %5.2lf (unfolded)\n" % (pI_pro, pI_mod) str_ += ("The pI is {0:>5.2f} (folded) and {1:>5.2f} (unfolded)\n")
return str_
return str def write_jackal_scap_file(mutation_data=None, filename="1xxx_scap.list",
_=None):
"""Write a scap file for, i.e., generating a mutated protein
TODO - figure out what this is
def writeJackalScapFile(mutationData=None, filename="1xxx_scap.list", options=None):
""" """
writing a scap file for, i.e., generating a mutated protein with open(filename, 'w') as file_:
for chain_id, _, res_num, code2 in mutation_data:
str_ = "{chain:s}, {num:d}, {code:s}\n".format(
chain=chain_id, num=res_num, code=code2)
file_.write(str_)
def write_scwrl_sequence_file(sequence, filename="x-ray.seq", _=None):
"""Write a scwrl sequence file for, e.g., generating a mutated protein
TODO - figure out what this is
""" """
file = open(filename, 'w') with open(filename, 'w') as file_:
start = 0
for chain_id, code1, res_num, code2 in mutationData: while len(sequence[start:]) > 60:
str = "%s, %d, %s\n" % (chain_id, res_num, code2) file_.write("{0:s}s\n".format(sequence[start:start+60]))
file.write(str) start += 60
file.close() file_.write("{0:s}\n".format(sequence[start:]))
def writeScwrlSequenceFile(sequence, filename="x-ray.seq", options=None): def get_propka_header():
"""Create the header.
Returns:
string
""" """
writing a scwrl sequence file for, e.g., generating a mutated protein
"""
file = open(filename, 'w')
start = 0
while len(sequence[start:]) > 60:
file.write( "%s\n" % (sequence[start:start+60]) )
start += 60
file.write( "%s\n" % (sequence[start:]) )
file.close()
# --- various header text --- #
def getPropkaHeader():
"""
Creates the header
"""
from datetime import date
today = date.today() today = date.today()
str_ = "propka3.1 {0!s:>93s}\n".format(today)
str_ += """
-------------------------------------------------------------------------------
-- --
-- PROPKA: A PROTEIN PKA PREDICTOR --
-- --
-- VERSION 1.0, 04/25/2004, IOWA CITY --
-- BY HUI LI --
-- --
-- VERSION 2.0, 11/05/2007, IOWA CITY/COPENHAGEN --
-- BY DELPHINE C. BAS AND DAVID M. ROGERS --
-- --
-- VERSION 3.0, 01/06/2011, COPENHAGEN --
-- BY MATS H.M. OLSSON AND CHRESTEN R. SONDERGARD --
-- --
-- VERSION 3.1, 07/01/2011, COPENHAGEN --
-- BY CHRESTEN R. SONDERGARD AND MATS H.M. OLSSON --
-- --
-------------------------------------------------------------------------------
"""
return str_
def get_references_header():
"""Create the 'references' part of output file.
str = "propka3.1 %93s\n" % (today) Returns:
str += "-------------------------------------------------------------------------------------------------------\n" string
str += "-- --\n"
str += "-- PROPKA: A PROTEIN PKA PREDICTOR --\n"
str += "-- --\n"
str += "-- VERSION 1.0, 04/25/2004, IOWA CITY --\n"
str += "-- BY HUI LI --\n"
str += "-- --\n"
str += "-- VERSION 2.0, 11/05/2007, IOWA CITY/COPENHAGEN --\n"
str += "-- BY DELPHINE C. BAS AND DAVID M. ROGERS --\n"
str += "-- --\n"
str += "-- VERSION 3.0, 01/06/2011, COPENHAGEN --\n"
str += "-- BY MATS H.M. OLSSON AND CHRESTEN R. SONDERGARD --\n"
str += "-- --\n"
str += "-- VERSION 3.1, 07/01/2011, COPENHAGEN --\n"
str += "-- BY CHRESTEN R. SONDERGARD AND MATS H.M. OLSSON --\n"
str += "-------------------------------------------------------------------------------------------------------\n"
str += "\n"
return str
def getReferencesHeader():
""" """
Returns the 'references' part in output file str_ = """
-------------------------------------------------------------------------------
References:
Very Fast Empirical Prediction and Rationalization of Protein pKa Values.
Hui Li, Andrew D. Robertson and Jan H. Jensen. PROTEINS: Structure, Function,
and Bioinformatics. 61:704-721 (2005)
Very Fast Prediction and Rationalization of pKa Values for Protein-Ligand
Complexes. Delphine C. Bas, David M. Rogers and Jan H. Jensen. PROTEINS:
Structure, Function, and Bioinformatics 73:765-783 (2008)
PROPKA3: Consistent Treatment of Internal and Surface Residues in Empirical
pKa predictions. Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski,
and Jan H. Jensen. Journal of Chemical Theory and Computation, 7(2):525-537
(2011)
Improved Treatment of Ligands and Coupling Effects in Empirical Calculation
and Rationalization of pKa Values. Chresten R. Sondergaard, Mats H.M. Olsson,
Michal Rostkowski, and Jan H. Jensen. Journal of Chemical Theory and
Computation, (2011)
-------------------------------------------------------------------------------
"""
return str_
def get_warning_header():
"""Create the 'warning' part of the output file.
TODO - this function is essentially a no-op.
Returns:
string
""" """
str_ = ""
str = "" return str_
str += "-------------------------------------------------------------------------------------------------------\n"
str += " References:\n"
str += "\n"
str += " Very Fast Empirical Prediction and Rationalization of Protein pKa Values\n"
str += " Hui Li, Andrew D. Robertson and Jan H. Jensen\n"
str += " PROTEINS: Structure, Function, and Bioinformatics 61:704-721 (2005)\n"
str += " \n"
str += " Very Fast Prediction and Rationalization of pKa Values for Protein-Ligand Complexes\n"
str += " Delphine C. Bas, David M. Rogers and Jan H. Jensen\n"
str += " PROTEINS: Structure, Function, and Bioinformatics 73:765-783 (2008)\n"
str += " \n"
str += " PROPKA3: Consistent Treatment of Internal and Surface Residues in Empirical pKa predictions\n"
str += " Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski, and Jan H. Jensen\n"
str += " Journal of Chemical Theory and Computation, 7(2):525-537 (2011)\n"
str += " \n"
str += " Improved Treatment of Ligands and Coupling Effects in Empirical Calculation\n"
str += " and Rationalization of pKa Values\n"
str += " Chresten R. Sondergaard, Mats H.M. Olsson, Michal Rostkowski, and Jan H. Jensen\n"
str += " Journal of Chemical Theory and Computation, (2011)\n"
str += " \n"
str += "-------------------------------------------------------------------------------------------------------\n"
return str
def getWarningHeader(): def get_determinants_header():
"""Create the Determinant header.
Returns:
string
""" """
Returns the 'warning' part in output file str_ = """
--------- ----- ------ --------------------- -------------- -------------- --------------
DESOLVATION EFFECTS SIDECHAIN BACKBONE COULOMBIC
RESIDUE pKa BURIED REGULAR RE HYDROGEN BOND HYDROGEN BOND INTERACTION
--------- ----- ------ --------- --------- -------------- -------------- --------------
"""
return str_
def get_summary_header():
"""Create the summary header.
Returns:
string
""" """
str_ = get_the_line()
str = "" str_ += "\n"
str_ += "SUMMARY OF THIS PREDICTION\n"
return str str_ += " Group pKa model-pKa ligand atom-type"
return str_
def getDeterminantsHeader(): def get_the_line():
"""Draw the line-Johnny Cash would have been proud-or actually Aerosmith!
NOTE - Johnny Cash walked the line.
Returns:
string
""" """
Creates the Determinant header str_ = ""
str_ += ("-" * 104)
return str_
def make_interaction_map(name, list_, interaction):
"""Print out an interaction map named 'name' of the groups in 'list'
based on the function 'interaction'
Args:
list_: list of groups
interaction: some sort of function
Returns:
string
""" """
str = ""
str += "--------- ----- ------ --------------------- -------------- -------------- --------------\n"
str += " DESOLVATION EFFECTS SIDECHAIN BACKBONE COULOMBIC \n"
str += " RESIDUE pKa BURIED REGULAR RE HYDROGEN BOND HYDROGEN BOND INTERACTION \n"
str += "--------- ----- ------ --------- --------- -------------- -------------- --------------\n"
return str
def getSummaryHeader():
"""
returns the summary header
"""
str = getTheLine()
str += "\n"
str += "SUMMARY OF THIS PREDICTION\n"
str += " Group pKa model-pKa ligand atom-type"
return str
def getTheLine():
"""
draw the line - Johnny Cash would have been proud - or actually Aerosmith!
"""
str = ""
for i in range(0, 104):
str += "-"
return str
# Interaction maps
def make_interaction_map(name, list, interaction):
""" Print out an interaction map named 'name' of the groups in 'list'
based on the function 'interaction' """
# return an empty string, if the list is empty # return an empty string, if the list is empty
if len(list)==0: if len(list_) == 0:
return '' return ''
# for long list, use condensed formatting # for long list, use condensed formatting
if len(list)>10: if len(list_) > 10:
res = 'Condensed form:\n' res = 'Condensed form:\n'
for i in range(len(list)): for i, group1 in enumerate(list_):
for j in range(i,len(list)): for group2 in list_[i:]:
if interaction(list[i],list[j]): if interaction(group1, group2):
res += 'Coupling: %9s - %9s\n'%(list[i].label,list[j].label) res += 'Coupling: {0:>9s} - {1:>9s}\n'.format(
group1.label, group2.label)
return res return res
# Name and map header # Name and map header
res = '%s\n%12s'%(name,'') res = '{0:s}\n{1:>12s}'.format(name, '')
for g in list: for group in list_:
res += '%9s | '%g.label res += '{0:>9s} | '.format(group.label)
# do the map # do the map
for g1 in list: for group1 in list_:
res += '\n%-12s'%(g1.label) res += '\n{0:<12s}'.format(group1.label)
for g2 in list: for group2 in list_:
tag = '' tag = ''
if interaction(g1, g2): if interaction(group1, group2):
tag = ' X ' tag = ' X '
res += '%10s| '%tag res += '{0:>10s}| '.format(tag)
return res return res

View File

@@ -1,232 +1,276 @@
"""Holds parameters and settings."""
from __future__ import division import pkg_resources
from __future__ import print_function
import math
import propka.lib as lib import propka.lib as lib
import sys, os
from propka.lib import info, warning from propka.lib import info, warning
import pkg_resources
# names and types of all key words in configuration file # names and types of all key words in configuration file
matrices = ['interaction_matrix'] MATRICES = ['interaction_matrix']
PAIR_WISE_MATRICES = ['sidechain_cutoffs']
pair_wise_matrices = ['sidechain_cutoffs'] NUMBER_DICTIONARIES = [
'VanDerWaalsVolume', 'charge', 'model_pkas', 'ions', 'valence_electrons',
number_dictionaries = ['VanDerWaalsVolume','charge','model_pkas','ions', 'custom_model_pkas']
'valence_electrons','custom_model_pkas'] LIST_DICTIONARIES = ['backbone_NH_hydrogen_bond', 'backbone_CO_hydrogen_bond']
STRING_DICTIONARIES = ['protein_group_mapping']
list_dictionaries = ['backbone_NH_hydrogen_bond','backbone_CO_hydrogen_bond'] STRING_LISTS = [
'ignore_residues', 'angular_dependent_sidechain_interactions',
string_dictionaries = ['protein_group_mapping'] 'acid_list', 'base_list', 'exclude_sidechain_interactions',
'backbone_reorganisation_list', 'write_out_order']
string_lists = ['ignore_residues','angular_dependent_sidechain_interactions', DISTANCES = ['desolv_cutoff', 'buried_cutoff', 'coulomb_cutoff1',
'acid_list','base_list','exclude_sidechain_interactions', 'coulomb_cutoff2']
'backbone_reorganisation_list','write_out_order'] PARAMETERS = [
'Nmin', 'Nmax', 'desolvationSurfaceScalingFactor', 'desolvationPrefactor',
distances = ['desolv_cutoff','buried_cutoff','coulomb_cutoff1','coulomb_cutoff2'] 'desolvationAllowance', 'coulomb_diel', 'COO_HIS_exception',
'OCO_HIS_exception', 'CYS_HIS_exception', 'CYS_CYS_exception',
parameters = ['Nmin','Nmax','desolvationSurfaceScalingFactor','desolvationPrefactor', 'min_ligand_model_pka', 'max_ligand_model_pka',
'desolvationAllowance','coulomb_diel','COO_HIS_exception','OCO_HIS_exception', 'include_H_in_interactions', 'coupling_max_number_of_bonds',
'CYS_HIS_exception','CYS_CYS_exception','min_ligand_model_pka','max_ligand_model_pka', 'min_bond_distance_for_hydrogen_bonds', 'coupling_penalty',
'include_H_in_interactions','coupling_max_number_of_bonds', 'shared_determinants', 'common_charge_centre', 'hide_penalised_group',
'min_bond_distance_for_hydrogen_bonds','coupling_penalty', 'remove_penalised_group', 'max_intrinsic_pka_diff',
'shared_determinants','common_charge_centre','hide_penalised_group', 'remove_penalised_group', 'min_interaction_energy', 'max_free_energy_diff', 'min_swap_pka_shift',
'max_intrinsic_pKa_diff','min_interaction_energy','max_free_energy_diff','min_swap_pka_shift', 'min_pka', 'max_pka', 'sidechain_interaction']
'min_pka','max_pka','sidechain_interaction'] STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference']
strings = ['version','output_file_tag','ligand_typing','pH','reference']
class Parameters: class Parameters:
def __init__(self, parameter_file): """PROPKA parameter class."""
def __init__(self, parameter_file):
"""Initialize parameter class.
Args:
parameter_file: file with parameters
"""
# TODO - need to define all members explicitly
self.model_pkas = {}
self.interaction_matrix = InteractionMatrix("interaction_matrix")
self.sidechain_cutoffs = None
# TODO - it would be nice to rename these; they're defined everywhere
self.COO_HIS_exception = None
self.OCO_HIS_exception = None
self.CYS_HIS_exception = None
self.CYS_CYS_exception = None
# These functions set up remaining data structures implicitly
self.set_up_data_structures() self.set_up_data_structures()
self.read_parameters(parameter_file) self.read_parameters(parameter_file)
#self.print_interaction_parameters() def read_parameters(self, file_):
#self.print_interaction_parameters_latex() """Read parameters from file.
#####self.print_interactions_latex()
#sys.exit(0)
Args:
return file_: file to read
"""
def read_parameters(self, file):
# try to locate the parameters file # try to locate the parameters file
try: try:
ifile = pkg_resources.resource_filename(__name__, file) ifile = pkg_resources.resource_filename(__name__, file_)
input = lib.open_file_for_reading(ifile) input_ = lib.open_file_for_reading(ifile)
except: except (IOError, FileNotFoundError, ValueError):
input = lib.open_file_for_reading(file) input_ = lib.open_file_for_reading(file_)
for line in input_:
for line in input:
self.parse_line(line) self.parse_line(line)
return
def parse_line(self, line): def parse_line(self, line):
"""Parse parameter file line."""
# first, remove comments # first, remove comments
comment_pos = line.find('#') comment_pos = line.find('#')
if comment_pos != -1: if comment_pos != -1:
line = line[:comment_pos] line = line[:comment_pos]
# split the line into words # split the line into words
words = line.split() words = line.split()
if len(words) == 0: if len(words) == 0:
return return
# parse the words # parse the words
if len(words)==3 and words[0] in number_dictionaries: if len(words) == 3 and words[0] in NUMBER_DICTIONARIES:
self.parse_to_number_dictionary(words) self.parse_to_number_dictionary(words)
elif len(words)==2 and words[0] in string_lists: elif len(words) == 2 and words[0] in STRING_LISTS:
self.parse_to_string_list(words) self.parse_to_string_list(words)
elif len(words)==2 and words[0] in distances: elif len(words) == 2 and words[0] in DISTANCES:
self.parse_distance(words) self.parse_distance(words)
elif len(words)==2 and words[0] in parameters: elif len(words) == 2 and words[0] in PARAMETERS:
self.parse_parameter(words) self.parse_parameter(words)
elif len(words)==2 and words[0] in strings: elif len(words) == 2 and words[0] in STRINGS:
self.parse_string(words) self.parse_string(words)
elif len(words)>2 and words[0] in list_dictionaries: elif len(words) > 2 and words[0] in LIST_DICTIONARIES:
self.parse_to_list_dictionary(words) self.parse_to_list_dictionary(words)
elif words[0] in matrices+pair_wise_matrices: elif words[0] in MATRICES+PAIR_WISE_MATRICES:
self.parse_to_matrix(words) self.parse_to_matrix(words)
elif len(words)==3 and words[0] in string_dictionaries: elif len(words) == 3 and words[0] in STRING_DICTIONARIES:
self.parse_to_string_dictionary(words) self.parse_to_string_dictionary(words)
#info(words)
return
def parse_to_number_dictionary(self, words): def parse_to_number_dictionary(self, words):
exec('self.%s[\'%s\'] = %s'%tuple(words)) """Parse field to number dictionary.
return
Args:
words: strings to parse.
"""
dict_ = getattr(self, words[0])
key = words[1]
value = words[2]
dict_[key] = float(value)
def parse_to_string_dictionary(self, words): def parse_to_string_dictionary(self, words):
exec('self.%s[\'%s\'] = \'%s\''%tuple(words)) """Parse field to string dictionary.
return
Args:
words: strings to parse
"""
dict_ = getattr(self, words[0])
key = words[1]
value = words[2]
dict_[key] = value
def parse_to_list_dictionary(self, words): def parse_to_list_dictionary(self, words):
exec('if not \'%s\' in self.%s.keys(): self.%s[\'%s\'] = []'%(words[1],words[0],words[0],words[1])) """Parse field to list dictionary.
for word in words[2:]:
exec('self.%s[\'%s\'].append(%s)'%(words[0],words[1],word))
return Args:
words: strings to parse.
"""
dict_ = getattr(self, words[0])
key = words[1]
if not key in dict_:
dict_[key] = []
for value in words[2:]:
if isinstance(value, list):
dict_[key].append([float(x) for x in value])
dict_[key].append(float(value))
def parse_to_string_list(self, words): def parse_to_string_list(self, words):
exec('self.%s.append(\'%s\')'%tuple(words)) """Parse field to string list.
return
Args:
words: strings to parse
"""
list_ = getattr(self, words[0])
value = words[1]
list_.append(value)
def parse_to_matrix(self, words): def parse_to_matrix(self, words):
exec('self.%s.add(%s)'%(words[0],tuple(words[1:]))) """Parse field to matrix.
return
Args:
words: strings to parse
"""
matrix = getattr(self, words[0])
value = tuple(words[1:])
matrix.add(value)
def parse_distance(self, words): def parse_distance(self, words):
# float check needed """Parse field to distance.
exec('self.%s = %s'%tuple(words))
exec('self.%s_squared = pow(%s,2)'%tuple(words)) Args:
return words: strings to parse
"""
value = float(words[1])
setattr(self, words[0], value)
value_sq = value*value
attr = "{0:s}_squared".format(words[0])
setattr(self, attr, value_sq)
def parse_parameter(self, words): def parse_parameter(self, words):
exec('self.%s = %s'%tuple(words)) """Parse field to parameters.
return
Args:
words: strings to parse
"""
value = float(words[1])
setattr(self, words[0], value)
def parse_string(self, words): def parse_string(self, words):
#info('self.%s = \'%s\''%tuple(words)) """Parse field to strings.
exec('self.%s = \'%s\''%tuple(words))
return
Args:
words: strings to parse
"""
setattr(self, words[0], words[1])
def set_up_data_structures(self): def set_up_data_structures(self):
for key_word in number_dictionaries+list_dictionaries+string_dictionaries: """Set up internal data structures.
exec('self.%s = {}'%key_word)
for key_word in string_lists:
exec('self.%s = []'%key_word)
for key_word in strings:
exec('self.%s = \'\''%key_word)
for key_word in matrices:
exec('self.%s = Interaction_matrix(\'%s\')'%(key_word,key_word))
for key_word in pair_wise_matrices:
exec('self.%s =Pair_wise_matrix(\'%s\')'%(key_word,key_word))
return TODO - it would be better to make these assignments explicit in
__init__.
"""
for key_word in (NUMBER_DICTIONARIES + LIST_DICTIONARIES
+ STRING_DICTIONARIES):
setattr(self, key_word, {})
for key_word in STRING_LISTS:
setattr(self, key_word, [])
for key_word in STRINGS:
setattr(self, key_word, "")
for key_word in MATRICES:
matrix = InteractionMatrix(key_word)
setattr(self, key_word, matrix)
for key_word in PAIR_WISE_MATRICES:
matrix = PairwiseMatrix(key_word)
setattr(self, key_word, matrix)
def print_interaction_parameters(self): def print_interaction_parameters(self):
"""Print interaction parameters."""
info('--------------- Model pKa values ----------------------') info('--------------- Model pKa values ----------------------')
for k in self.model_pkas.keys(): for k in self.model_pkas:
info('%3s %8.2f' % (k, self.model_pkas[k])) info('{0:>3s} {1:8.2f}'.format(k, self.model_pkas[k]))
info('') info('')
info('--------------- Interactions --------------------------') info('--------------- Interactions --------------------------')
agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] agroups = [
lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] 'COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG',
'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR',
map = { 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
'CG' :['ARG'], lgroups = [
'C2N':['ARG'], 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1',
'N30':['N+','LYS'], 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
'N31':['N+','LYS'], map_ = {
'N32':['N+','LYS'], 'CG': ['ARG'], 'C2N': ['ARG'], 'N30': ['N+', 'LYS'],
'N33':['N+','LYS'] , 'N31': ['N+', 'LYS'], 'N32': ['N+', 'LYS'], 'N33': ['N+', 'LYS'],
'NAR':['HIS'], 'NAR': ['HIS'], 'OCO': ['COO'], 'OP': [], 'SH': ['CYS'],
'OCO':['COO'], 'NP1': [], 'OH': ['ROH'], 'O3': [], 'CL': [], 'F': [],
'OP' :[],#['TYR','SER'], 'NAM': ['AMD'], 'N1': [], 'O2': []}
'SH' :['CYS'] , for group1 in agroups:
'NP1':[], for group2 in lgroups:
'OH' :['ROH'], fmt = "{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}"
'O3' :[] , interaction = fmt.format(
'CL' :[], grp1=group1, grp2=group2,
'F' :[], mat=self.interaction_matrix[group1][group2],
'NAM':['AMD'], val1=self.sidechain_cutoffs.get_value(group1, group2)[0],
'N1' :[], val2=self.sidechain_cutoffs.get_value(group1, group2)[1])
'O2' :[]}
for g1 in agroups:
for g2 in lgroups:
interaction = '%3s %3s %1s %4s %4s'%(g1,g2,
self.interaction_matrix[g1][g2],
self.sidechain_cutoffs.get_value(g1,g2)[0],
self.sidechain_cutoffs.get_value(g1,g2)[1])
map_interaction = '' map_interaction = ''
if g2 in map: if group2 in map_:
for m in map[g2]: for val in map_[group2]:
map_interaction += '|%3s %3s %1s %4s %4s'%(g1,m, fmt = "|{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}"
self.interaction_matrix[g1][m], map_interaction += fmt.format(
self.sidechain_cutoffs.get_value(g1,m)[0], group1, val, self.interaction_matrix[group1][val],
self.sidechain_cutoffs.get_value(g1,m)[1]) self.sidechain_cutoffs.get_value(group1, val)[0],
if self.interaction_matrix[g1][m] != self.interaction_matrix[g1][g2]: self.sidechain_cutoffs.get_value(group1, val)[1])
if (self.interaction_matrix[group1][val]
!= self.interaction_matrix[group1][group2]):
map_interaction += '* ' map_interaction += '* '
if self.sidechain_cutoffs.get_value(g1,m)[0] != self.sidechain_cutoffs.get_value(g1,g2)[0] or \ if (self.sidechain_cutoffs.get_value(group1, val)[0]
self.sidechain_cutoffs.get_value(g1,m)[1] != self.sidechain_cutoffs.get_value(g1,g2)[1]: != self.sidechain_cutoffs.get_value(
group1, group2)[0]
or self.sidechain_cutoffs.get_value(
group1, val)[1]
!= self.sidechain_cutoffs.get_value(
group1, group2)[1]):
map_interaction += '! ' map_interaction += '! '
else: else:
map_interaction += ' ' map_interaction += ' '
if len(map[g2])==0 and (self.sidechain_cutoffs.get_value(g1,g2)[0] !=3 or self.sidechain_cutoffs.get_value(g1,g2)[1] != 4): if (len(map_[group2]) == 0
and (self.sidechain_cutoffs.get_value(
group1, group2)[0]
!= 3
or self.sidechain_cutoffs.get_value(
group1, group2)[1]
!= 4)):
map_interaction += '? ' map_interaction += '? '
info(interaction, map_interaction) info(interaction, map_interaction)
if group1 == group2:
if g1==g2:
break break
info('-') info('-')
info('--------------- Exceptions ----------------------------') info('--------------- Exceptions ----------------------------')
info('COO-HIS', self.COO_HIS_exception) info('COO-HIS', self.COO_HIS_exception)
info('OCO-HIS', self.OCO_HIS_exception) info('OCO-HIS', self.OCO_HIS_exception)
info('CYS-HIS', self.CYS_HIS_exception) info('CYS-HIS', self.CYS_HIS_exception)
info('CYS-CYS', self.CYS_CYS_exception) info('CYS-CYS', self.CYS_CYS_exception)
info('--------------- Mapping -------------------------------') info('--------------- Mapping -------------------------------')
info(""" info("""
Titratable: Titratable:
@@ -251,284 +295,281 @@ NAM
N1 N1
O2 O2
""") """)
return
def print_interaction_parameters_latex(self): def print_interaction_parameters_latex(self):
# info('--------------- Model pKa values ----------------------') """Print interaction parameters in LaTeX format."""
# for k in self.model_pkas.keys(): # TODO - if these lists and dictionaries are the same as above, then
# info('%3s %8.2f'%(k,self.model_pkas[k])) # should be constants at the level of the module
agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD',
# info('') 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32',
# info('--------------- Interactions --------------------------') 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM',
agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] 'N1', 'O2', 'OP', 'SH']
lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO',
'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP',
map = { 'SH']
'CG' :['ARG'], lines = [
'C2N':['ARG'], "",
'N30':['N+','LYS'], "\\begin{{longtable}}{{lllll}}",
'N31':['N+','LYS'], ("\\caption{{Ligand interaction parameters. For interactions not "
'N32':['N+','LYS'], "listed, the default value of {0:s} is applied.}}").format(
'N33':['N+','LYS'] , self.sidechain_cutoffs.default),
'NAR':['HIS'], "\\label{{tab:ligand_interaction_parameters}}\\\\",
'OCO':['COO'], "\\toprule",
'OP' :[],#['TYR','SER'], "Group1 & Group2 & Interaction & c1 &c2 \\\\",
'SH' :['CYS'] , "\\midrule",
'NP1':['AMD'], "\\endfirsthead",
'OH' :['ROH'], "",
'O3' :[] , "\\multicolumn{{5}}{{l}}{\\emph{{continued from the previous page}}}\\\\",
'CL' :[], "\\toprule",
'F' :[], "Group1 & Group2 & Interaction & c1 &c2 \\\\",
'NAM':[], "\\midrule",
'N1' :[], "\\endhead",
'O2' :[]} "",
"\\midrule",
"\\multicolumn{{5}}{{r}}{\\emph{{continued on the next page}}}\\\\",
s = """ "\\endfoot",
\\begin{longtable}{lllll} "",
\\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.} "\\bottomrule",
\\label{tab:ligand_interaction_parameters}\\\\ "\\endlastfoot",
""]
\\toprule str_ = "\n".join(lines)
Group1 & Group2 & Interaction & c1 &c2 \\\\ for group1 in agroups:
\\midrule for group2 in lgroups:
\\endfirsthead if self.interaction_matrix[group1][group2] == '-':
\\multicolumn{5}{l}{\\emph{continued from the previous page}}\\\\
\\toprule
Group1 & Group2 & Interaction & c1 &c2 \\\\
\\midrule
\\endhead
\\midrule
\\multicolumn{5}{r}{\\emph{continued on the next page}}\\\\
\\endfoot
\\bottomrule
\\endlastfoot
"""%(self.sidechain_cutoffs.default)
for g1 in agroups:
for g2 in lgroups:
if self.interaction_matrix[g1][g2]=='-':
continue continue
if self.sidechain_cutoffs.get_value(g1,g2)==self.sidechain_cutoffs.default: if (self.sidechain_cutoffs.get_value(group1, group2)
== self.sidechain_cutoffs.default):
continue continue
fmt = (
"{grp1:>3s} & {grp2:>3s} & {mat:1s} & {val1:4} & "
s+= '%3s & %3s & %1s & %4s & %4s\\\\ \n'%(g1,g2, "{val2:4}\\\\ \n")
self.interaction_matrix[g1][g2], str_ += fmt.format(
self.sidechain_cutoffs.get_value(g1,g2)[0], group1, group2,
self.sidechain_cutoffs.get_value(g1,g2)[1]) self.interaction_matrix[group1][group2],
self.sidechain_cutoffs.get_value(group1, group2)[0],
if g1==g2: self.sidechain_cutoffs.get_value(group1, group2)[1])
if group1 == group2:
break break
str_ += ' \\end{{longtable}}\n'
s += ' \\end{longtable}\n' info(str_)
info(s)
return
def print_interactions_latex(self): def print_interactions_latex(self):
agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] """Print interactions in LaTeX."""
lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] # TODO - are these the same lists as above? Convert to module constants.
agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD',
'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32',
s = """ 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM',
\\begin{longtable}{%s} 'N1', 'O2', 'OP', 'SH']
\\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.} lines = [
\\label{tab:ligand_interaction_parameters}\\\\ "",
"\\begin{longtable}{{{0:s}}}".format('l'*len(agroups)),
\\toprule ("\\caption{{Ligand interaction parameters. For interactions not "
Group1 & Group2 & Interaction & c1 &c2 \\\\ "listed, the default value of {0:s} is applied.}}").format(
\\midrule str(self.sidechain_cutoffs.default)),
\\endfirsthead "\\label{{tab:ligand_interaction_parameters}}\\\\",
"\\toprule",
\\multicolumn{5}{l}{\\emph{continued from the previous page}}\\\\ "Group1 & Group2 & Interaction & c1 &c2 \\\\",
\\toprule "\\midrule",
Group1 & Group2 & Interaction & c1 &c2 \\\\ "\\endfirsthead",
\\midrule "",
\\endhead "\\multicolumn{{5}}{{l}}{\\emph{{continued from the previous page}}}\\\\",
"\\toprule",
\\midrule "Group1 & Group2 & Interaction & c1 &c2 \\\\",
\\multicolumn{5}{r}{\\emph{continued on the next page}}\\\\ "\\midrule",
\\endfoot "\\endhead",
"",
\\bottomrule "\\midrule",
\\endlastfoot "\\multicolumn{{5}}{{r}}{\\emph{{continued on the next page}}}\\\\",
"\\endfoot",
"""%('l'*len(agroups),self.sidechain_cutoffs.default) "",
for g1 in agroups: "\\bottomrule",
for g2 in agroups: "\\endlastfoot",
""
s+= '%3s & %3s & %1s & %4s & %4s\\\\ \n'%(g1,g2, ]
self.interaction_matrix[g1][g2], str_ = "\n".join(lines)
self.sidechain_cutoffs.get_value(g1,g2)[0], for group1 in agroups:
self.sidechain_cutoffs.get_value(g1,g2)[1]) for group2 in agroups:
fmt = '{g1:>3s} & {g2:>3s} & {mat:1s} & {val1:>4s} & {val2:>4s}\\\\ \n'
if g1==g2: str_ += fmt.format(
group1, group2, self.interaction_matrix[group1][group2],
str(self.sidechain_cutoffs.get_value(group1, group2)[0]),
str(self.sidechain_cutoffs.get_value(group1, group2)[1]))
if group1 == group2:
break break
str_ += ' \\end{{longtable}}\n'
s += ' \\end{longtable}\n' info(str_)
info(s)
return
class InteractionMatrix:
"""Interaction matrix class."""
class Interaction_matrix:
def __init__(self, name): def __init__(self, name):
"""Initialize with name of matrix.
Args:
name: name of interaction matrix
"""
self.name = name self.name = name
self.value = None
self.ordered_keys = [] self.ordered_keys = []
self.dictionary = {} self.dictionary = {}
return
def add(self,words): def add(self, words):
"""Add values to matrix.
Args:
words: values to add
"""
new_group = words[0] new_group = words[0]
self.ordered_keys.append(new_group) self.ordered_keys.append(new_group)
if not new_group in self.dictionary.keys(): if not new_group in self.dictionary.keys():
self.dictionary[new_group] = {} self.dictionary[new_group] = {}
for i, group in enumerate(self.ordered_keys):
for i in range(len(self.ordered_keys)): if len(words) > i+1:
group = self.ordered_keys[i]
if len(words)>i+1:
try: try:
exec('self.value = %s'%words[i+1]) self.value = float(words[i+1])
except: except ValueError:
self.value = words[i+1] self.value = words[i+1]
self.dictionary[group][new_group] = self.value self.dictionary[group][new_group] = self.value
self.dictionary[new_group][group] = self.value self.dictionary[new_group][group] = self.value
return
def get_value(self, item1, item2): def get_value(self, item1, item2):
"""Get specific matrix value.
Args:
item1: matrix row index
item2: matrix column index
Returns:
matrix value or None
"""
try: try:
return self.dictionary[item1][item2] return self.dictionary[item1][item2]
except: except KeyError:
return None return None
def __getitem__(self, group): def __getitem__(self, group):
"""Get specific group from matrix.
Args:
group: group to get
"""
if group not in self.dictionary.keys(): if group not in self.dictionary.keys():
raise Exception('%s not found in interaction matrix %s'%(group,self.name)) str_ = '{0:s} not found in interaction matrix {1:s}'.format(
group, self.name)
raise KeyError(str_)
return self.dictionary[group] return self.dictionary[group]
def keys(self): def keys(self):
"""Get keys from matrix.
Returns:
dictionary key list
"""
return self.dictionary.keys() return self.dictionary.keys()
def __str__(self): def __str__(self):
s = ' ' str_ = ' '
for k1 in self.ordered_keys: for key in self.ordered_keys:
s+='%3s '%k1 str_ += '{0:>3s} '.format(key)
s+='\n' str_ += '\n'
for k1 in self.ordered_keys: for key1 in self.ordered_keys:
s+='%3s '%k1 str_ += '{0:>3s} '.format(key1)
for k2 in self.ordered_keys: for key2 in self.ordered_keys:
s+='%3s '%self[k1][k2] str_ += '{0:>3s} '.format(self[key1][key2])
s+='\n' str_ += '\n'
return str_
return s
# ks = ['COO', 'SER', 'ARG', 'LYS', 'HIS', 'AMD', 'CYS', 'TRP','ROH','TYR','N+','CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH']
# p = ''
# n=0
# for i in range(len(ks)):
# for j in range(i,len(ks)):
# if not [0.0,0.0]==self[ks[i]][ks[j]]:
# if not [3.0,4.0]==self[ks[i]][ks[j]]:
# p+='sidechain_cutoff %3s %3s %s\n'%(ks[i],ks[j],self[ks[i]][ks[j]])
# n+=1
# info('total',n,len(ks))
# return p
class PairwiseMatrix:
"""Pairwise interaction matrix class."""
class Pair_wise_matrix:
def __init__(self, name): def __init__(self, name):
"""Initialize pairwise matrix.
Args:
name: name of pairwise interaction
"""
self.name = name self.name = name
self.dictionary = {} self.dictionary = {}
self.default = [0.0, 0.0] self.default = [0.0, 0.0]
return
def add(self,words): def add(self, words):
"""Add information to the matrix.
TODO - this function unnecessarily bundles arguments into a tuple
Args:
words: tuple with assignment information and value
"""
# assign the default value # assign the default value
if len(words)==3 and words[0]=='default': if len(words) == 3 and words[0] == 'default':
self.default = [float(words[1]), float(words[2])] self.default = [float(words[1]), float(words[2])]
return return
# assign non-default values # assign non-default values
g1 = words[0] group1 = words[0]
g2 = words[1] group2 = words[1]
v = [float(words[2]), float(words[3])] value = [float(words[2]), float(words[3])]
self.insert(group1, group2, value)
self.insert(group2, group1, value)
self.insert(g1,g2,v) def insert(self, key1, key2, value):
self.insert(g2,g1,v) """Insert value into matrix.
return Args:
key1: first matrix key (row)
def insert(self, k1,k2,v): key2: second matrix key (column)
value: value to insert
if k1 in self.dictionary.keys() and k2 in self.dictionary[k1].keys(): """
if k1!=k2: if key1 in self.dictionary and key2 in self.dictionary[key1]:
warning('Parameter value for %s, %s defined more than once' % (k1, k2)) if key1 != key2:
str_ = (
if not k1 in self.dictionary: 'Parameter value for {0:s}, {1:s} defined more '
self.dictionary[k1] = {} 'than once'.format(key1, key2))
warning(str_)
self.dictionary[k1][k2] =v if not key1 in self.dictionary:
self.dictionary[key1] = {}
return self.dictionary[key1][key2] = value
def get_value(self, item1, item2): def get_value(self, item1, item2):
"""Get specified value from matrix.
Args:
item1: row index
item2: column index
Returns:
matrix value (or default)
"""
try: try:
return self.dictionary[item1][item2] return self.dictionary[item1][item2]
except: except KeyError:
return self.default return self.default
def __getitem__(self, group): def __getitem__(self, group):
"""Get item from matrix corresponding to specific group.
Args:
group: group to retrieve
Returns:
matrix information
"""
if group not in self.dictionary.keys(): if group not in self.dictionary.keys():
raise Exception('%s not found in interaction matrix %s'%(group,self.name)) str_ = '{0:s} not found in interaction matrix {1:s}'.format(
group, self.name)
raise KeyError(str_)
return self.dictionary[group] return self.dictionary[group]
def keys(self): def keys(self):
"""Get keys from matrix.
Returns:
dictionary key list
"""
return self.dictionary.keys() return self.dictionary.keys()
def __str__(self): def __str__(self):
s='' str_ = ''
for k1 in self.keys(): for key1 in self.keys():
for k2 in self[k1].keys(): for key2 in self[key1].keys():
s += '%s %s %s\n'%(k1,k2,self[k1][k2]) str_ += '{0:s} {1:s} {2:s}\n'.format(
key1, key2, self[key1][key2])
return s return str_

View File

@@ -1,239 +1,257 @@
"""PDB parsing functionality."""
from __future__ import division
from __future__ import print_function
import string, sys, copy
import propka.lib import propka.lib
from propka.lib import info, warning from propka.lib import warning
from propka.atom import Atom from propka.atom import Atom
from propka.conformation_container import Conformation_container from propka.conformation_container import ConformationContainer
expected_atom_numbers = {'ALA':5,
'ARG':11, EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6,
'ASN':8, 'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8,
'ASP':8, 'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7,
'CYS':6, 'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7}
'GLY':4,
'GLN':9,
'GLU':9,
'HIS':10,
'ILE':8,
'LEU':8,
'LYS':9,
'MET':8,
'PHE':11,
'PRO':7,
'SER':6,
'THR':7,
'TRP':14,
'TYR':12,
'VAL':7}
def read_pdb(pdb_file, parameters, molecule): def read_pdb(pdb_file, parameters, molecule):
conformations = {} """Parse a PDB file.
Args:
pdb_file: file to read
parameters: parameters to guide parsing
molecule: molecular container
Returns:
list with elements:
1. list of conformations
2. list of names
"""
conformations = {}
# read in all atoms in the file # read in all atoms in the file
lines = get_atom_lines_from_pdb(pdb_file, ignore_residues = parameters.ignore_residues, keep_protons = molecule.options.keep_protons, chains=molecule.options.chains) lines = get_atom_lines_from_pdb(
pdb_file, ignore_residues=parameters.ignore_residues,
keep_protons=molecule.options.keep_protons,
chains=molecule.options.chains)
for (name, atom) in lines: for (name, atom) in lines:
if not name in conformations.keys(): if not name in conformations.keys():
conformations[name] = Conformation_container(name=name, parameters=parameters, molecular_container=molecule) conformations[name] = ConformationContainer(
name=name, parameters=parameters, molecular_container=molecule)
conformations[name].add_atom(atom) conformations[name].add_atom(atom)
# make a sorted list of conformation names # make a sorted list of conformation names
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
return [conformations, names] return [conformations, names]
def protein_precheck(conformations, names):
def protein_precheck(conformations, names):
"""Check protein for correct number of atoms, etc.
Args:
names: conformation names to check
"""
for name in names: for name in names:
atoms = conformations[name].atoms atoms = conformations[name].atoms
# Group the atoms by their residue: # Group the atoms by their residue:
atoms_by_residue = {} atoms_by_residue = {}
for a in atoms: for atom in atoms:
if a.element != 'H': if atom.element != 'H':
res_id = resid_from_atom(a) res_id = resid_from_atom(atom)
try: try:
atoms_by_residue[res_id].append(a) atoms_by_residue[res_id].append(atom)
except KeyError: except KeyError:
atoms_by_residue[res_id] = [a] atoms_by_residue[res_id] = [atom]
for res_id, res_atoms in atoms_by_residue.items(): for res_id, res_atoms in atoms_by_residue.items():
res_name = res_atoms[0].res_name res_name = res_atoms[0].res_name
residue_label = '%3s%5s'%(res_name, res_id) residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id)
# ignore ligand residues # ignore ligand residues
if res_name not in expected_atom_numbers: if res_name not in EXPECTED_ATOM_NUMBERS:
continue continue
# check for c-terminal # check for c-terminal
if 'C-' in [a.terminal for a in res_atoms]: if 'C-' in [a.terminal for a in res_atoms]:
if len(res_atoms) != expected_atom_numbers[res_name]+1: if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1:
warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name)) str_ = ("Unexpected number ({num:d}) of atoms in residue "
"{res:s} in conformation {conf:s}".format(
num=len(res_atoms), res=residue_label,
conf=name))
warning(str_)
continue continue
# check number of atoms in residue # check number of atoms in residue
if len(res_atoms) != expected_atom_numbers[res_name]: if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]:
warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name)) str_ = ("Unexpected number ({num:d}) of atoms in residue "
"{res:s} in conformation {conf:s}".format(
return num=len(res_atoms), res=residue_label,
conf=name))
def resid_from_atom(a): warning(str_)
return '%4d %s %s'%(a.res_num,a.chain_id,a.icode)
def get_atom_lines_from_pdb(pdb_file, ignore_residues = [], keep_protons=False, tags = ['ATOM ', 'HETATM'], chains=None): def resid_from_atom(atom):
"""Return string with atom residue information.
Args:
atom: atom to generate string for
Returns
string
"""
return '{0:>4d} {1:s} {2:s}'.format(
atom.res_num, atom.chain_id, atom.icode)
def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False,
tags=['ATOM ', 'HETATM'], chains=None):
"""Get atom lines from PDB file.
Args:
pdb_file: PDB file to parse
ignore_residues: list of residues to ignore
keep_protons: bool to keep/ignore protons
tags: tags of lines that include atoms
chains: list of chains
"""
lines = propka.lib.open_file_for_reading(pdb_file).readlines() lines = propka.lib.open_file_for_reading(pdb_file).readlines()
nterm_residue = 'next_residue' nterm_residue = 'next_residue'
old_residue = None old_residue = None
terminal = None terminal = None
model = 1 model = 1
for line in lines: for line in lines:
tag = line[0:6] tag = line[0:6]
# set the model number # set the model number
if tag == 'MODEL ': if tag == 'MODEL ':
model = int(line[6:]) model = int(line[6:])
nterm_residue = 'next_residue' nterm_residue = 'next_residue'
if tag == 'TER ': if tag == 'TER ':
nterm_residue = 'next_residue' nterm_residue = 'next_residue'
if tag in tags: if tag in tags:
alt_conf_tag = line[16] alt_conf_tag = line[16]
residue_name = line[12:16] residue_name = line[12: 16]
residue_number = line[22:26] residue_number = line[22: 26]
# check if we want this residue # check if we want this residue
if line[17:20] in ignore_residues: if line[17: 20] in ignore_residues:
continue continue
if chains and line[21] not in chains: if chains and line[21] not in chains:
continue continue
# set the Nterm residue number - nessecary because we may need to # set the Nterm residue number - nessecary because we may need to
# identify more than one N+ group for structures with alt_conf tags # identify more than one N+ group for structures with alt_conf tags
if nterm_residue == 'next_residue' and tag == 'ATOM ': if nterm_residue == 'next_residue' and tag == 'ATOM ':
# make sure that we reached a new residue - nessecary if OXT is not the last atom in # make sure that we reached a new residue - nessecary if OXT is
# the previous residue # not the last atom inthe previous residue
if old_residue != residue_number: if old_residue != residue_number:
nterm_residue = residue_number nterm_residue = residue_number
old_residue = None old_residue = None
# Identify the configuration # Identify the configuration
# convert digits to letters # convert digits to letters
if alt_conf_tag in '123456789': if alt_conf_tag in '123456789':
alt_conf_tag = chr(ord(alt_conf_tag)+16) alt_conf_tag = chr(ord(alt_conf_tag)+16)
if alt_conf_tag == ' ': if alt_conf_tag == ' ':
alt_conf_tag = 'A' alt_conf_tag = 'A'
conformation = '%d%s'%(model, alt_conf_tag) conformation = '{0:d}{1:s}'.format(model, alt_conf_tag)
# set the terminal # set the terminal
if tag == 'ATOM ': if tag == 'ATOM ':
if residue_name.strip() == 'N' and nterm_residue == residue_number: if (residue_name.strip() == 'N'
and nterm_residue == residue_number):
terminal = 'N+' terminal = 'N+'
if residue_name.strip() in ['OXT','O\'\'']: if residue_name.strip() in ['OXT', 'O\'\'']:
terminal = 'C-' terminal = 'C-'
nterm_residue = 'next_residue' nterm_residue = 'next_residue'
old_residue = residue_number old_residue = residue_number
# and yield the atom # and yield the atom
atom = Atom(line=line) atom = Atom(line=line)
atom.terminal = terminal atom.terminal = terminal
#if keep_protons: #ignore hydrogen
# atom.is_protonated = True if not (atom.element == 'H' and not keep_protons):
if not (atom.element == 'H' and not keep_protons): #ignore hydrogen
yield (conformation, atom) yield (conformation, atom)
terminal = None terminal = None
return
def write_pdb(conformation, filename): def write_pdb(conformation, filename):
"""Write PDB conformation to a file.
Args:
conformation: conformation container
filename: filename for output
"""
write_pdb_for_atoms(conformation.atoms, filename) write_pdb_for_atoms(conformation.atoms, filename)
return
def write_pdb_for_atoms(atoms, filename, make_conect_section=False): def write_pdb_for_atoms(atoms, filename, make_conect_section=False):
out = propka.lib.open_file_for_writing(filename) """Write out PDB file for atoms.
Args:
atoms: list of atoms
filename: name of file
make_conect_section: generate a CONECT PDB section
"""
out = propka.lib.open_file_for_writing(filename)
for atom in atoms: for atom in atoms:
out.write(atom.make_pdb_line()) out.write(atom.make_pdb_line())
if make_conect_section: if make_conect_section:
for atom in atoms: for atom in atoms:
out.write(atom.make_conect_line()) out.write(atom.make_conect_line())
out.close() out.close()
return
def write_mol2_for_atoms(atoms, filename): def write_mol2_for_atoms(atoms, filename):
"""Write out MOL2 file for atoms.
header = '@<TRIPOS>MOLECULE\n\n%d %d\nSMALL\nUSER_CHARGES\n' Args:
atoms: list of atoms
filename: name of file
"""
# TODO - header needs to be converted to format string
header = '@<TRIPOS>MOLECULE\n\n{natom:d} {id:d}\nSMALL\nUSER_CHARGES\n'
atoms_section = '@<TRIPOS>ATOM\n' atoms_section = '@<TRIPOS>ATOM\n'
for i in range(len(atoms)): for i, atom in enumerate(atoms):
atoms_section += atoms[i].make_mol2_line(i+1) atoms_section += atom.make_mol2_line(i+1)
bonds_section = '@<TRIPOS>BOND\n' bonds_section = '@<TRIPOS>BOND\n'
id = 1 id_ = 1
for i in range(len(atoms)): for i, atom1 in enumerate(atoms):
for j in range(i+1,len(atoms)): for j, atom2 in enumerate(atoms, i+1):
if atoms[i] in atoms[j].bonded_atoms: if atom1 in atom2.bonded_atoms:
type = get_bond_order(atoms[i],atoms[j]) type_ = get_bond_order(atom1, atom2)
bonds_section += '%7d %7d %7d %7s\n'%(id, i+1, j+1, type) bonds_section += '{0:>7d} {1:>7d} {2:>7d} {3:>7s}\n'.format(
id+=1 id_, i+1, j+1, type_)
id_ += 1
substructure_section = '@<TRIPOS>SUBSTRUCTURE\n\n' substructure_section = '@<TRIPOS>SUBSTRUCTURE\n\n'
if len(atoms)>0: if len(atoms) > 0:
substructure_section = '@<TRIPOS>SUBSTRUCTURE\n%-7d %10s %7d\n'%(atoms[0].res_num,atoms[0].res_name,atoms[0].numb) substructure_section = (
'@<TRIPOS>SUBSTRUCTURE\n{0:<7d} {1:>10s} {2:>7d}\n'.format(
atoms[0].res_num, atoms[0].res_name, atoms[0].numb))
out = propka.lib.open_file_for_writing(filename) out = propka.lib.open_file_for_writing(filename)
out.write(header%(len(atoms),id-1)) out.write(header.format(natom=len(atoms), id=id_-1))
out.write(atoms_section) out.write(atoms_section)
out.write(bonds_section) out.write(bonds_section)
out.write(substructure_section) out.write(substructure_section)
out.close() out.close()
return
def get_bond_order(atom1, atom2): def get_bond_order(atom1, atom2):
type = '1' """Get the order of a bond between two atoms.
Args:
atom1: first atom in bond
atom2: second atom in bond
Returns:
string with bond type
"""
type_ = '1'
pi_electrons1 = atom1.num_pi_elec_2_3_bonds pi_electrons1 = atom1.num_pi_elec_2_3_bonds
pi_electrons2 = atom2.num_pi_elec_2_3_bonds pi_electrons2 = atom2.num_pi_elec_2_3_bonds
if '.ar' in atom1.sybyl_type: if '.ar' in atom1.sybyl_type:
pi_electrons1 -=1 pi_electrons1 -= 1
if '.ar' in atom2.sybyl_type: if '.ar' in atom2.sybyl_type:
pi_electrons2 -=1 pi_electrons2 -= 1
if pi_electrons1 > 0 and pi_electrons2 > 0: if pi_electrons1 > 0 and pi_electrons2 > 0:
type = '%d'%(min(pi_electrons1, pi_electrons2)+1) type_ = '{0:d}'.format(min(pi_electrons1, pi_electrons2)+1)
if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type: if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type:
type = 'ar' type_ = 'ar'
return type_
return type
def write_input(molecular_container, filename): def write_input(molecular_container, filename):
out = propka.lib.open_file_for_writing(filename) """Write PROPKA input file for molecular container.
Args:
molecular_container: molecular container
filename: output file name
"""
out = propka.lib.open_file_for_writing(filename)
for conformation_name in molecular_container.conformation_names: for conformation_name in molecular_container.conformation_names:
out.write('MODEL %s\n'%conformation_name) out.write('MODEL {0:s}\n'.format(conformation_name))
# write atoms # write atoms
for atom in molecular_container.conformations[conformation_name].atoms: for atom in molecular_container.conformations[conformation_name].atoms:
out.write(atom.make_input_line()) out.write(atom.make_input_line())
@@ -241,49 +259,59 @@ def write_input(molecular_container, filename):
for atom in molecular_container.conformations[conformation_name].atoms: for atom in molecular_container.conformations[conformation_name].atoms:
out.write(atom.make_conect_line()) out.write(atom.make_conect_line())
# write covalently coupled groups # write covalently coupled groups
for group in molecular_container.conformations[conformation_name].groups: for group in (
molecular_container.conformations[conformation_name].groups):
out.write(group.make_covalently_coupled_line()) out.write(group.make_covalently_coupled_line())
# write non-covalently coupled groups # write non-covalently coupled groups
for group in molecular_container.conformations[conformation_name].groups: for group in (
molecular_container.conformations[conformation_name].groups):
out.write(group.make_non_covalently_coupled_line()) out.write(group.make_non_covalently_coupled_line())
out.write('ENDMDL\n') out.write('ENDMDL\n')
out.close() out.close()
return
def read_input(input_file, parameters, molecule):
"""Read PROPKA input file for molecular container.
def read_input(input_file, parameters,molecule): Args:
input_file: input file
parameters: parameters for parsing/setup
molecule: molecular container
Returns:
list with [conformations, names of conformations]
"""
conformations = {} conformations = {}
# read in all atoms in the input file # read in all atoms in the input file
lines = get_atom_lines_from_input(input_file) lines = get_atom_lines_from_input(input_file)
for (name, atom) in lines: for (name, atom) in lines:
if not name in conformations.keys(): if not name in conformations.keys():
conformations[name] = Conformation_container(name=name, parameters=parameters, molecular_container=molecule) conformations[name] = ConformationContainer(
name=name, parameters=parameters,
molecular_container=molecule)
conformations[name].add_atom(atom) conformations[name].add_atom(atom)
# make a sorted list of conformation names # make a sorted list of conformation names
names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) names = sorted(conformations.keys(), key=propka.lib.conformation_sorter)
return [conformations, names] return [conformations, names]
def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']):
"""Get atom lines from a PROPKA input file.
def get_atom_lines_from_input(input_file, tags = ['ATOM ','HETATM']): Args:
input_file: input file
tags: tags defining atom lines
Yields:
conformation container, list of atoms
"""
lines = propka.lib.open_file_for_reading(input_file).readlines() lines = propka.lib.open_file_for_reading(input_file).readlines()
conformation = '' conformation = ''
atoms = {} atoms = {}
numbers = [] numbers = []
for line in lines: for line in lines:
tag = line[0:6] tag = line[0:6]
# set the conformation # set the conformation
if tag == 'MODEL ': if tag == 'MODEL ':
conformation = line[6:].strip() conformation = line[6:].strip()
# found an atom - save it # found an atom - save it
if tag in tags: if tag in tags:
atom = Atom(line=line) atom = Atom(line=line)
@@ -292,46 +320,39 @@ def get_atom_lines_from_input(input_file, tags = ['ATOM ','HETATM']):
atom.is_protonated = True atom.is_protonated = True
atoms[atom.numb] = atom atoms[atom.numb] = atom
numbers.append(atom.numb) numbers.append(atom.numb)
# found bonding information - apply it # found bonding information - apply it
if tag == 'CONECT' and len(line)>14: if tag == 'CONECT' and len(line) > 14:
conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)]
center_atom = atoms[int(conect_numbers[0])] center_atom = atoms[int(conect_numbers[0])]
for n in conect_numbers[1:]: for num in conect_numbers[1:]:
b = atoms[int(n)] bond_atom = atoms[int(num)]
# remember to check for cysteine bridges # remember to check for cysteine bridges
if center_atom.element == 'S' and b.element == 'S': if center_atom.element == 'S' and bond_atom.element == 'S':
center_atom.cysteine_bridge = True center_atom.cysteine_bridge = True
b.cysteine_bridge = True bond_atom.cysteine_bridge = True
# set up bonding # set up bonding
if not b in center_atom.bonded_atoms: if not bond_atom in center_atom.bonded_atoms:
center_atom.bonded_atoms.append(b) center_atom.bonded_atoms.append(bond_atom)
if not center_atom in b.bonded_atoms: if not center_atom in bond_atom.bonded_atoms:
b.bonded_atoms.append(center_atom) bond_atom.bonded_atoms.append(center_atom)
# found info on covalent coupling # found info on covalent coupling
if tag == 'CCOUPL' and len(line)>14: if tag == 'CCOUPL' and len(line) > 14:
conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)]
center_atom = atoms[int(conect_numbers[0])] center_atom = atoms[int(conect_numbers[0])]
for n in conect_numbers[1:]: for num in conect_numbers[1:]:
cg = atoms[int(n)] cov_atom = atoms[int(num)]
center_atom.group.couple_covalently(cg.group) center_atom.group.couple_covalently(cov_atom.group)
# found info on non-covalent coupling # found info on non-covalent coupling
if tag == 'NCOUPL' and len(line)>14: if tag == 'NCOUPL' and len(line) > 14:
conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)]
center_atom = atoms[int(conect_numbers[0])] center_atom = atoms[int(conect_numbers[0])]
for n in conect_numbers[1:]: for num in conect_numbers[1:]:
cg = atoms[int(n)] cov_atom = atoms[int(num)]
center_atom.group.couple_non_covalently(cg.group) center_atom.group.couple_non_covalently(cov_atom.group)
# this conformation is done - yield the atoms # this conformation is done - yield the atoms
if tag == 'ENDMDL': if tag == 'ENDMDL':
for n in numbers: for num in numbers:
yield (conformation, atoms[n]) yield (conformation, atoms[num])
# prepare for next conformation # prepare for next conformation
atoms = {} atoms = {}
numbers = [] numbers = []
return

View File

@@ -1,6 +1,6 @@
# PropKa configuration file # PropKa configuration file
version version_A version VersionA
# Model pKa values # Model pKa values
model_pkas C- 3.20 model_pkas C- 3.20
@@ -342,7 +342,7 @@ common_charge_centre 0
remove_penalised_group 1 remove_penalised_group 1
# non-covalent coupling # non-covalent coupling
max_intrinsic_pKa_diff 2.0 max_intrinsic_pka_diff 2.0
min_interaction_energy 0.5 min_interaction_energy 0.5
max_free_energy_diff 1.0 max_free_energy_diff 1.0
min_swap_pka_shift 1.0 min_swap_pka_shift 1.0

View File

@@ -1,441 +1,365 @@
#!/usr/bin/python """Protonate a structure."""
import math
import propka.bonds
import propka.atom
from propka.vector_algebra import rotate_vector_around_an_axis, Vector
from propka.lib import warning, debug
from __future__ import division
from __future__ import print_function
from propka.vector_algebra import *
import propka.bonds, propka.pdb, propka.atom
from propka.lib import info, warning, debug
class Protonate: class Protonate:
""" Protonates atoms using VSEPR theory """ """ Protonates atoms using VSEPR theory """
def __init__(self, verbose=False): def __init__(self, verbose=False):
self.verbose=verbose """Initialize with flag for verbosity
self.valence_electrons = {'H': 1,
'He':2,
'Li':1,
'Be':2,
'B': 3,
'C': 4,
'N': 5,
'O': 6,
'F': 7,
'Ne':8,
'Na':1,
'Mg':2,
'Al':3,
'Si':4,
'P': 5,
'S': 6,
'Cl':7,
'Ar':8,
'K': 1,
'Ca':2,
'Sc':2,
'Ti':2,
'Va':2,
'Cr':1,
'Mn':2,
'Fe':2,
'Co':2,
'Ni':2,
'Cu':1,
'Zn':2,
'Ga':3,
'Ge':4,
'As':5,
'Se':6,
'Br':7,
'Kr':8,
'I':7,
}
self.standard_charges= {'ARG-NH1':1.0,
'ASP-OD2':-1.0,
'GLU-OE2':-1.0,
'HIS-ND1':1.0,
'LYS-NZ':1.0,
'N+':1.0,
'C-':-1.0}
self.sybyl_charges = {'N.pl3':+1,
'N.3':+1,
'N.4':+1,
'N.ar':+1,
'O.co2-':-1}
self.bond_lengths = {'C':1.09,
'N':1.01,
'O':0.96,
'F':0.92,
'Cl':1.27,
'Br':1.41,
'I':1.61,
'S':1.35}
# protonation_methods[steric_number] = method
self.protonation_methods = {4:self.tetrahedral,
3:self.trigonal}
return
Args:
verbose: True for verbose output
"""
self.verbose = verbose
self.valence_electrons = {
'H': 1, 'He': 2, 'Li': 1, 'Be': 2, 'B': 3, 'C': 4, 'N': 5,
'O': 6, 'F': 7, 'Ne': 8, 'Na': 1, 'Mg': 2, 'Al': 3, 'Si': 4,
'P': 5, 'S': 6, 'Cl': 7, 'Ar': 8, 'K': 1, 'Ca': 2, 'Sc': 2,
'Ti': 2, 'Va': 2, 'Cr': 1, 'Mn': 2, 'Fe': 2, 'Co': 2, 'Ni': 2,
'Cu': 1, 'Zn': 2, 'Ga': 3, 'Ge': 4, 'As': 5, 'Se': 6, 'Br': 7,
'Kr': 8, 'I': 7}
# TODO - consider putting charges in a configuration file
self.standard_charges = {
'ARG-NH1': 1.0, 'ASP-OD2': -1.0, 'GLU-OE2': -1.0, 'HIS-ND1': 1.0,
'LYS-NZ': 1.0, 'N+': 1.0, 'C-': -1.0}
self.sybyl_charges = {
'N.pl3': 1, 'N.3': 1, 'N.4': 1, 'N.ar': 1, 'O.co2-': 1}
# TODO - consider putting bond lengths in a configuration file
self.bond_lengths = {
'C': 1.09, 'N': 1.01, 'O': 0.96, 'F': 0.92, 'Cl': 1.27,
'Br': 1.41, 'I': 1.61, 'S': 1.35}
self.protonation_methods = {4: self.tetrahedral, 3: self.trigonal}
def protonate(self, molecules): def protonate(self, molecules):
""" Will protonate all atoms in the molecular container """ """Protonate all atoms in the molecular container.
Args:
molecules: molecular containers
"""
debug('----- Protonation started -----') debug('----- Protonation started -----')
# Remove all currently present hydrogen atoms # Remove all currently present hydrogen atoms
self.remove_all_hydrogen_atoms(molecules) self.remove_all_hydrogen_atoms(molecules)
# protonate all atoms # protonate all atoms
for name in molecules.conformation_names: for name in molecules.conformation_names:
non_H_atoms = molecules.conformations[name].get_non_hydrogen_atoms() non_h_atoms = (molecules.conformations[name]
.get_non_hydrogen_atoms())
for atom in non_H_atoms: for atom in non_h_atoms:
self.protonate_atom(atom) self.protonate_atom(atom)
# fix hydrogen names @staticmethod
#self.set_proton_names(non_H_atoms) def remove_all_hydrogen_atoms(molecular_container):
"""Remove all hydrogen atoms from molecule.
return Args:
molecular_container: molecule to remove hydrogens from
"""
def remove_all_hydrogen_atoms(self, molecular_container):
for name in molecular_container.conformation_names: for name in molecular_container.conformation_names:
molecular_container.conformations[name].atoms = molecular_container.conformations[name].get_non_hydrogen_atoms() molecular_container.conformations[name].atoms = (
return molecular_container.conformations[name]
.get_non_hydrogen_atoms())
def set_charge(self, atom): def set_charge(self, atom):
"""Set charge for atom.
Args:
atom: atom to be charged
"""
# atom is a protein atom # atom is a protein atom
if atom.type=='atom': if atom.type == 'atom':
key = '%3s-%s'%(atom.res_name, atom.name) key = '{0:3s}-{1:s}'.format(atom.res_name, atom.name)
if atom.terminal: if atom.terminal:
debug(atom.terminal) debug(atom.terminal)
key=atom.terminal key = atom.terminal
if key in list(self.standard_charges.keys()): if key in self.standard_charges:
atom.charge = self.standard_charges[key] atom.charge = self.standard_charges[key]
debug('Charge', atom, atom.charge) debug('Charge', atom, atom.charge)
atom.charge_set = True atom.charge_set = True
# atom is a ligand atom # atom is a ligand atom
elif atom.type=='hetatm': elif atom.type == 'hetatm':
if atom.sybyl_type in list(self.sybyl_charges.keys()): if atom.sybyl_type in self.sybyl_charges:
atom.charge = self.sybyl_charges[atom.sybyl_type] atom.charge = self.sybyl_charges[atom.sybyl_type]
atom.sybyl_type = atom.sybyl_type.replace('-','') atom.sybyl_type = atom.sybyl_type.replace('-', '')
atom.charge_set = True atom.charge_set = True
return
def protonate_atom(self, atom): def protonate_atom(self, atom):
if atom.is_protonated: return """Protonate an atom.
if atom.element == 'H': return
Args:
atom: atom to be protonated
"""
if atom.is_protonated:
return
if atom.element == 'H':
return
self.set_charge(atom) self.set_charge(atom)
self.set_number_of_protons_to_add(atom) self.set_number_of_protons_to_add(atom)
self.set_steric_number_and_lone_pairs(atom) self.set_steric_number_and_lone_pairs(atom)
self.add_protons(atom) self.add_protons(atom)
atom.is_protonated = True atom.is_protonated = True
return
def set_proton_names(self, heavy_atoms): @staticmethod
def set_proton_names(heavy_atoms):
"""Set names for protons.
Args:
heavy_atoms: list of heavy atoms with protons to be named
"""
for heavy_atom in heavy_atoms: for heavy_atom in heavy_atoms:
i = 1 i = 1
for bonded in heavy_atom.bonded_atoms: for bonded in heavy_atom.bonded_atoms:
if bonded.element == 'H': if bonded.element == 'H':
bonded.name+='%d'%i bonded.name += str(i)
i+=1 i += 1
return
def set_number_of_protons_to_add(self, atom): def set_number_of_protons_to_add(self, atom):
debug('*'*10) """Set the number of protons to add to this atom.
debug('Setting number of protons to add for',atom)
atom.number_of_protons_to_add = 8
debug(' %4d'%8)
atom.number_of_protons_to_add -= self.valence_electrons[atom.element]
debug('Valence eletrons: %4d'%-self.valence_electrons[atom.element])
atom.number_of_protons_to_add -= len(atom.bonded_atoms)
debug('Number of bonds: %4d'%- len(atom.bonded_atoms))
atom.number_of_protons_to_add -= atom.num_pi_elec_2_3_bonds
debug('Pi electrons: %4d'%-atom.num_pi_elec_2_3_bonds)
atom.number_of_protons_to_add += int(atom.charge)
debug('Charge: %4.1f'%atom.charge)
Args:
atom: atom for calculation
"""
debug('*'*10)
debug('Setting number of protons to add for', atom)
atom.number_of_protons_to_add = 8
debug(" 8")
atom.number_of_protons_to_add -= self.valence_electrons[atom.element]
debug('Valence electrons: {0:>4d}'.format(
-self.valence_electrons[atom.element]))
atom.number_of_protons_to_add -= len(atom.bonded_atoms)
debug('Number of bonds: {0:>4d}'.format(-len(atom.bonded_atoms)))
atom.number_of_protons_to_add -= atom.num_pi_elec_2_3_bonds
debug('Pi electrons: {0:>4d}'.format(-atom.num_pi_elec_2_3_bonds))
atom.number_of_protons_to_add += int(atom.charge)
debug('Charge: {0:>4.1f}'.format(atom.charge))
debug('-'*10) debug('-'*10)
debug(atom.number_of_protons_to_add) debug(atom.number_of_protons_to_add)
return
def set_steric_number_and_lone_pairs(self, atom): def set_steric_number_and_lone_pairs(self, atom):
"""Set steric number and lone pairs for atom.
Args:
atom: atom for calculation
"""
# If we already did this, there is no reason to do it again # If we already did this, there is no reason to do it again
if atom.steric_num_lone_pairs_set: if atom.steric_num_lone_pairs_set:
return return
debug('='*10) debug('='*10)
debug('Setting steric number and lone pairs for',atom) debug('Setting steric number and lone pairs for', atom)
# costumly set the N backbone atoms up for peptide bond trigonal planer shape
#if atom.name == 'N' and len(atom.bonded_atoms) == 2:
# atom.steric_number = 3
# atom.number_of_lone_pairs = 0
# self.display 'Peptide bond: steric number is %d and number of lone pairs is %s'%(atom.steric_number,
# atom.number_of_lone_pairs)
# return
atom.steric_number = 0 atom.steric_number = 0
debug('{0:>65s}: {1:>4d}'.format(
debug('%65s: %4d'%('Valence electrons',self.valence_electrons[atom.element])) 'Valence electrons', self.valence_electrons[atom.element]))
atom.steric_number += self.valence_electrons[atom.element] atom.steric_number += self.valence_electrons[atom.element]
debug('{0:>65s}: {1:>4d}'.format(
debug('%65s: %4d'%('Number of bonds',len(atom.bonded_atoms))) 'Number of bonds', len(atom.bonded_atoms)))
atom.steric_number += len(atom.bonded_atoms) atom.steric_number += len(atom.bonded_atoms)
debug('{0:>65s}: {1:>4d}'.format(
debug('%65s: %4d'%('Number of hydrogen atoms to add',atom.number_of_protons_to_add)) 'Number of hydrogen atoms to add', atom.number_of_protons_to_add))
atom.steric_number += atom.number_of_protons_to_add atom.steric_number += atom.number_of_protons_to_add
debug('{0:>65s}: {1:>4d}'.format(
debug('%65s: %4d'%('Number of pi-electrons in double and triple bonds(-)',atom.num_pi_elec_2_3_bonds)) 'Number of pi-electrons in double and triple bonds(-)',
atom.num_pi_elec_2_3_bonds))
atom.steric_number -= atom.num_pi_elec_2_3_bonds atom.steric_number -= atom.num_pi_elec_2_3_bonds
debug('{0:>65s}: {1:>4d}'.format(
debug('%65s: %4d'%('Number of pi-electrons in conjugated double and triple bonds(-)',atom.num_pi_elec_conj_2_3_bonds)) 'Number of pi-electrons in conjugated double and triple bonds(-)',
atom.num_pi_elec_conj_2_3_bonds))
atom.steric_number -= atom.num_pi_elec_conj_2_3_bonds atom.steric_number -= atom.num_pi_elec_conj_2_3_bonds
debug('{0:>65s}: {1:>4d}'.format(
debug('%65s: %4d'%('Number of donated co-ordinated bonds',0)) 'Number of donated co-ordinated bonds', 0))
atom.steric_number += 0 atom.steric_number += 0
debug('{0:>65s}: {1:>4.1f}'.format(
debug('%65s: %4.1f'%('Charge(-)',atom.charge)) 'Charge(-)', atom.charge))
atom.steric_number -= atom.charge atom.steric_number -= atom.charge
atom.steric_number = math.floor(atom.steric_number/2.0) atom.steric_number = math.floor(atom.steric_number/2.0)
atom.number_of_lone_pairs = (
atom.number_of_lone_pairs = atom.steric_number - len(atom.bonded_atoms) - atom.number_of_protons_to_add atom.steric_number-len(atom.bonded_atoms)-atom.number_of_protons_to_add)
debug('-'*70) debug('-'*70)
debug('%65s: %4d'%('Steric number',atom.steric_number)) debug('{0:>65s}: {1:>4d}'.format(
debug('%65s: %4d'%('Number of lone pairs',atom.number_of_lone_pairs)) 'Steric number', atom.steric_number))
debug('{0:>65s}: {1:>4d}'.format(
'Number of lone pairs', atom.number_of_lone_pairs))
atom.steric_num_lone_pairs_set = True atom.steric_num_lone_pairs_set = True
return
def add_protons(self, atom): def add_protons(self, atom):
"""Add protons to atom.
Args:
atom: atom for calculation
"""
# decide which method to use # decide which method to use
debug('PROTONATING',atom) debug('PROTONATING', atom)
if atom.steric_number in list(self.protonation_methods.keys()): if atom.steric_number in list(self.protonation_methods.keys()):
self.protonation_methods[atom.steric_number](atom) self.protonation_methods[atom.steric_number](atom)
else: else:
warning('Do not have a method for protonating', atom, '(steric number: %d)' % atom.steric_number) warning('Do not have a method for protonating', atom,
'(steric number: {0:d})'.format(atom.steric_number))
return
def trigonal(self, atom): def trigonal(self, atom):
debug('TRIGONAL - %d bonded atoms'%(len(atom.bonded_atoms))) """Add hydrogens in trigonal geometry.
Args:
atom: atom to protonate
"""
debug('TRIGONAL - {0:d} bonded atoms'.format(len(atom.bonded_atoms)))
rot_angle = math.radians(120.0) rot_angle = math.radians(120.0)
cvec = Vector(atom1=atom)
c = vector(atom1 = atom)
# 0 bonds # 0 bonds
if len(atom.bonded_atoms) == 0: if len(atom.bonded_atoms) == 0:
pass pass
# 1 bond # 1 bond
if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0: if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0:
# Add another atom with the right angle to the first one # Add another atom with the right angle to the first one
a = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) avec = Vector(atom1=atom, atom2=atom.bonded_atoms[0])
# use plane of bonded trigonal atom - e.g. arg # use plane of bonded trigonal atom - e.g. arg
self.set_steric_number_and_lone_pairs(atom.bonded_atoms[0]) self.set_steric_number_and_lone_pairs(atom.bonded_atoms[0])
if atom.bonded_atoms[0].steric_number == 3 and len(atom.bonded_atoms[0].bonded_atoms)>1: if (atom.bonded_atoms[0].steric_number == 3
# use other atoms bonded to the neighbour to establish the plane, if possible and len(atom.bonded_atoms[0].bonded_atoms) > 1):
# use other atoms bonded to the neighbour to establish the
# plane, if possible
other_atom_indices = [] other_atom_indices = []
for i in range(len(atom.bonded_atoms[0].bonded_atoms)): for i, bonded_atom in enumerate(
if atom.bonded_atoms[0].bonded_atoms[i] != atom: atom.bonded_atoms[0].bonded_atoms):
if bonded_atom != atom:
other_atom_indices.append(i) other_atom_indices.append(i)
vec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0])
vec2 = Vector(atom1=atom.bonded_atoms[0],
v1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) atom2=atom.bonded_atoms[0]
v2 = vector(atom1 = atom.bonded_atoms[0], .bonded_atoms[other_atom_indices[0]])
atom2 = atom.bonded_atoms[0].bonded_atoms[other_atom_indices[0]]) axis = vec1**vec2
# this is a trick to make sure that the order of atoms doesn't
axis = v1**v2 # influence the final postions of added protons
if len(other_atom_indices) > 1:
# this is a trick to make sure that the order of atoms doesn't influence vec3 = Vector(atom1=atom.bonded_atoms[0],
# the final postions of added protons atom2=atom.bonded_atoms[0]
if len(other_atom_indices)>1: .bonded_atoms[other_atom_indices[1]])
v3 = vector(atom1 = atom.bonded_atoms[0], axis2 = vec1**vec3
atom2 = atom.bonded_atoms[0].bonded_atoms[other_atom_indices[1]]) if axis*axis2 > 0:
axis2 = v1**v3
if axis * axis2>0:
axis = axis+axis2 axis = axis+axis2
else: else:
axis = axis-axis2 axis = axis-axis2
else: else:
axis = a.orthogonal() axis = avec.orthogonal()
avec = rotate_vector_around_an_axis(rot_angle, axis, avec)
a = rotate_vector_around_an_axis(rot_angle, axis, a) avec = self.set_bond_distance(avec, atom.element)
a = self.set_bond_distance(a, atom.element) self.add_proton(atom, cvec+avec)
self.add_proton(atom, c+a)
# 2 bonds # 2 bonds
if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0: if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0:
# Add another atom with the right angle to the first two # Add another atom with the right angle to the first two
a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0)
a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0)
new_a = -a1 - a2 new_a = -avec1 - avec2
new_a = self.set_bond_distance(new_a, atom.element) new_a = self.set_bond_distance(new_a, atom.element)
self.add_proton(atom, c+new_a) self.add_proton(atom, cvec+new_a)
return
def tetrahedral(self, atom): def tetrahedral(self, atom):
debug('TETRAHEDRAL - %d bonded atoms'%(len(atom.bonded_atoms))) """Protonate atom in tetrahedral geometry.
Args:
atom: atom to protonate.
"""
debug(
'TETRAHEDRAL - {0:d} bonded atoms'.format(len(atom.bonded_atoms)))
# TODO - might be good to move tetrahedral angle to constant
rot_angle = math.radians(109.5) rot_angle = math.radians(109.5)
cvec = Vector(atom1=atom)
# sanity check
# if atom.number_of_protons_to_add + len(atom.bonded_atoms) != 4:
# self.display 'Error: Attempting tetrahedral structure with %d bonds'%(atom.number_of_protons_to_add +
# len(atom.bonded_atoms))
c = vector(atom1 = atom)
# 0 bonds # 0 bonds
if len(atom.bonded_atoms) == 0: if len(atom.bonded_atoms) == 0:
pass pass
# 1 bond # 1 bond
if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0: if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0:
# Add another atom with the right angle to the first one # Add another atom with the right angle to the first one
a = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) avec = Vector(atom1=atom, atom2=atom.bonded_atoms[0])
axis = a.orthogonal() axis = avec.orthogonal()
a = rotate_vector_around_an_axis(rot_angle, axis, a) avec = rotate_vector_around_an_axis(rot_angle, axis, avec)
a = self.set_bond_distance(a, atom.element) avec = self.set_bond_distance(avec, atom.element)
self.add_proton(atom, c+a) self.add_proton(atom, cvec+avec)
# 2 bonds # 2 bonds
if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0: if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0:
# Add another atom with the right angle to the first two # Add another atom with the right angle to the first two
a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0)
a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0)
axis = avec1 + avec2
axis = a1 + a2 new_a = rotate_vector_around_an_axis(math.radians(90), axis,
-avec1)
new_a = rotate_vector_around_an_axis(math.radians(90), axis, -a1)
new_a = self.set_bond_distance(new_a, atom.element) new_a = self.set_bond_distance(new_a, atom.element)
self.add_proton(atom, c+new_a) self.add_proton(atom, cvec+new_a)
# 3 bonds # 3 bonds
if len(atom.bonded_atoms) == 3 and atom.number_of_protons_to_add > 0: if len(atom.bonded_atoms) == 3 and atom.number_of_protons_to_add > 0:
a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0)
a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0)
a3 = vector(atom1 = atom, atom2 = atom.bonded_atoms[2]).rescale(1.0) avec3 = Vector(atom1=atom, atom2=atom.bonded_atoms[2]).rescale(1.0)
new_a = -avec1-avec2-avec3
new_a = -a1-a2-a3
new_a = self.set_bond_distance(new_a, atom.element) new_a = self.set_bond_distance(new_a, atom.element)
self.add_proton(atom, c+new_a) self.add_proton(atom, cvec+new_a)
return @staticmethod
def add_proton(atom, position):
"""Add a proton to an atom at a specific position.
Args:
def add_proton(self, atom, position): atom: atom to protonate
position: position for proton
"""
# Create the new proton # Create the new proton
new_H = propka.atom.Atom() new_h = propka.atom.Atom()
new_H.set_property(numb = None, new_h.set_property(
name = 'H%s'%atom.name[1:], numb=None,
res_name = atom.res_name, name='H{0:s}'.format(atom.name[1:]),
chain_id = atom.chain_id, res_name=atom.res_name,
res_num = atom.res_num, chain_id=atom.chain_id,
x = round(position.x,3), # round of to three digimal points res_num=atom.res_num,
y = round(position.y,3), # to avoid round-off differences x=round(position.x, 3), # round of to three decimal points to
z = round(position.z,3), # when input file # avoid round-off differences in input file
occ = None, y=round(position.y, 3),
beta = None) z=round(position.z, 3),
new_H.element = 'H' occ=None,
new_H.type = atom.type beta=None)
new_h.element = 'H'
new_H.bonded_atoms = [atom] new_h.type = atom.type
new_H.charge = 0 new_h.bonded_atoms = [atom]
new_H.steric_number = 0 new_h.charge = 0
new_H.number_of_lone_pairs = 0 new_h.steric_number = 0
new_H.number_of_protons_to_add = 0 new_h.number_of_lone_pairs = 0
new_H.num_pi_elec_2_3_bonds = 0 new_h.number_of_protons_to_add = 0
new_H.is_protonates = True new_h.num_pi_elec_2_3_bonds = 0
new_h.is_protonates = True
atom.bonded_atoms.append(new_H) atom.bonded_atoms.append(new_h)
atom.number_of_protons_to_add -=1 atom.number_of_protons_to_add -= 1
atom.conformation_container.add_atom(new_H) atom.conformation_container.add_atom(new_h)
# update names of all protons on this atom # update names of all protons on this atom
new_H.residue_label = "%-3s%4d%2s" % (new_H.name,new_H.res_num, new_H.chain_id) new_h.residue_label = "{0:<3s}{1:>4d}{2:>2s}".format(
new_h.name, new_h.res_num, new_h.chain_id)
no_protons = atom.count_bonded_elements('H') no_protons = atom.count_bonded_elements('H')
if no_protons > 1: if no_protons > 1:
i = 1 i = 1
for proton in atom.get_bonded_elements('H'): for proton in atom.get_bonded_elements('H'):
proton.name = 'H%s%d'%(atom.name[1:],i) proton.name = 'H{0:s}{1:d}'.format(
proton.residue_label = "%-3s%4d%2s" % (proton.name,proton.res_num, proton.chain_id) atom.name[1:], i)
i+=1 proton.residue_label = "{0:<3s}{1:>4d}{2:>2s}".format(
proton.name, proton.res_num, proton.chain_id)
i += 1
debug('added', new_h, 'to', atom)
def set_bond_distance(self, bvec, element):
"""Set bond distance between atom and element.
debug('added',new_H, 'to',atom) Args:
return bvec: bond vector
element: bonded element
def set_bond_distance(self, a, element): Returns:
d = 1.0 scaled bond vector
"""
dist = 1.0
if element in list(self.bond_lengths.keys()): if element in list(self.bond_lengths.keys()):
d = self.bond_lengths[element] dist = self.bond_lengths[element]
else: else:
warning('Bond length for %s not found, using the standard value of %f' % (element, d)) str_ = (
'Bond length for {0:s} not found, using the standard value '
a = a.rescale(d) 'of {1:f}'.format(element, dist))
warning(str_)
return a bvec = bvec.rescale(dist)
return bvec
if __name__ == '__main__':
import protein, pdb, sys,os
arguments = sys.argv
if len(arguments) != 2:
info('Usage: protonate.py <pdb_file>')
sys.exit(0)
filename = arguments[1]
if not os.path.isfile(filename):
info('Error: Could not find \"%s\"' % filename)
sys.exit(1)
p = Protonate()
pdblist = pdb.readPDB(filename)
my_protein = protein.Protein(pdblist,'test.pdb')
p.remove_all_hydrogen_atoms_from_protein(my_protein)
my_protein.writePDB('before_protonation.pdb')
p.protonate_protein(my_protein)
## write out protonated file
my_protein.writePDB('protonated.pdb')

View File

@@ -1,24 +1,24 @@
# entry point for propka script """Entry point for PROPKA script."""
import logging import logging
import propka.lib, propka.molecular_container from propka.lib import loadOptions
from propka.molecular_container import Molecular_container
_LOGGER = logging.getLogger("PROPKA") _LOGGER = logging.getLogger("PROPKA")
def main(): def main(optargs=None):
""" """Read in structure files, calculate pKa values, and print pKa files."""
Reads in structure files, calculates pKa values, and prints pKa files # loading options, flags and arguments
""" optargs = optargs if optargs is not None else []
# loading options, flaggs and arguments options = loadOptions(*optargs)
options = propka.lib.loadOptions()
pdbfiles = options.filenames pdbfiles = options.filenames
for pdbfile in pdbfiles: for pdbfile in pdbfiles:
my_molecule = propka.molecular_container.Molecular_container(pdbfile, options) my_molecule = Molecular_container(pdbfile, options)
my_molecule.calculate_pka() my_molecule.calculate_pka()
my_molecule.write_pka() my_molecule.write_pka()
def single(pdbfile, optargs=None): def single(pdbfile, optargs=None):
"""Run a single PROPKA calculation using *pdbfile* as input. """Run a single PROPKA calculation using *pdbfile* as input.
@@ -27,15 +27,15 @@ def single(pdbfile, optargs=None):
.. rubric:: Example .. rubric:: Example
:: ::
single("protein.pdb", optargs=["--mutation=N25R/N181D", "-v", "--pH=7.2"]) single("protein.pdb", optargs=["--mutation=N25R/N181D", "-v",
"--pH=7.2"])
""" """
optargs = optargs if optargs is not None else [] optargs = optargs if optargs is not None else []
options = propka.lib.loadOptions(*optargs) options = loadOptions(*optargs)
pdbfile = options.filenames.pop(0) pdbfile = options.filenames.pop(0)
if len(options.filenames) > 0: if len(options.filenames) > 0:
_LOGGER.warning("Ignoring filenames: %s", options.filenames) _LOGGER.warning("Ignoring filenames: {0:s}".format(options.filenames))
my_molecule = Molecular_container(pdbfile, options)
my_molecule = propka.molecular_container.Molecular_container(pdbfile, options)
my_molecule.calculate_pka() my_molecule.calculate_pka()
my_molecule.write_pka() my_molecule.write_pka()
return my_molecule return my_molecule

View File

@@ -1,11 +1,21 @@
from __future__ import division """Vector algebra for PROPKA."""
from __future__ import print_function
import math import math
from propka.lib import info from propka.lib import info, get_sorted_configurations
class vector:
""" Vector """ class Vector:
def __init__(self, xi=0.0, yi=0.0, zi=0.0, atom1 = 0, atom2 = 0): """Vector"""
def __init__(self, xi=0.0, yi=0.0, zi=0.0, atom1=None, atom2=None):
"""Initialize vector.
Args:
xi: default x-coordinate
yi: default y-coordinate
zi: default z-coordinate
atom1: atom center used to define default coordinate
atom2: two atom centers used to define vector
"""
self.x = xi self.x = xi
self.y = yi self.y = yi
self.z = zi self.z = zi
@@ -22,224 +32,280 @@ class vector:
self.y = atom2.y - self.y self.y = atom2.y - self.y
self.z = atom2.z - self.z self.z = atom2.z - self.z
return
def __add__(self, other): def __add__(self, other):
return vector(self.x + other.x, return Vector(self.x + other.x,
self.y + other.y, self.y + other.y,
self.z + other.z) self.z + other.z)
def __sub__(self, other): def __sub__(self, other):
return vector(self.x - other.x, return Vector(self.x - other.x,
self.y - other.y, self.y - other.y,
self.z - other.z) self.z - other.z)
def __mul__(self, other):
""" Dot product, scalar and matrix multiplication """
if isinstance(other,vector): def __mul__(self, other):
"""Dot product, scalar and matrix multiplication."""
if isinstance(other, Vector):
return self.x * other.x + self.y * other.y + self.z * other.z return self.x * other.x + self.y * other.y + self.z * other.z
elif isinstance(other, matrix4x4): elif isinstance(other, Matrix4x4):
return vector( return Vector(
xi = other.a11*self.x + other.a12*self.y + other.a13*self.z + other.a14*1.0, xi=other.a11*self.x + other.a12*self.y + other.a13*self.z
yi = other.a21*self.x + other.a22*self.y + other.a23*self.z + other.a24*1.0, + other.a14*1.0,
zi = other.a31*self.x + other.a32*self.y + other.a33*self.z + other.a34*1.0 yi=other.a21*self.x + other.a22*self.y + other.a23*self.z
+ other.a24*1.0,
zi=other.a31*self.x + other.a32*self.y + other.a33*self.z
+ other.a34*1.0
) )
elif type(other) in [int, float]: elif type(other) in [int, float]:
return vector(self.x * other, self.y * other, self.z * other) return Vector(self.x * other, self.y * other, self.z * other)
else: else:
info('%s not supported' % type(other)) info('{0:s} not supported'.format(type(other)))
raise TypeError raise TypeError
def __rmul__(self,other): def __rmul__(self, other):
return self.__mul__(other) return self.__mul__(other)
def __pow__(self, other): def __pow__(self, other):
""" Cross product """ """Cross product."""
return vector(self.y * other.z - self.z * other.y, return Vector(self.y * other.z - self.z * other.y,
self.z * other.x - self.x * other.z, self.z * other.x - self.x * other.z,
self.x * other.y - self.y * other.x) self.x * other.y - self.y * other.x)
def __neg__(self): def __neg__(self):
res = vector(xi = -self.x, res = Vector(xi=-self.x,
yi = -self.y, yi=-self.y,
zi = -self.z) zi=-self.z)
return res return res
def sq_length(self): def sq_length(self):
"""Return vector squared-length"""
return self.x * self.x + self.y * self.y + self.z * self.z return self.x * self.x + self.y * self.y + self.z * self.z
def length(self): def length(self):
"""Return vector length."""
return math.sqrt(self.sq_length()) return math.sqrt(self.sq_length())
def __str__(self): def __str__(self):
return '%10.4f %10.4f %10.4f'%(self.x, self.y, self.z) return '{0:>10.4f} {1:>10.4f} {2:>10.4f}'.format(
self.x, self.y, self.z)
def __repr__(self): def __repr__(self):
return '<vector>' return '<vector>'
def orthogonal(self): def orthogonal(self):
""" Returns a vector orthogonal to self """ """ Returns a vector orthogonal to self """
res = vector(self.y, -self.x, 0) res = Vector(self.y, -self.x, 0)
if abs(self.y) < abs(self.z): if abs(self.y) < abs(self.z):
res = vector(self.z, 0, -self.x) res = Vector(self.z, 0, -self.x)
return res return res
def rescale(self, new_length): def rescale(self, new_length):
""" Rescale vector to new length while preserving direction """ """ Rescale vector to new length while preserving direction """
frac = new_length/(self.length()) frac = new_length/(self.length())
res = vector(xi = self.x*frac, res = Vector(xi=self.x*frac, yi=self.y*frac, zi=self.z*frac)
yi = self.y*frac,
zi = self.z*frac)
return res return res
class matrix4x4: class Matrix4x4:
"""A 4-by-4 matrix class."""
def __init__(self, def __init__(self,
a11i=0.0, a12i=0.0, a13i=0.0, a14i=0.0, a11i=0.0, a12i=0.0, a13i=0.0, a14i=0.0,
a21i=0.0, a22i=0.0, a23i=0.0, a24i=0.0, a21i=0.0, a22i=0.0, a23i=0.0, a24i=0.0,
a31i=0.0, a32i=0.0, a33i=0.0, a34i=0.0, a31i=0.0, a32i=0.0, a33i=0.0, a34i=0.0,
a41i=0.0, a42i=0.0, a43i=0.0, a44i=0.0): a41i=0.0, a42i=0.0, a43i=0.0, a44i=0.0):
"""Initialize with matrix elements."""
# Row 1
self.a11 = a11i self.a11 = a11i
self.a12 = a12i self.a12 = a12i
self.a13 = a13i self.a13 = a13i
self.a14 = a14i self.a14 = a14i
# Row 2
self.a21 = a21i self.a21 = a21i
self.a22 = a22i self.a22 = a22i
self.a23 = a23i self.a23 = a23i
self.a24 = a24i self.a24 = a24i
# Row 3
self.a31 = a31i self.a31 = a31i
self.a32 = a32i self.a32 = a32i
self.a33 = a33i self.a33 = a33i
self.a34 = a34i self.a34 = a34i
# Row 4
self.a41 = a41i self.a41 = a41i
self.a42 = a42i self.a42 = a42i
self.a43 = a43i self.a43 = a43i
self.a44 = a44i self.a44 = a44i
return
def angle(avec, bvec):
"""Get the angle between two vectors.
Args:
avec: vector 1
bvec: vector 2
Returns:
angle in radians
"""
dot = avec * bvec
return math.acos(dot / (avec.length() * bvec.length()))
def angle_degrees(avec, bvec):
"""Get the angle between two vectors in degrees.
Args:
avec: vector 1
bvec: vector 2
Returns:
angle in degrees
"""
return math.degrees(angle(avec, bvec))
def signed_angle_around_axis(avec, bvec, axis):
"""Get signed angle of two vectors around axis in radians.
# methods working on vectors Args:
avec: vector 1
bvec: vector 2
axis: axis
Returns:
angle in radians
"""
norma = avec**axis
normb = bvec**axis
ang = angle(norma, normb)
dot_ = bvec*(avec**axis)
if dot_ < 0:
ang = -ang
return ang
def angle(a, b): def rotate_vector_around_an_axis(theta, axis, vec):
dot = a * b """Rotate vector around an axis.
return math.acos(dot / (a.length() * b.length()))
Args:
def angle_degrees(a,b): theta: rotation angle (in radians)
return math.degrees(angle(a, b)) axis: axis for rotation
vec: vector to rotate
Returns:
def signed_angle_around_axis(a,b, axis): rotated vector
na = a**axis """
nb = b**axis gamma = 0.0
if axis.y != 0:
v = angle(na,nb)
d = b*(a**axis)
if d < 0:
v =-v
return v
def signed_angle_degrees(a,b):
return 180/math.pi * signed_angle(a, b)
def rotate_vector_around_an_axis(theta, axis, v):
#print "# 1. rotate space about the z-axis so that the rotation axis lies in the xz-plane"
gamma = 0.0
if axis.y != 0:
if axis.x != 0:
gamma = -axis.x/abs(axis.x)*math.asin(axis.y/(math.sqrt(axis.x*axis.x + axis.y*axis.y)))
else:
gamma = math.pi/2.0
Rz = rotate_atoms_around_z_axis(gamma)
v = Rz * v
axis = Rz * axis
#print "# 2. rotate space about the y-axis so that the rotation axis lies along the z-axis"
beta = 0.0
if axis.x != 0: if axis.x != 0:
beta = -axis.x/abs(axis.x)*math.acos(axis.z/math.sqrt(axis.x*axis.x + axis.z*axis.z)) gamma = -axis.x/abs(axis.x)*math.asin(
Ry = rotate_atoms_around_y_axis(beta) axis.y/(math.sqrt(axis.x*axis.x + axis.y*axis.y)))
v = Ry * v else:
axis = Ry *axis gamma = math.pi/2.0
rot_z = rotate_atoms_around_z_axis(gamma)
vec = rot_z * vec
axis = rot_z * axis
beta = 0.0
if axis.x != 0:
beta = -axis.x/abs(axis.x)*math.acos(
axis.z/math.sqrt(axis.x*axis.x + axis.z*axis.z))
rot_y = rotate_atoms_around_y_axis(beta)
vec = rot_y * vec
axis = rot_y * axis
rot_z = rotate_atoms_around_z_axis(theta)
vec = rot_z * vec
rot_y = rotate_atoms_around_y_axis(-beta)
vec = rot_y * vec
rot_z = rotate_atoms_around_z_axis(-gamma)
vec = rot_z * vec
return vec
#print "# 3. perform the desired rotation by theta about the z-axis"
Rz = rotate_atoms_around_z_axis(theta)
v = Rz * v
#print "# 4. apply the inverse of step 2." def rotate_atoms_around_z_axis(theta):
Ry = rotate_atoms_around_y_axis(-beta) """Get rotation matrix for z-axis.
v = Ry * v
#print "# 5. apply the inverse of step 1."
Rz = rotate_atoms_around_z_axis(-gamma)
v = Rz * v
return v
def rotate_atoms_around_z_axis(angle): Args:
Rz = matrix4x4( theta: angle of rotation (radians)
a11i = math.cos(angle), a12i = -math.sin(angle), a13i = 0.0, a14i = 0.0, Returns:
a21i = math.sin(angle), a22i = math.cos(angle), a23i = 0.0, a24i = 0.0, rotation matrix
a31i = 0.0 , a32i = 0.0 , a33i = 1.0, a34i = 0.0, """
a41i = 0.0 , a42i = 0.0 , a43i = 0.0, a44i = 1.0 return Matrix4x4(
a11i=math.cos(theta),
a12i=-math.sin(theta),
a13i=0.0,
a14i=0.0,
a21i=math.sin(theta),
a22i=math.cos(theta),
a23i=0.0,
a24i=0.0,
a31i=0.0,
a32i=0.0,
a33i=1.0,
a34i=0.0,
a41i=0.0,
a42i=0.0,
a43i=0.0,
a44i=1.0
) )
return Rz
def rotate_atoms_around_y_axis(angle): def rotate_atoms_around_y_axis(theta):
Ry = matrix4x4( """Get rotation matrix for y-axis.
a11i = math.cos(angle), a12i = 0.0, a13i = math.sin(angle), a14i = 0.0,
a21i = 0.0 , a22i = 1.0, a23i = 0.0 , a24i = 0.0, Args:
a31i = -math.sin(angle), a32i = 0.0, a33i = math.cos(angle), a34i = 0.0, theta: angle of rotation (radians)
a41i = 0.0 , a42i = 0.0, a43i = 0.0 , a44i = 1.0 Returns:
rotation matrix
"""
return Matrix4x4(
a11i=math.cos(theta),
a12i=0.0,
a13i=math.sin(theta),
a14i=0.0,
a21i=0.0,
a22i=1.0,
a23i=0.0,
a24i=0.0,
a31i=-math.sin(theta),
a32i=0.0,
a33i=math.cos(theta),
a34i=0.0,
a41i=0.0,
a42i=0.0,
a43i=0.0,
a44i=1.0
) )
return Ry
class MultiVector:
"""Collection of vectors for multiple configurations of atoms.
class multi_vector: TODO - this class does not appear to be used or covered by tests
def __init__(self, atom1=0, atom2=0): """
def __init__(self, atom1=None, atom2=None):
"""Initialize with atom configurations.
Args:
atom1: first atom to define vector
atom2: second atom to define vector
"""
self.vectors = [] self.vectors = []
self.keys = [] self.keys = []
self.result = None
# store vectors for all configurations of atoms # store vectors for all configurations of atoms
if atom1!=0: if atom1 is not None:
self.keys = lib.get_sorted_configurations(atom1.configurations.keys()) self.keys = get_sorted_configurations(atom1.configurations.keys())
if atom2!=0: if atom2 is not None:
keys2 = lib.get_sorted_configurations(atom2.configurations.keys()) keys2 = get_sorted_configurations(atom2.configurations.keys())
if self.keys != keys2: if self.keys != keys2:
raise 'Cannot make multi vector: Atomic configurations mismatch for\n %s\n %s\n'%(atom1,atom2) str_ = ('Cannot make multi vector: Atomic configurations '
'mismatch for\n {0:s}\n {1:s}\n'.format(
atom1, atom2))
raise KeyError(str_)
for key in self.keys: for key in self.keys:
atom1.setConfiguration(key) atom1.setConfiguration(key)
if atom2!=0: if atom2 != 0:
atom2.setConfiguration(key) atom2.setConfiguration(key)
v = vector(atom1=atom1, atom2=atom2) vec = Vector(atom1=atom1, atom2=atom2)
self.vectors.append(v) self.vectors.append(vec)
#info(key,v)
return def __getattribute__(self, name):
def __getattribute__(self,name):
try: try:
return object.__getattribute__(self, name) return object.__getattribute__(self, name)
except AttributeError: except AttributeError:
@@ -247,72 +313,103 @@ class multi_vector:
def __str__(self): def __str__(self):
res = '' res = ''
for i in range(len(self.keys)): for i, key in enumerate(self.keys):
res += '%s %s\n'%(self.keys[i], self.vectors[i]) res += '{0:s} {1:s}\n'.format(key, self.vectors[i])
return res return res
def do_job(self, job): def do_job(self, job):
#info(job) """Append vectors to configuration.
self.res = multi_vector()
for i in range(len(self.vectors)): Args:
self.res.vectors.append(eval('self.vectors[%d].%s()'%(i,job))) job: name of function to apply to vectors
self.res.keys.append(self.keys[i]) Returns:
TODO - figure out what this is
"""
self.result = MultiVector()
for i, vector in enumerate(self.vectors):
func = getattr(vector, job)
self.result.vectors.append(func())
self.result.keys.append(self.keys[i])
return self.get_result return self.get_result
@property
def get_result(self): def get_result(self):
return self.res """Return the latest result."""
return self.result
def generic_operation(self, operation, other):
if self.keys != other.keys:
raise 'Incompatable keys'
self.res = multi_vector() def generic_operation(self, operation, other):
"""Perform a generic operation between two MultiVector objects.
Args:
operation: operation to perform (string)
other: other MultiVector object
"""
if self.keys != other.keys:
raise 'Incompatible keys'
self.result = MultiVector()
for i in range(len(self.vectors)): for i in range(len(self.vectors)):
self.res.vectors.append(eval('self.vectors[%d] %s other.vectors[%d]'%(i,operation,i))) self.result.vectors.append(
self.res.keys.append(self.keys[i]) # TODO - eliminate eval() or entire class
return eval(
'self.vectors[{0:d}] {1:s} other.vectors[{2:d}]'.format(
i, operation, i)))
self.result.keys.append(self.keys[i])
def __add__(self, other): def __add__(self, other):
self.generic_operation('+',other) self.generic_operation('+', other)
return self.res return self.result
def __sub__(self, other): def __sub__(self, other):
self.generic_operation('-',other) self.generic_operation('-', other)
return self.res return self.result
def __mul__(self, other): def __mul__(self, other):
self.generic_operation('*',other) self.generic_operation('*', other)
return self.res return self.result
def __pow__(self, other): def __pow__(self, other):
self.generic_operation('**',other) self.generic_operation('**', other)
return self.res return self.result
def generic_self_operation(self, operation): @staticmethod
def generic_self_operation(_):
"""TODO - delete this."""
return return
def __neg__(self): def __neg__(self):
self.generic_operation('*',-1.0) self.generic_operation('*', -1.0)
return self.res return self.result
def rescale(self, new_length): def rescale(self, new_length):
self.res = multi_vector() """Rescale multi-vector to new length.
for i in range(len(self.vectors)):
self.res.vectors.append(self.vectors[i].rescale(new_length)) Args:
self.res.keys.append(self.keys[i]) new_length: new length for multi-vector
Result:
MultiVector object
"""
self.result = MultiVector()
for i, vector in enumerate(self.vectors):
self.result.vectors.append(vector.rescale(new_length))
self.result.keys.append(self.keys[i])
return self.res return self.res
def rotate_multi_vector_around_an_axis(theta, axis, v): def rotate_multi_vector_around_an_axis(theta, axis, vec):
""" both axis ans v must be multi_vectors """ """Rotate a multi-vector around an axis.
if axis.keys != v.keys: NOTE - both axis ans v must be MultiVectors.
raise 'Incompatible keys in rotate multi_vector'
Args:
res = multi_vector() theta: angle (in radians)
for i in range(len(v.keys)): axis: multi-vector axis
res.vectors.append(rotate_vector_around_an_axis(theta, axis.vectors[i], v.vectors[i])) vec: multi-vector vector
res.keys.append(v.keys[i]) """
if axis.keys != vec.keys:
raise 'Incompatible keys in rotate MultiVector'
res = MultiVector()
for i, key in enumerate(vec.keys):
res.vectors.append(rotate_vector_around_an_axis(
theta, axis.vectors[i], vec.vectors[i]))
res.keys.append(key)
return res return res

View File

@@ -1,216 +1,290 @@
from __future__ import division """Contains version-specific methods and parameters.
from __future__ import print_function
import math
import sys, os
import propka.lib as lib TODO - this module unnecessarily confuses the code. Can we eliminate it?
from propka.lib import info, warning """
import propka.calculations as calculations from propka.lib import info
import propka.parameters import propka.calculations as calcs
class version: class Version:
def __init__(self,parameters): """Store version-specific methods and parameters."""
def __init__(self, parameters):
self.parameters = parameters self.parameters = parameters
return self.desolvation_model = self.empty_function
self.weight_pair_method = self.empty_function
self.hydrogen_bond_interaction_model = self.empty_function
self.sidechain_interaction_model = self.empty_function
self.electrostatic_interaction_model = self.empty_function
self.coulomb_interaction_model = self.empty_function
self.check_coulomb_pair_method = self.empty_function
self.backbone_reorganisation_method = self.empty_function
self.exception_check_method = self.empty_function
self.molecular_preparation_method = self.empty_function
self.prepare_bonds = self.empty_function
@staticmethod
def empty_function(*args):
"""Placeholder function so we don't use uninitialized variables.
Args:
args: whatever arguments would have been passed to the function
Raises:
NotImplementedError
"""
err = "Called an empty Version function with args {0:s}".format(args)
raise NotImplementedError(err)
# desolvation
def calculate_desolvation(self, group): def calculate_desolvation(self, group):
"""Calculate desolvation energy using assigned model."""
return self.desolvation_model(self.parameters, group) return self.desolvation_model(self.parameters, group)
def calculatePairWeight(self, Nmass1, Nmass2): def calculate_pair_weight(self, num_volume1, num_volume2):
return self.weight_pair_method(self.parameters, Nmass1, Nmass2) """Calculate pair weight using assigned model."""
return self.weight_pair_method(
self.parameters, num_volume1, num_volume2)
# side chains
def hydrogen_bond_interaction(self, group1, group2): def hydrogen_bond_interaction(self, group1, group2):
"""Calculate H-bond energy using assigned model."""
return self.hydrogen_bond_interaction_model(group1, group2, self) return self.hydrogen_bond_interaction_model(group1, group2, self)
def calculateSideChainEnergy(self, distance, dpka_max, cutoff, weight, f_angle): def calculate_side_chain_energy(self, distance, dpka_max, cutoff, _,
return self.sidechain_interaction_model(distance, dpka_max, cutoff, f_angle) # weight is ignored in 3.0 Sep07 f_angle):
"""Calculate sidechain energy using assigned model."""
return self.sidechain_interaction_model(
distance, dpka_max, cutoff, f_angle)
# coulomb
def electrostatic_interaction(self, group1, group2, distance): def electrostatic_interaction(self, group1, group2, distance):
return self.electrostatic_interaction_model(group1, group2, distance, self) """Calculate electrostatic energy using assigned model."""
return self.electrostatic_interaction_model(
group1, group2, distance, self)
def calculateCoulombEnergy(self, distance, weight): def calculate_coulomb_energy(self, distance, weight):
return self.coulomb_interaction_model(distance, weight, self.parameters) """Calculate Coulomb energy using assigned model."""
return self.coulomb_interaction_model(
distance, weight, self.parameters)
def checkCoulombPair(self, group1, group2, distance): def check_coulomb_pair(self, group1, group2, distance):
return self.check_coulomb_pair_method(self.parameters, group1, group2, distance) """Check Coulomb pair using assigned model."""
return self.check_coulomb_pair_method(
self.parameters, group1, group2, distance)
# backbone re-organisation def calculate_backbone_reorganization(self, conformation):
def calculateBackBoneReorganization(self, conformation): """Calculate backbone reorganization using assigned model."""
return self.backbone_reorganisation_method(self.parameters, conformation) return self.backbone_reorganisation_method(
self.parameters, conformation)
# exceptions def check_exceptions(self, group1, group2):
def checkExceptions(self, group1, group2): """Calculate exceptions using assigned model."""
return self.exception_check_method(self, group1, group2) return self.exception_check_method(self, group1, group2)
def setup_bonding_and_protonation(self, molecular_container): def setup_bonding_and_protonation(self, molecular_container):
return self.molecular_preparation_method(self.parameters, molecular_container) """Setup bonding and protonation using assigned model."""
return self.molecular_preparation_method(
self.parameters, molecular_container)
def setup_bonding(self, molecular_container): def setup_bonding(self, molecular_container):
"""Setup bonding using assigned model."""
return self.prepare_bonds(self.parameters, molecular_container) return self.prepare_bonds(self.parameters, molecular_container)
class VersionA(Version):
"""TODO - figure out what this is."""
class version_A(version):
def __init__(self, parameters): def __init__(self, parameters):
"""Initialize object with parameters."""
# set the calculation rutines used in this version # set the calculation rutines used in this version
version.__init__(self, parameters) super().__init__(parameters)
self.molecular_preparation_method = calcs.setup_bonding_and_protonation
# atom naming, bonding, and protonation self.prepare_bonds = calcs.setup_bonding
self.molecular_preparation_method = propka.calculations.setup_bonding_and_protonation self.desolvation_model = calcs.radial_volume_desolvation
self.prepare_bonds = propka.calculations.setup_bonding self.weight_pair_method = calcs.calculate_pair_weight
self.sidechain_interaction_model = calcs.hydrogen_bond_energy
self.hydrogen_bond_interaction_model = calcs.hydrogen_bond_interaction
# desolvation related methods self.electrostatic_interaction_model = calcs.electrostatic_interaction
self.desolvation_model = calculations.radial_volume_desolvation self.check_coulomb_pair_method = calcs.check_coulomb_pair
self.weight_pair_method = calculations.calculatePairWeight self.coulomb_interaction_model = calcs.coulomb_energy
self.backbone_interaction_model = calcs.hydrogen_bond_energy
# side chain methods self.backbone_reorganisation_method = calcs.backbone_reorganization
self.sidechain_interaction_model = propka.calculations.HydrogenBondEnergy self.exception_check_method = calcs.check_exceptions
self.hydrogen_bond_interaction_model = propka.calculations.hydrogen_bond_interaction
# colomb methods
self.electrostatic_interaction_model = propka.calculations.electrostatic_interaction
self.check_coulomb_pair_method = propka.calculations.checkCoulombPair
self.coulomb_interaction_model = propka.calculations.CoulombEnergy
#backbone
self.backbone_interaction_model = propka.calculations.HydrogenBondEnergy
self.backbone_reorganisation_method = propka.calculations.BackBoneReorganization
# exception methods
self.exception_check_method = propka.calculations.checkExceptions
return
def get_hydrogen_bond_parameters(self, atom1, atom2): def get_hydrogen_bond_parameters(self, atom1, atom2):
"""Get hydrogen bond parameters for two atoms.
Args:
atom1: first atom
atom2: second atom
Returns:
[dpka_max, cutoff]
"""
dpka_max = self.parameters.sidechain_interaction dpka_max = self.parameters.sidechain_interaction
cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type) cutoff = self.parameters.sidechain_cutoffs.get_value(
atom1.group_type, atom2.group_type)
return [dpka_max, cutoff] return [dpka_max, cutoff]
def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom):
"""Get hydrogen bond parameters between backbone atom and other atom.
Args:
backbone_atom: backbone atom
atom: other atom
Returns
[v, [c1, c3]] TODO - figure out what this is
"""
if backbone_atom.group_type == 'BBC': if backbone_atom.group_type == 'BBC':
if atom.group_type in self.parameters.backbone_CO_hydrogen_bond.keys(): if atom.group_type in self.parameters.backbone_CO_hydrogen_bond.keys():
[v,c1,c2] = self.parameters.backbone_CO_hydrogen_bond[atom.group_type] [v, c1, c2] = self.parameters.backbone_CO_hydrogen_bond[
return [v,[c1,c2]] atom.group_type]
return [v, [c1, c2]]
if backbone_atom.group_type == 'BBN': if backbone_atom.group_type == 'BBN':
if atom.group_type in self.parameters.backbone_NH_hydrogen_bond.keys(): if atom.group_type in self.parameters.backbone_NH_hydrogen_bond.keys():
[v,c1,c2] = self.parameters.backbone_NH_hydrogen_bond[atom.group_type] [v, c1, c2] = self.parameters.backbone_NH_hydrogen_bond[
return [v,[c1,c2]] atom.group_type]
return [v, [c1, c2]]
return None return None
class SimpleHB(VersionA):
"""A simple hydrogen bond version."""
class simple_hb(version_A):
def __init__(self, parameters): def __init__(self, parameters):
"""Initialize object with parameters."""
# set the calculation rutines used in this version # set the calculation rutines used in this version
version_A.__init__(self, parameters) super().__init__(parameters)
info('Using simple hb model') info('Using simple hb model')
return
def get_hydrogen_bond_parameters(self, atom1, atom2): def get_hydrogen_bond_parameters(self, atom1, atom2):
return self.parameters.hydrogen_bonds.get_value(atom1.element, atom2.element) """Get hydrogen bond parameters for two atoms.
Args:
atom1: first atom
atom2: second atom
Returns:
[dpka_max, cutoff]
"""
return self.parameters.hydrogen_bonds.get_value(
atom1.element, atom2.element)
def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom):
return self.parameters.hydrogen_bonds.get_value(backbone_atom.element, atom.element) """Get hydrogen bond parameters between backbone atom and other atom.
Args:
backbone_atom: backbone atom
atom: other atom
Returns
[v, [c1, c3]] TODO - figure out what this is
"""
return self.parameters.hydrogen_bonds.get_value(
backbone_atom.element, atom.element)
class ElementBasedLigandInteractions(VersionA):
"""TODO - figure out what this is."""
class element_based_ligand_interactions(version_A):
def __init__(self, parameters): def __init__(self, parameters):
"""Initialize object with parameters."""
# set the calculation rutines used in this version # set the calculation rutines used in this version
version_A.__init__(self, parameters) super().__init__(parameters)
info('Using detailed SC model!') info('Using detailed SC model!')
return return
def get_hydrogen_bond_parameters(self, atom1, atom2): def get_hydrogen_bond_parameters(self, atom1, atom2):
if not 'hetatm' in [atom1.type, atom2.type]: """Get hydrogen bond parameters for two atoms.
# this is a protein-protein interaction
dpka_max = self.parameters.sidechain_interaction.get_value(atom1.group_type, atom2.group_type)
cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type)
return [dpka_max, cutoff]
Args:
atom1: first atom
atom2: second atom
Returns:
[dpka_max, cutoff]
"""
if 'hetatm' not in [atom1.type, atom2.type]:
# this is a protein-protein interaction
dpka_max = self.parameters.sidechain_interaction.get_value(
atom1.group_type, atom2.group_type)
cutoff = self.parameters.sidechain_cutoffs.get_value(
atom1.group_type, atom2.group_type)
return [dpka_max, cutoff]
# at least one ligand atom is involved in this interaction # at least one ligand atom is involved in this interaction
# make sure that we are using the heavy atoms for finding paramters # make sure that we are using the heavy atoms for finding paramters
elements = [] elements = []
for a in [atom1, atom2]: for atom in [atom1, atom2]:
if a.element == 'H': elements.append(a.bonded_atoms[0].element) if atom.element == 'H':
else: elements.append(a.element) elements.append(atom.bonded_atoms[0].element)
else:
return self.parameters.hydrogen_bonds.get_value(elements[0], elements[1]) elements.append(atom.element)
return self.parameters.hydrogen_bonds.get_value(
elements[0], elements[1])
def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom):
"""Get hydrogen bond parameters between backbone atom and other atom.
Args:
backbone_atom: backbone atom
atom: other atom
Returns
[v, [c1, c3]] TODO - figure out what this is
"""
if atom.type == 'atom': if atom.type == 'atom':
# this is a backbone-protein interaction # this is a backbone-protein interaction
if backbone_atom.group_type == 'BBC' and\ if (backbone_atom.group_type == 'BBC'
atom.group_type in self.parameters.backbone_CO_hydrogen_bond.keys(): and atom.group_type
[v,c1,c2] = self.parameters.backbone_CO_hydrogen_bond[atom.group_type] in self.parameters.backbone_CO_hydrogen_bond.keys()):
return [v,[c1,c2]] [v, c1, c2] = self.parameters.backbone_CO_hydrogen_bond[
atom.group_type]
return [v, [c1, c2]]
if backbone_atom.group_type == 'BBN' and\ if (backbone_atom.group_type == 'BBN'
atom.group_type in self.parameters.backbone_NH_hydrogen_bond.keys(): and atom.group_type
[v,c1,c2] = self.parameters.backbone_NH_hydrogen_bond[atom.group_type] in self.parameters.backbone_NH_hydrogen_bond.keys()):
return [v,[c1,c2]] [v, c1, c2] = self.parameters.backbone_NH_hydrogen_bond[
atom.group_type]
return [v, [c1, c2]]
else: else:
# this is a backbone-ligand interaction # this is a backbone-ligand interaction
# make sure that we are using the heavy atoms for finding paramters # make sure that we are using the heavy atoms for finding paramters
elements = [] elements = []
for a in [backbone_atom, atom]: for atom2 in [backbone_atom, atom]:
if a.element == 'H': elements.append(a.bonded_atoms[0].element) if atom2.element == 'H':
else: elements.append(a.element) elements.append(atom2.bonded_atoms[0].element)
else:
res = self.parameters.hydrogen_bonds.get_value(elements[0], elements[1]) elements.append(atom2.element)
res = self.parameters.hydrogen_bonds.get_value(
elements[0], elements[1])
if not res: if not res:
info('Could not determine backbone interaction parameters for:', info(
backbone_atom, atom) 'Could not determine backbone interaction parameters for:',
backbone_atom, atom)
return return None
return None return None
class Propka30(Version):
"""Version class for PROPKA 3.0."""
class propka30(version):
def __init__(self, parameters): def __init__(self, parameters):
# set the calculation rutines used in this version """Initialize object with parameters."""
version.__init__(self, parameters) # set the calculation routines used in this version
super().__init__(parameters)
# atom naming, bonding, and protonation self.molecular_preparation_method = (
self.molecular_preparation_method = propka.calculations.setup_bonding_and_protonation_30_style calcs.setup_bonding_and_protonation_30_style)
self.desolvation_model = calcs.radial_volume_desolvation
# desolvation related methods self.weight_pair_method = calcs.calculate_pair_weight
self.desolvation_model = calculations.radial_volume_desolvation self.sidechain_interaction_model = calcs.hydrogen_bond_energy
self.weight_pair_method = calculations.calculatePairWeight self.check_coulomb_pair_method = calcs.check_coulomb_pair
self.coulomb_interaction_model = calcs.coulomb_energy
# side chain methods self.backbone_reorganisation_method = calcs.backbone_reorganization
self.sidechain_interaction_model = propka.calculations.HydrogenBondEnergy self.exception_check_method = calcs.check_exceptions
# colomb methods
self.check_coulomb_pair_method = propka.calculations.checkCoulombPair
self.coulomb_interaction_model = propka.calculations.CoulombEnergy
#backbone
self.backbone_reorganisation_method = propka.calculations.BackBoneReorganization
# exception methods
self.exception_check_method = propka.calculations.checkExceptions
return
def get_hydrogen_bond_parameters(self, atom1, atom2): def get_hydrogen_bond_parameters(self, atom1, atom2):
dpka_max = self.parameters.sidechain_interaction.get_value(atom1.group_type, atom2.group_type) """Get hydrogen bond parameters for two atoms.
cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type)
Args:
atom1: first atom
atom2: second atom
Returns:
[dpka_max, cutoff]
"""
dpka_max = self.parameters.sidechain_interaction.get_value(
atom1.group_type, atom2.group_type)
cutoff = self.parameters.sidechain_cutoffs.get_value(
atom1.group_type, atom2.group_type)
return [dpka_max, cutoff] return [dpka_max, cutoff]

View File

@@ -1,32 +1,30 @@
#!/usr/bin/env python #!/usr/bin/env python
"""PROPKA script.
# This is the original propka script. However, this distribute-based This is the original propka script. However, this distribute-based
# installation moved the main() function into propka.run.main and just installation moved the main() function into propka.run.main and just
# generates a script called propka31 from the setup.py installation generates a script called propka31 from the setup.py installation
# script. You should not need to use this script. script. You should not need to use this script.
#
# (Also note that there can be import problems because the script name (Also note that there can be import problems because the script name
# is the same as the module name; that's why the new script is called is the same as the module name; that's why the new script is called
# propka31.) propka31.)
"""
from propka.lib import loadOptions
from propka.molecular_container import Molecular_container
import propka.lib, propka.molecular_container
def main(): def main():
""" """Read in structure files, calculates pKa values, and prints pKa files."""
Reads in structure files, calculates pKa values, and prints pKa files
"""
# loading options, flaggs and arguments # loading options, flaggs and arguments
options = propka.lib.loadOptions() options = loadOptions([])
pdbfiles = options.filenames pdbfiles = options.filenames
for pdbfile in pdbfiles: for pdbfile in pdbfiles:
my_molecule = propka.molecular_container.Molecular_container(pdbfile, options) my_molecule = Molecular_container(pdbfile, options)
my_molecule.calculate_pka() my_molecule.calculate_pka()
my_molecule.write_pka() my_molecule.write_pka()
if __name__ == '__main__': if __name__ == '__main__':
#import cProfile
#cProfile.run('main()',sort=1)
main() main()

View File

@@ -22,7 +22,8 @@ MAX_ERR_DECIMALS = 2
TEST_DIR = Path("tests") TEST_DIR = Path("tests")
# Location for test PDBs # Location for test PDBs
PDB_DIR = Path("pdb") PDB_DIR = Path("pdb")
# Location for results for comparing output (allow running from tests/ and ../tests/) # Location for results for comparing output (allow running from tests/ and
# ../tests/)
RESULTS_DIR = Path("tests/results") RESULTS_DIR = Path("tests/results")
if not RESULTS_DIR.is_dir(): if not RESULTS_DIR.is_dir():
_LOGGER.warning("Switching to sub-directory") _LOGGER.warning("Switching to sub-directory")
@@ -47,7 +48,9 @@ def get_test_dirs():
if test_path.is_dir(): if test_path.is_dir():
path_dict[key] = test_path path_dict[key] = test_path
else: else:
errstr = "Can't find %s test files in %s" % (key, [TEST_DIR / path, path]) errstr = (
"Can't find {0:s} test files in {1:s}".format(
key, [TEST_DIR / path, path]))
raise FileNotFoundError(errstr) raise FileNotFoundError(errstr)
return path_dict return path_dict
@@ -63,11 +66,13 @@ def run_propka(options, pdb_path, tmp_path):
options += [str(pdb_path)] options += [str(pdb_path)]
args = propka.lib.loadOptions(options) args = propka.lib.loadOptions(options)
try: try:
_LOGGER.warning("Working in tmpdir %s because of PROPKA file output; need to fix this.", _LOGGER.warning(
tmp_path) "Working in tmpdir {0:s} because of PROPKA file output; "
"need to fix this.".format(str(tmp_path)))
cwd = Path.cwd() cwd = Path.cwd()
os.chdir(tmp_path) os.chdir(tmp_path)
molecule = propka.molecular_container.Molecular_container(str(pdb_path), args) molecule = propka.molecular_container.Molecular_container(
str(pdb_path), args)
molecule.calculate_pka() molecule.calculate_pka()
molecule.write_pka() molecule.write_pka()
finally: finally:
@@ -90,7 +95,7 @@ def compare_output(pdb, tmp_path, ref_path):
ref_data.append(float(line)) ref_data.append(float(line))
test_data = [] test_data = []
pka_path = Path(tmp_path) / ("%s.pka" % pdb) pka_path = Path(tmp_path) / ("{0:s}.pka".format(pdb))
with open(pka_path, "rt") as pka_file: with open(pka_path, "rt") as pka_file:
at_pka = False at_pka = False
for line in pka_file: for line in pka_file:
@@ -100,12 +105,15 @@ def compare_output(pdb, tmp_path, ref_path):
elif line.startswith("---"): elif line.startswith("---"):
at_pka = False at_pka = False
else: else:
m = re.search(r'([0-9]+\.[0-9]+)', line) match = re.search(r'([0-9]+\.[0-9]+)', line)
value = float(m.group(0)) value = float(match.group(0))
test_data.append(value) test_data.append(value)
errstr = "Error exceeds maximum allowed value (%d decimal places)" % MAX_ERR_DECIMALS errstr = (
assert_almost_equal(test_data, ref_data, decimal=MAX_ERR_DECIMALS, "Error exceeds maximum allowed value ({0:d} decimal places)".format(
err_msg=errstr, verbose=True) MAX_ERR_DECIMALS))
assert_almost_equal(
test_data, ref_data, decimal=MAX_ERR_DECIMALS, err_msg=errstr,
verbose=True)
@pytest.mark.parametrize("pdb, options", [ @pytest.mark.parametrize("pdb, options", [
@@ -113,24 +121,26 @@ def compare_output(pdb, tmp_path, ref_path):
pytest.param('1HPX', [], id="1HPX: no options"), pytest.param('1HPX', [], id="1HPX: no options"),
pytest.param('4DFR', [], id="4DFR: no options"), pytest.param('4DFR', [], id="4DFR: no options"),
pytest.param('3SGB', [], id="3SGB: no options"), pytest.param('3SGB', [], id="3SGB: no options"),
pytest.param('3SGB-subset', ["--titrate_only", pytest.param('3SGB-subset', [
"E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"], "--titrate_only",
id="3SGB: --titrate_only"), "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"],
id="3SGB: --titrate_only"),
pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet")]) pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet")])
def test_regression(pdb, options, tmp_path): def test_regression(pdb, options, tmp_path):
"""Basic regression test of PROPKA functionality.""" """Basic regression test of PROPKA functionality."""
path_dict = get_test_dirs() path_dict = get_test_dirs()
ref_path = path_dict["results"] / ("%s.dat" % pdb) ref_path = path_dict["results"] / ("{0:s}.dat".format(pdb))
if ref_path.is_file(): if ref_path.is_file():
ref_path = ref_path.resolve() ref_path = ref_path.resolve()
else: else:
_LOGGER.warning("Missing results file for comparison: %s", ref_path) _LOGGER.warning("Missing results file for comparison: {0:s}".format(
str(ref_path)))
ref_path = None ref_path = None
pdb_path = path_dict["pdbs"] / ("%s.pdb" % pdb) pdb_path = path_dict["pdbs"] / ("{0:s}.pdb".format(pdb))
if pdb_path.is_file(): if pdb_path.is_file():
pdb_path = pdb_path.resolve() pdb_path = pdb_path.resolve()
else: else:
errstr = "Missing PDB file: %s" % pdb_path errstr = "Missing PDB file: {0:s}".format(pdb_path)
raise FileNotFoundError(errstr) raise FileNotFoundError(errstr)
tmp_path = Path(tmp_path).resolve() tmp_path = Path(tmp_path).resolve()

60
tests/test_hybrid36.py Normal file
View File

@@ -0,0 +1,60 @@
"""Test the hybrid36 module."""
import unittest
import propka.hybrid36 as hybrid36
class Hybrid36Test(unittest.TestCase):
"""Test class for hybrid36."""
def test_decode(self):
"""Test decoding functions."""
test_values = {
"99999": 99999,
"A0000": 100000,
"0": 0,
"9": 9,
"A": 10,
" ZZZZY": 43770014,
"ZZZZZ": 43770015, # ZZZZZ - A0000 + 100000
"a0000": 43770016,
"zzzzz": 87440031,
"zzzzy": 87440030,
"99": 99,
"A0": 100,
"ZZ": 1035,
"zz": 1971,
"-99999": -99999,
"-A0000": -100000,
"-0": 0,
"-9": -9,
"-A": -10,
"-ZZZZY": -43770014,
"-ZZZZZ": -43770015, # ZZZZZ - A0000 + 100000
"-a0000": -43770016,
"-zzzzz": -87440031,
"-zzzzy": -87440030,
"-99": -99,
"-A0": -100,
"-ZZ": -1035,
"-zz": -1971,
"PROPKA": 954495146,
"A001Z": 100071,
"B0000": 1779616,
}
for key, value in test_values.items():
self.assertEqual(hybrid36.decode(key), value)
def test_errors(self):
"""Test values that should raise errors."""
test_values = [
"99X99",
"X9-99",
"XYZa",
"",
"-",
"!NotOk",
]
for value in test_values:
with self.assertRaises(ValueError) as err:
hybrid36.decode(value)
self.assertTrue(value in str(err.exception))