diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..fb68c9c --- /dev/null +++ b/.pylintrc @@ -0,0 +1,2 @@ +[MESSAGES CONTROL] +disable = no-else-return \ No newline at end of file diff --git a/propka/atom.py b/propka/atom.py index b93f051..1e87af2 100644 --- a/propka/atom.py +++ b/propka/atom.py @@ -5,15 +5,35 @@ import propka.group from . import hybrid36 +# Format strings that get used in multiple places (or are very complex) +PKA_FMT = "{:6.2f}" +INPUT_LINE_FMT = ( + "{type:6s}{r.numb:>5d} {atom_label} {r.res_name}{r.chain_id:>2s}" + "{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{group:>6s}{pka:>6s} \n") +PDB_LINE_FMT1 = ( + "{type:6s}{r.numb:>5d} {atom_label} {r.res_name}{r.chain_id:>2s}" + "{r.res_num:>4d}{r.x:>12.3f}{r.y:>8.3f}{r.z:>8.3f}{r.occ:>6s}" + "{r.beta:>6s}\n") +MOL2_LINE_FMT = ( + "{id:<4d} {atom_label:4s} " + "{r.x:>10.4f} {r.y:>10.4f} {r.z:>10.4f} " + "{r.sybyl_type:>6s} {r.res_num:>6d} {r.res_name:>10s} 0.0000\n") +PDB_LINE_FMT2 = ( + "ATOM {numb:>6d} {atom_label} {res_name}{chain_id:>2s}{res_num:>4d}" + "{x:>12.3f}{y:>8.3f}{z:>8.3f}{occ:>6.2f}{beta:>6.2f}\n") +STR_FMT = ( + "{r.numb:>5d}-{r.name:>4s} {r.res_num:>5d}-{r.res_name:>3s} " + "({r.chain_id:1s}) [{r.x:>8.3f} {r.y:>8.3f} {r.z:>8.3f}] {r.element:s}") + + class Atom(object): """Atom class - contains all atom information found in the PDB file""" - def __init__(self, line=None, verbose=False): + def __init__(self, line=None): """Initialize Atom object. Args: line: Line from a PDB file to set properties of atom. - verbose: TODO - this does not appear to be used. Can we remove it? """ self.occ = None self.numb = None @@ -48,7 +68,8 @@ class Atom(object): self.num_pi_elec_conj_2_3_bonds = 0 self.groups_extracted = 0 self.set_properties(line) - self.residue_label = "%-3s%4d%2s" % (self.name, self.res_num, self.chain_id) + fmt = "{r.name:3s}{r.res_num:>4d}{r.chain_id:>2s}" + self.residue_label = fmt.format(r=self) # ligand atom types self.sybyl_type = '' @@ -82,13 +103,14 @@ class Atom(object): self.y = float(line[38:46].strip()) self.z = float(line[46:54].strip()) self.res_num = int(line[22:26].strip()) - self.res_name = "%-3s" % (line[17:20].strip()) + self.res_name = "{0:<3s}".format(line[17:20].strip()) self.chain_id = line[21] # Set chain id to "_" if it is just white space. if not self.chain_id.strip(): self.chain_id = '_' self.type = line[:6].strip().lower() + # TODO - define nucleic acid residue names elsewhere if self.res_name in ['DA ', 'DC ', 'DG ', 'DT ']: self.type = 'hetatm' @@ -101,7 +123,8 @@ class Atom(object): if len(self.name) == 4: self.element = self.element[0] if len(self.element) == 2: - self.element = '%1s%1s' % (self.element[0], self.element[1].lower()) + self.element = '{0:1s}{1:1s}'.format( + self.element[0], self.element[1].lower()) def set_group_type(self, type_): """Set group type of atom. @@ -130,9 +153,9 @@ class Atom(object): array of bonded atoms. """ res = [] - for ba in self.bonded_atoms: - if ba.element == element: - res.append(ba) + for bond_atom in self.bonded_atoms: + if bond_atom.element == element: + res.append(bond_atom) return res def get_bonded_heavy_atoms(self): @@ -156,12 +179,14 @@ class Atom(object): if ba == other_atom: return True if max_bonds > cur_bond: - if ba.is_atom_within_bond_distance(other_atom, max_bonds, cur_bond+1): + if ba.is_atom_within_bond_distance(other_atom, max_bonds, + cur_bond+1): return True return False def set_property(self, numb=None, name=None, res_name=None, chain_id=None, - res_num=None, x=None, y=None, z=None, occ=None, beta=None): + res_num=None, x=None, y=None, z=None, occ=None, + beta=None): """Set properties of the atom object. Args: @@ -225,7 +250,8 @@ class Atom(object): """PDB line for this atom. TODO - Could be @property method/attribute - TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2 + TODO - figure out difference between make_pdb_line, make_input_line, + and make_pdb_line2 Returns: String with PDB-format line. @@ -238,12 +264,11 @@ class Atom(object): group = 'C-' ## circumventing C-/COO parameter unification if self.group.titratable: - model_pka = '%6.2f'%self.group.model_pka - str_ = "%-6s%5d %s " % (self.type.upper(), self.numb, - propka.lib.makeTidyAtomLabel(self.name, self.element)) - str_ += "%s%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s \n" % (self.res_name, self.chain_id, - self.res_num, self.x, self.y, - self.z, group, model_pka) + model_pka = PKA_FMT.format(self.group.model_pka) + str_ = INPUT_LINE_FMT.format( + type=self.type.upper(), r=self, + atom_label=propka.lib.make_tidy_atom_label(self.name, self.element), + group=group, pka=model_pka) return str_ def make_conect_line(self): @@ -252,15 +277,15 @@ class Atom(object): Returns: String with PDB line. """ - res = 'CONECT%5d' % self.numb + res = 'CONECT{0:5d}'.format(self.numb) bonded = [] for atom in self.bonded_atoms: bonded.append(atom.numb) bonded.sort() - for b in bonded: - res += '%5d'%b + for bond in bonded: + res += '{0:5d}'.format(bond) res += '\n' return res @@ -290,10 +315,15 @@ class Atom(object): self.occ = self.occ.replace('LG', 'non_titratable_ligand') # try to initialise the group try: - # TODO - get rid of this exec() statement for security reasons - exec('self.group = propka.group.%s_group(self)' % self.occ) + group_attr = "{0:s}_group".format(self.occ) + group_attr = getattr(propka.group, group_attr) + self.group = group_attr(self) except: - raise Exception('%s in input_file is not recognized as a group' % self.occ) + # TODO - be more specific with expection handling here + str_ = ( + '{0:s} in input_file is not recognized as a group'.format( + self.occ)) + raise Exception(str_) # set the model pKa value if self.beta != '-': self.group.model_pka = float(self.beta) @@ -306,17 +336,15 @@ class Atom(object): """Create PDB line. TODO - this could/should be a @property method/attribute - TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2 + TODO - figure out difference between make_pdb_line, make_input_line, + and make_pdb_line2 Returns: String with PDB line. """ - str_ = "%-6s%5d " % (self.type.upper(), self.numb) - str_ += "%s %s" % (propka.lib.makeTidyAtomLabel(self.name, self.element), - self.res_name) - str_ += "%2s%4d%12.3lf%8.3lf%8.3lf%6s%6s\n" % (self.chain_id, self.res_num, - self.x, self.y, self.z, - self.occ, self.beta) + str_ = PDB_LINE_FMT1.format( + type=self.type.upper(), r=self, + atom_label=propka.lib.make_tidy_atom_label(self.name, self.element)) return str_ def make_mol2_line(self, id_): @@ -329,19 +357,19 @@ class Atom(object): Returns: String with MOL2 line. """ - str_ = "%-4d %-4s " % (id_, propka.lib.makeTidyAtomLabel(self.name, - self.element)) - str_ += "%10.4f %10.4f %10.4f " % (self.x, self.y, self.z) - str_ += "%6s %6d %10s %10.4f\n" % (self.sybyl_type.replace('-', ''), - self.res_num, self.res_name, 0.0) + str_ = MOL2_LINE_FMT.format( + id=id_, r=self, + atom_label=propka.lib.make_tidy_atom_label(self.name, self.element)) return str_ def make_pdb_line2(self, numb=None, name=None, res_name=None, chain_id=None, - res_num=None, x=None, y=None, z=None, occ=None, beta=None): + res_num=None, x=None, y=None, z=None, occ=None, + beta=None): """Create a PDB line. TODO - this could/should be a @property method/attribute - TODO - figure out difference between make_pdb_line, make_input_line, and make_pdb_line2 + TODO - figure out difference between make_pdb_line, make_input_line, + and make_pdb_line2 Returns: String with PDB line. @@ -366,18 +394,11 @@ class Atom(object): occ = self.occ if beta is None: beta = self.beta - str_ = "ATOM " - str_ += "%6d" % (numb) - str_ += " %s" % (propka.lib.makeTidyAtomLabel(name, self.element)) - str_ += " %s" % (res_name) - str_ += "%2s" % (chain_id) - str_ += "%4d" % (res_num) - str_ += "%12.3lf" % (x) - str_ += "%8.3lf" % (y) - str_ += "%8.3lf" % (z) - str_ += "%6.2lf" % (occ) - str_ += "%6.2lf" % (beta) - str_ += '\n' + str_ = PDB_LINE_FMT2.format( + numb=numb, res_name=res_name, chain_id=chain_id, res_num=res_num, + x=x, y=y, z=z, occ=occ, beta=beta, + atom_label=propka.lib.make_tidy_atom_label(name, self.element) + ) return str_ def get_tidy_label(self): @@ -387,14 +408,11 @@ class Atom(object): Returns: String with label""" - return propka.lib.makeTidyAtomLabel(self.name, self.element) + return propka.lib.make_tidy_atom_label(self.name, self.element) def __str__(self): """Return an undefined-format string version of this atom.""" - return '%5d-%4s %5d-%3s (%1s) [%8.3f %8.3f %8.3f] %s' % (self.numb, self.name, - self.res_num, self.res_name, - self.chain_id, self.x, self.y, - self.z, self.element) + return STR_FMT.format(r=self) def set_residue(self, residue): """ Makes a reference to the parent residue diff --git a/propka/bonds.py b/propka/bonds.py index 8e7ef66..c6656df 100644 --- a/propka/bonds.py +++ b/propka/bonds.py @@ -1,16 +1,10 @@ """PROPKA representation of bonds.""" -# TODO - is pickle still used? -import pickle -# TODO - eliminate use of sys -import sys -# TODO - eliminate use of os -import os import math import json import pkg_resources import propka.calculations # TODO - replace the info/warning imports with logging functionality -from propka.lib import info, warning +from propka.lib import info # TODO - should these constants be defined higher up in the module? @@ -30,18 +24,23 @@ class BondMaker: """ def __init__(self): # predefined bonding distances - self.distances = {'S-S' : DISULFIDE_DISTANCE, 'F-F' : FLUORIDE_DISTANCE} + self.distances = {'S-S' : DISULFIDE_DISTANCE, + 'F-F' : FLUORIDE_DISTANCE} self.distances_squared = {} - for k in self.distances.keys(): - self.distances_squared[k] = self.distances[k] * self.distances[k] - self.H_dist = HYDROGEN_DISTANCE + for key in self.distances: + self.distances_squared[key] = ( + self.distances[key] * self.distances[key]) + h_dist = HYDROGEN_DISTANCE self.default_dist = DEFAULT_DISTANCE - self.H_dist_squared = self.H_dist * self.H_dist + self.h_dist_squared = h_dist * h_dist self.default_dist_squared = self.default_dist * self.default_dist - distances = list(self.distances_squared.values()) + [self.default_dist_squared] + distances = ( + list(self.distances_squared.values()) + + [self.default_dist_squared]) self.max_sq_distance = max(distances) # protein bonding data - self.data_file_name = pkg_resources.resource_filename(__name__, 'protein_bonds.json') + self.data_file_name = ( + pkg_resources.resource_filename(__name__, 'protein_bonds.json')) with open(self.data_file_name, 'rt') as json_file: self.protein_bonds = json.load(json_file) self.intra_residue_backbone_bonds = {'N': ['CA'], 'CA': ['N', 'C'], @@ -74,9 +73,13 @@ class BondMaker: self.num_pi_elec_conj_bonds_ligands = {'N.am': 1, 'N.pl3': 1} self.backbone_atoms = list(self.intra_residue_backbone_bonds.keys()) self.terminal_oxygen_names = ['OXT', 'O\'\''] + self.boxes = {} + self.num_box_x = None + self.num_box_y = None + self.num_box_z = None def find_bonds_for_protein(self, protein): - """Finds bonds proteins based on the way atoms normally bond in proteins. + """Bonds proteins based on the way atoms normally bond. Args: protein: the protein to search for bonds @@ -92,9 +95,12 @@ class BondMaker: last_residues = [] for chain in protein.chains: for i in range(1, len(chain.residues)): - if chain.residues[i-1].res_name.replace(' ', '') not in ['N+', 'C-']: - if chain.residues[i].res_name.replace(' ', '') not in ['N+', 'C-']: - self.connect_backbone(chain.residues[i-1], chain.residues[i]) + if (chain.residues[i-1].res_name.replace(' ', '') + not in ['N+', 'C-']): + if (chain.residues[i].res_name.replace(' ', '') + not in ['N+', 'C-']): + self.connect_backbone(chain.residues[i-1], + chain.residues[i]) last_residues.append(chain.residues[i]) info('++++ terminal oxygen ++++') # terminal OXT @@ -121,7 +127,8 @@ class BondMaker: if atom1.name == 'SG': for atom2 in cys2.atoms: if atom2.name == 'SG': - dist = propka.calculations.squared_distance(atom1, atom2) + dist = propka.calculations.squared_distance(atom1, + atom2) # TODO - is SS_dist_squared an attribute of this class? if dist < self.SS_dist_squared: self.make_bond(atom1, atom2) @@ -138,7 +145,6 @@ class BondMaker: if atom2.name == 'C': self.make_bond(atom1, atom2) - # TODO - stopped here. def connect_backbone(self, residue1, residue2): """Sets up bonds in the backbone @@ -152,8 +158,8 @@ class BondMaker: if atom1.name == 'C': for atom2 in residue2.atoms: if atom2.name == 'N': - if propka.calculations.squared_distance(atom1, atom2) \ - < self.default_dist_squared: + if (propka.calculations.squared_distance(atom1, atom2) + < self.default_dist_squared): self.make_bond(atom1, atom2) def find_bonds_for_residue_backbone(self, residue): @@ -164,17 +170,18 @@ class BondMaker: """ for atom1 in residue.atoms: if atom1.name in list(self.num_pi_elec_bonds_backbone.keys()): - atom1.num_pi_elec_2_3_bonds \ - = self.num_pi_elec_bonds_backbone[atom1.name] - if atom1.name in \ - list(self.num_pi_elec_conj_bonds_backbone.keys()) \ - and len(atom1.bonded_atoms) > 1: # last part to avoid including N-term - atom1.num_pi_elec_conj_2_3_bonds \ - = self.num_pi_elec_conj_bonds_backbone[atom1.name] + atom1.num_pi_elec_2_3_bonds = ( + self.num_pi_elec_bonds_backbone[atom1.name]) + if atom1.name in ( + list(self.num_pi_elec_conj_bonds_backbone.keys()) + and len(atom1.bonded_atoms) > 1): # avoid N-term + atom1.num_pi_elec_conj_2_3_bonds = ( + self.num_pi_elec_conj_bonds_backbone[atom1.name]) if atom1.name in self.backbone_atoms: for atom2 in residue.atoms: - if atom2.name in self.intra_residue_backbone_bonds[atom1.name]: + if atom2.name in ( + self.intra_residue_backbone_bonds[atom1.name]): self.make_bond(atom1, atom2) def find_bonds_for_side_chain(self, atoms): @@ -184,18 +191,19 @@ class BondMaker: atoms: list of atoms to check for bonds """ for atom1 in atoms: - key = '%s-%s' % (atom1.res_name, atom1.name) + key = '{0:s}-{1:s}'.format(atom1.res_name, atom1.name) if key in list(self.num_pi_elec_bonds_sidechains.keys()): - atom1.num_pi_elec_2_3_bonds \ - = self.num_pi_elec_bonds_sidechains[key] + atom1.num_pi_elec_2_3_bonds = ( + self.num_pi_elec_bonds_sidechains[key]) if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()): - atom1.num_pi_elec_conj_2_3_bonds \ - = self.num_pi_elec_conj_bonds_sidechains[key] - + atom1.num_pi_elec_conj_2_3_bonds = ( + self.num_pi_elec_conj_bonds_sidechains[key]) if not atom1.name in self.backbone_atoms: if not atom1.name in self.terminal_oxygen_names: for atom2 in atoms: - if atom2.name in self.protein_bonds[atom1.res_name][atom1.name]: + if atom2.name in ( + self + .protein_bonds[atom1.res_name][atom1.name]): self.make_bond(atom1, atom2) def find_bonds_for_ligand(self, ligand): @@ -219,25 +227,30 @@ class BondMaker: # for ligands if atom.type == 'hetatm': if atom.sybyl_type in self.num_pi_elec_bonds_ligands.keys(): - atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_ligands[atom.sybyl_type] - if atom.sybyl_type in self.num_pi_elec_conj_bonds_ligands.keys(): - atom.num_pi_elec_conj_2_3_bonds \ - = self.num_pi_elec_conj_bonds_ligands[atom.sybyl_type] + atom.num_pi_elec_2_3_bonds = ( + self.num_pi_elec_bonds_ligands[atom.sybyl_type]) + if atom.sybyl_type in ( + self.num_pi_elec_conj_bonds_ligands.keys()): + atom.num_pi_elec_conj_2_3_bonds = ( + self.num_pi_elec_conj_bonds_ligands[atom.sybyl_type]) # for protein if atom.type == 'atom': - key = '%s-%s' % (atom.res_name, atom.name) + key = '{0:s}-{1:s}'.format(atom.res_name, atom.name) if key in list(self.num_pi_elec_bonds_sidechains.keys()): - atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_sidechains[key] + atom.num_pi_elec_2_3_bonds = ( + self.num_pi_elec_bonds_sidechains[key]) if key in list(self.num_pi_elec_conj_bonds_sidechains.keys()): - atom.num_pi_elec_conj_2_3_bonds = self.num_pi_elec_conj_bonds_sidechains[key] - + atom.num_pi_elec_conj_2_3_bonds = ( + self.num_pi_elec_conj_bonds_sidechains[key]) if atom.name in list(self.num_pi_elec_bonds_backbone.keys()): - atom.num_pi_elec_2_3_bonds = self.num_pi_elec_bonds_backbone[atom.name] - if atom.name in list(self.num_pi_elec_conj_bonds_backbone.keys()) \ - and len(atom.bonded_atoms) > 1: + atom.num_pi_elec_2_3_bonds = ( + self.num_pi_elec_bonds_backbone[atom.name]) + if atom.name in list( + self.num_pi_elec_conj_bonds_backbone.keys()) and ( + len(atom.bonded_atoms) > 1): # last part to avoid including N-term - atom.num_pi_elec_conj_2_3_bonds \ - = self.num_pi_elec_conj_bonds_backbone[atom.name] + atom.num_pi_elec_conj_2_3_bonds = ( + self.num_pi_elec_conj_bonds_backbone[atom.name]) def find_bonds_for_protein_by_distance(self, molecule): """Finds bonds for all atoms in the molecule. @@ -287,9 +300,9 @@ class BondMaker: sq_dist = propka.calculations.squared_distance(atom1, atom2) if sq_dist > self.max_sq_distance: return False - key = '%s-%s' % (atom1.element, atom2.element) + key = '{0:s}-{1:s}'.format(atom1.element, atom2.element) h_count = key.count('H') - if sq_dist < self.H_dist_squared and h_count == 1: + if sq_dist < self.h_dist_squared and h_count == 1: return True if sq_dist < self.default_dist_squared and h_count == 0: return True @@ -305,7 +318,8 @@ class BondMaker: molecules: list of molecules for finding bonds. """ for name in molecules.conformation_names: - self.find_bonds_for_atoms_using_boxes(molecules.conformations[name].atoms) + self.find_bonds_for_atoms_using_boxes( + molecules.conformations[name].atoms) def add_pi_electron_information(self, molecules): """Add pi electron information to a molecule. @@ -314,7 +328,8 @@ class BondMaker: molecules: list of molecules for adding pi electron information. """ for name in molecules.conformation_names: - self.add_pi_electron_table_info(molecules.conformations[name].atoms) + self.add_pi_electron_table_info( + molecules.conformations[name].atoms) def find_bonds_for_atoms_using_boxes(self, atoms): """Finds all bonds for a list of atoms. @@ -370,16 +385,17 @@ class BondMaker: z: box z-coordinates atom: the atom to place in a box """ - for bx in [x, x+1]: - for by in [y, y+1]: - for bz in [z, z+1]: - key = (bx, by, bz) + for box_x in [x, x+1]: + for box_y in [y, y+1]: + for box_z in [z, z+1]: + key = (box_x, box_y, box_z) try: self.boxes[key].append(atom) except KeyError: pass - def has_bond(self, atom1, atom2): + @staticmethod + def has_bond(atom1, atom2): """Look for bond between two atoms. Args: @@ -392,7 +408,8 @@ class BondMaker: return True return False - def make_bond(self, atom1, atom2): + @staticmethod + def make_bond(atom1, atom2): """Makes a bond between atom1 and atom2 Args: @@ -418,10 +435,12 @@ class BondMaker: name_i = atom.name resi_j = bonded_atom.res_name name_j = bonded_atom.name - if not name_i in self.backbone_atoms or\ - not name_j in self.backbone_atoms: - if not name_i in self.terminal_oxygen_names and\ - not name_j in self.terminal_oxygen_names: + if not name_i in ( + self.backbone_atoms + or not name_j in self.backbone_atoms): + if not name_i in ( + self.terminal_oxygen_names + and not name_j in self.terminal_oxygen_names): if not resi_i in list(self.protein_bonds.keys()): self.protein_bonds[resi_i] = {} if not name_i in self.protein_bonds[resi_i]: diff --git a/propka/calculations.py b/propka/calculations.py index ca0f5a6..3770983 100644 --- a/propka/calculations.py +++ b/propka/calculations.py @@ -1,31 +1,99 @@ """PROPKA calculations.""" import math -import copy -import sys import propka.protonate import propka.bonds -from propka.lib import info, warning +from propka.lib import warning, info + + +# TODO - this file should be broken into three separate files: +# * calculations.py - includes basic functions for calculating distances, etc. +# * hydrogen.py - includes bonding and protonation functions +# * energy.py - includes energy functions (dependent on distance functions) + + +# TODO - the next set of functions form a distinct "module" for distance calculation + + +# Maximum distance used to bound calculations of smallest distance +MAX_DISTANCE = 1e6 + + +def squared_distance(atom1, atom2): + """Calculate the squared distance between two atoms. + + Args: + atom1: first atom for distance calculation + atom2: second atom for distance calculation + Returns: + distance squared + """ + dx = atom2.x - atom1.x + dy = atom2.y - atom1.y + dz = atom2.z - atom1.z + res = dx*dx+dy*dy+dz*dz + return res + + +def distance(atom1, atom2): + """Calculate the distance between two atoms. + + Args: + atom1: first atom for distance calculation + atom2: second atom for distance calculation + Returns: + distance + """ + return math.sqrt(squared_distance(atom1, atom2)) + + +def get_smallest_distance(atoms1, atoms2): + """Calculate the smallest distance between two groups of atoms. + + Args: + atoms1: atom group 1 + atoms2: atom group 2 + Returns: + smallest distance between groups + """ + res_dist = MAX_DISTANCE + res_atom1 = None + res_atom2 = None + for atom1 in atoms1: + for atom2 in atoms2: + dist = squared_distance(atom1, atom2) + if dist < res_dist: + res_dist = dist + res_atom1 = atom1 + res_atom2 = atom2 + return [res_atom1, math.sqrt(res_dist), res_atom2] + + +# TODO - the next set of functions form a distinct "module" for hydrogen addition def setup_bonding_and_protonation(parameters, molecular_container): """Set up bonding and protonation for a molecule. + NOTE - the unused `parameters` argument is required for compatibility in version.py + TODO - figure out why there is a similar function in version.py + Args: + parameters: not used molecular_container: molecule container. """ # make bonds - my_bond_maker = setup_bonding(parameters, molecular_container) + my_bond_maker = setup_bonding(molecular_container) # set up ligand atom names set_ligand_atom_names(molecular_container) # apply information on pi electrons my_bond_maker.add_pi_electron_information(molecular_container) # Protonate atoms if molecular_container.options.protonate_all: - my_protonator = propka.protonate.Protonate(verbose=False) - my_protonator.protonate(molecular_container) + PROTONATOR = propka.protonate.Protonate(verbose=False) + PROTONATOR.protonate(molecular_container) -def setup_bonding(parameters, molecular_container): +def setup_bonding(molecular_container): """Set up bonding for a molecular container. Args: @@ -38,6 +106,60 @@ def setup_bonding(parameters, molecular_container): return my_bond_maker +def setup_bonding_and_protonation_30_style(parameters, molecular_container): + """Set up bonding for a molecular container. + + Args: + parameters: parameters for calculation + molecular_container: the molecular container in question + Returns: + BondMaker object + """ + # Protonate atoms + protonate_30_style(molecular_container) + # make bonds + bond_maker = propka.bonds.BondMaker() + bond_maker.find_bonds_for_molecules_using_boxes(molecular_container) + return bond_maker + + +def protonate_30_style(molecular_container): + """Protonate the molecule. + + Args: + molecular_container: molecule + """ + for name in molecular_container.conformation_names: + info('Now protonating', name) + # split atom into residues + curres = -1000000 + residue = [] + o_atom = None + c_atom = None + for atom in molecular_container.conformations[name].atoms: + if atom.res_num != curres: + curres = atom.res_num + if len(residue) > 0: + #backbone + [o_atom, c_atom] = add_backbone_hydrogen( + residue, o_atom, c_atom) + #arginine + if residue[0].res_name == 'ARG': + add_arg_hydrogen(residue) + #histidine + if residue[0].res_name == 'HIS': + add_his_hydrogen(residue) + #tryptophan + if residue[0].res_name == 'TRP': + add_trp_hydrogen(residue) + #amides + if residue[0].res_name in ['GLN', 'ASN']: + add_amd_hydrogen(residue) + residue = [] + if atom.type == 'atom': + residue.append(atom) + + def set_ligand_atom_names(molecular_container): """Set names for ligands in molecular container. @@ -48,801 +170,776 @@ def set_ligand_atom_names(molecular_container): molecular_container.conformations[name].set_ligand_atom_names() -def addArgHydrogen(residue): - """ - Adds Arg hydrogen atoms to residues according to the 'old way'. +def add_arg_hydrogen(residue): + """Adds Arg hydrogen atoms to residues according to the 'old way'. + + Args: + residue: arginine residue to protonate + Returns: + list of hydrogen atoms """ #info('Adding arg H',residue) for atom in residue: - if atom.name == "CD": - CD = atom + if atom.name == "CD": + cd_atom = atom elif atom.name == "CZ": - CZ = atom + cz_atom = atom elif atom.name == "NE": - NE = atom + ne_atom = atom elif atom.name == "NH1": - NH1 = atom + nh1_atom = atom elif atom.name == "NH2": - NH2 = atom + nh2_atom = atom + h1_atom = protonate_sp2(cd_atom, ne_atom, cz_atom) + h1_atom.name = "HE" + h2_atom = protonate_direction(nh1_atom, ne_atom, cz_atom) + h2_atom.name = "HN1" + h3_atom = protonate_direction(nh1_atom, ne_atom, cd_atom) + h3_atom.name = "HN2" + h4_atom = protonate_direction(nh2_atom, ne_atom, cz_atom) + h4_atom.name = "HN3" + h5_atom = protonate_direction(nh2_atom, ne_atom, h1_atom) + h5_atom.name = "HN4" + return [h1_atom, h2_atom, h3_atom, h4_atom, h5_atom] - H1 = protonateSP2([CD, NE, CZ]) - H1.name = "HE" - H2 = protonateDirection([NH1, NE, CZ]) - H2.name = "HN1" - H3 = protonateDirection([NH1, NE, CD]) - H3.name = "HN2" - H4 = protonateDirection([NH2, NE, CZ]) - H4.name = "HN3" - H5 = protonateDirection([NH2, NE, H1]) - H5.name = "HN4" - return [H1,H2,H3,H4,H5] +def add_his_hydrogen(residue): + """Adds His hydrogen atoms to residues according to the 'old way'. -def addHisHydrogen(residue): - """ - Adds His hydrogen atoms to residues according to the 'old way'. + Args: + residue: histidine residue to protonate """ for atom in residue: - if atom.name == "CG": - CG = atom + if atom.name == "CG": + cg_atom = atom elif atom.name == "ND1": - ND = atom + nd_atom = atom elif atom.name == "CD2": - CD = atom + cd_atom = atom elif atom.name == "CE1": - CE = atom + ce_atom = atom elif atom.name == "NE2": - NE = atom - HD = protonateSP2([CG, ND, CE]) - HD.name = "HND" - HE = protonateSP2([CD, NE, CE]) - HE.name = "HNE" - return + ne_atom = atom + hd_atom = protonate_sp2(cg_atom, nd_atom, ce_atom) + hd_atom.name = "HND" + he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom) + he_atom.name = "HNE" -def addTrpHydrogen(residue): + +def add_trp_hydrogen(residue): + """Adds Trp hydrogen atoms to residues according to the 'old way'. + + Args: + residue: tryptophan residue to protonate """ - Adds Trp hydrogen atoms to residues according to the 'old way'. - """ - CD = None - NE = None - DE = None + cd_atom = None + ne_atom = None for atom in residue: - if atom.name == "CD1": - CD = atom + if atom.name == "CD1": + cd_atom = atom elif atom.name == "NE1": - NE = atom + ne_atom = atom elif atom.name == "CE2": - CE = atom - if CD == None or NE == None or CE == None: - str = "Did not find all atoms in %s%4d - in %s" % (self.res_name, self.res_num, "addTrpHydrogen()") - info(str) - sys.exit(0) + ce_atom = atom + if (cd_atom is None) or (ne_atom is None) or (ce_atom is None): + str_ = "Unable to find all atoms for {0:s} {1:s}".format( + residue[0].res_name, residue[0].res_num) + raise ValueError(str_) + he_atom = protonate_sp2(cd_atom, ne_atom, ce_atom) + he_atom.name = "HNE" - HE = protonateSP2([CD, NE, CE]) - HE.name = "HNE" - return +def add_amd_hydrogen(residue): + """Adds Gln & Asn hydrogen atoms to residues according to the 'old way'. -def addAmdHydrogen(residue): + Args: + residue: glutamine or asparagine residue to protonate """ - Adds Gln & Asn hydrogen atoms to residues according to the 'old way'. - """ - C = None - O = None - N = None + c_atom = None + o_atom = None + n_atom = None for atom in residue: - if (atom.res_name == "GLN" and atom.name == "CD") or (atom.res_name == "ASN" and atom.name == "CG"): - C = atom - elif (atom.res_name == "GLN" and atom.name == "OE1") or (atom.res_name == "ASN" and atom.name == "OD1"): - O = atom - elif (atom.res_name == "GLN" and atom.name == "NE2") or (atom.res_name == "ASN" and atom.name == "ND2"): - N = atom + if ((atom.res_name == "GLN" and atom.name == "CD") + or (atom.res_name == "ASN" and atom.name == "CG")): + c_atom = atom + elif ((atom.res_name == "GLN" and atom.name == "OE1") + or (atom.res_name == "ASN" and atom.name == "OD1")): + o_atom = atom + elif ((atom.res_name == "GLN" and atom.name == "NE2") + or (atom.res_name == "ASN" and atom.name == "ND2")): + n_atom = atom + if (c_atom is None) or (o_atom is None) or (n_atom is None): + str_ = "Unable to find all atoms for {0:s} {1:s}".format( + residue[0].res_name, residue[0].res_num) + raise ValueError(str_) + h1_atom = protonate_direction(n_atom, o_atom, c_atom) + h1_atom.name = "HN1" + h2_atom = protonate_average_direction(n_atom, c_atom, o_atom) + h2_atom.name = "HN2" - if C == None or O == None or N == None: - str = "Did not find N, C and/or O in %s%4d - in %s" % (atom.res_name, atom.res_num, "addAmdHydrogen()") - info(str) - sys.exit(0) - H1 = protonateDirection([N, O, C]) - H1.name = "HN1" - H2 = protonateAverageDirection([N, C, O]) - H2.name = "HN2" +def add_backbone_hydrogen(residue, o_atom, c_atom): + """Adds hydrogen backbone atoms to residues according to the old way. - return + dR is wrong for the N-terminus (i.e. first residue) but it doesn't affect + anything at the moment. Could be improved, but works for now. -def addBackBoneHydrogen(residue, O, C): + Args: + residue: residue to protonate + o_atom: backbone oxygen atom + c_atom: backbone carbon atom + Returns: + [new backbone oxygen atom, new backbone carbon atom] """ - Adds hydrogen backbone atoms to residues according to the old way; dR is wrong for the N-terminus - (i.e. first residue) but it doesn't affect anything at the moment. Could be improved, but works - for now. - """ - - new_C = None - new_O = None - N = None - - + new_c_atom = None + new_o_atom = None + n_atom = None for atom in residue: if atom.name == "N": - N = atom + n_atom = atom if atom.name == "C": - new_C = atom + new_c_atom = atom if atom.name == "O": - new_O = atom - - - - - if None in [C,O,N]: - return [new_O,new_C] - - - if N.res_name == "PRO": - """ PRO doesn't have an H-atom; do nothing """ + new_o_atom = atom + if None in [c_atom, o_atom, n_atom]: + return [new_o_atom, new_c_atom] + if n_atom.res_name == "PRO": + # PRO doesn't have an H-atom; do nothing + pass else: - H = protonateDirection([N, O, C]) - H.name = "H" - - return [new_O,new_C] + h_atom = protonate_direction(n_atom, o_atom, c_atom) + h_atom.name = "H" + return [new_o_atom, new_c_atom] +def protonate_direction(x1_atom, x2_atom, x3_atom): + """Protonates an atom, x1_atom, given a direction. + New direction for x1_atom proton is (x2_atom -> x3_atom). - -def protonateDirection(list): + Args: + x1_atom: atom to be protonated + x2_atom: atom for direction + x3_atom: other atom for direction + Returns: + new hydrogen atom """ - Protonates an atom, X1, given a direction (X2 -> X3) [X1, X2, X3] + dx = (x3_atom.x - x2_atom.x) + dy = (x3_atom.y - x2_atom.y) + dz = (x3_atom.z - x2_atom.z) + length = math.sqrt(dx*dx + dy*dy + dz*dz) + x = x1_atom.x + dx/length + y = x1_atom.y + dy/length + z = x1_atom.z + dz/length + h_atom = make_new_h(x1_atom, x, y, z) + h_atom.name = "H" + return h_atom + + +def protonate_average_direction(x1_atom, x2_atom, x3_atom): + """Protonates an atom, x1_atom, given a direction. + + New direction for x1_atom is (x1_atom/x2_atom -> x3_atom). + Note, this one uses the average of x1_atom & x2_atom (N & O) unlike + the previous N - C = O + + Args: + x1_atom: atom to be protonated + x2_atom: atom for direction + x3_atom: other atom for direction + Returns: + new hydrogen atom """ - X1 = list[0] - X2 = list[1] - X3 = list[2] - - dX = (X3.x - X2.x) - dY = (X3.y - X2.y) - dZ = (X3.z - X2.z) - length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) - x = X1.x + dX/length - y = X1.y + dY/length - z = X1.z + dZ/length + dx = (x3_atom.x + x1_atom.x)*0.5 - x2_atom.x + dy = (x3_atom.y + x1_atom.y)*0.5 - x2_atom.y + dz = (x3_atom.z + x1_atom.z)*0.5 - x2_atom.z + length = math.sqrt(dx*dx + dy*dy + dz*dz) + x = x1_atom.x + dx/length + y = x1_atom.y + dy/length + z = x1_atom.z + dz/length + h_atom = make_new_h(x1_atom, x, y, z) + h_atom.name = "H" + return h_atom - H = make_new_H(X1,x,y,z) - H.name = "H" +def protonate_sp2(x1_atom, x2_atom, x3_atom): + """Protonates a SP2 atom, given a list of atoms - - return H - - -def protonateAverageDirection(list): + Args: + x1_atom: atom to set direction + x2_atom: atom to be protonated + x3_atom: other atom to set direction + Returns: + new hydrogen atom """ - Protonates an atom, X1, given a direction (X1/X2 -> X3) [X1, X2, X3] - Note, this one uses the average of X1 & X2 (N & O) unlike the previous - N - C = O + dx = (x1_atom.x + x3_atom.x)*0.5 - x2_atom.x + dy = (x1_atom.y + x3_atom.y)*0.5 - x2_atom.y + dz = (x1_atom.z + x3_atom.z)*0.5 - x2_atom.z + length = math.sqrt(dx*dx + dy*dy + dz*dz) + x = x2_atom.x - dx/length + y = x2_atom.y - dy/length + z = x2_atom.z - dz/length + h_atom = make_new_h(x2_atom, x, y, z) + h_atom.name = "H" + return h_atom + + +def make_new_h(atom, x, y, z): + """Add a new hydrogen to an atom at the specified position. + + Args: + atom: atom to protonate + x: x position of hydrogen + y: y position of hydrogen + z: z position of hydrogen + Returns: + new hydrogen atom """ - X1 = list[0] - X2 = list[1] - X3 = list[2] - - dX = (X3.x + X1.x)*0.5 - X2.x - dY = (X3.y + X1.y)*0.5 - X2.y - dZ = (X3.z + X1.z)*0.5 - X2.z - - length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) - x = X1.x + dX/length - y = X1.y + dY/length - z = X1.z + dZ/length - - H = make_new_H(X1,x,y,z) - H.name = "H" + new_h = propka.atom.Atom() + new_h.set_property( + numb=None, name='H{0:s}'.format(atom.name[1:]), + res_name=atom.res_name, chain_id=atom.chain_id, + res_num=atom.res_num, x=x, y=y, z=z, occ=None, beta=None) + new_h.element = 'H' + new_h.bonded_atoms = [atom] + new_h.charge = 0 + new_h.steric_number = 0 + new_h.number_of_lone_pairs = 0 + new_h.number_of_protons_to_add = 0 + new_h.num_pi_elec_2_3_bonds = 0 + atom.bonded_atoms.append(new_h) + atom.conformation_container.add_atom(new_h) + return new_h - - return H +# TODO - the remaining functions form a distinct "module" for desolvation -def protonateSP2(list): - """ - Protonates a SP2 atom, H2, given a list of [X1, X2, X3] - X1-X2-X3 - """ - X1 = list[0] - X2 = list[1] - X3 = list[2] - - dX = (X1.x + X3.x)*0.5 - X2.x - dY = (X1.y + X3.y)*0.5 - X2.y - dZ = (X1.z + X3.z)*0.5 - X2.z - length = math.sqrt( dX*dX + dY*dY + dZ*dZ ) - x = X2.x - dX/length - y = X2.y - dY/length - z = X2.z - dZ/length - - H = make_new_H(X2,x,y,z) - H.name = "H" - - return H - - -def make_new_H(atom, x,y,z): - - new_H = propka.atom.Atom() - new_H.set_property(numb = None, - name = 'H%s'%atom.name[1:], - res_name = atom.res_name, - chain_id = atom.chain_id, - res_num = atom.res_num, - x = x, - y = y, - z = z, - occ = None, - beta = None) - new_H.element = 'H' - - - new_H.bonded_atoms = [atom] - new_H.charge = 0 - new_H.steric_number = 0 - new_H.number_of_lone_pairs = 0 - new_H.number_of_protons_to_add = 0 - new_H.num_pi_elec_2_3_bonds = 0 - - atom.bonded_atoms.append(new_H) - atom.conformation_container.add_atom(new_H) - - return new_H - -######## 3.0 style protonation methods end - - - - - - - -# -# Desolvation methods -# +# TODO - I have no idea what these constants mean so I labeled them "UNK_" +UNK_MIN_DISTANCE = 2.75 +MIN_DISTANCE_4TH = math.pow(UNK_MIN_DISTANCE, 4) +UNK_DIELECTRIC1 = 160 +UNK_DIELECTRIC2 = 30 +UNK_PKA_SCALING1 = 244.12 +UNK_BACKBONE_DISTANCE1 = 6.0 +UNK_BACKBONE_DISTANCE2 = 3.0 +UNK_FANGLE_MIN = 0.001 +UNK_PKA_SCALING2 = 0.8 +COMBINED_NUM_BURIED_MAX = 900 +SEPARATE_NUM_BURIED_MAX = 400 def radial_volume_desolvation(parameters, group): + """Calculate desolvation terms for group. + + Args: + parameters: parameters for desolvation calculation + group: group of atoms for calculation + """ all_atoms = group.atom.conformation_container.get_non_hydrogen_atoms() volume = 0.0 - group.Nmass = 0 - min_distance_4th = 57.1914 # pow(2.75, 4) - + # TODO - Nathan really wants to rename the num_volume variable. + # He had to re-read the original paper to figure out what it was. + # A better name would be num_volume. + group.num_volume = 0 + min_dist_4th = MIN_DISTANCE_4TH for atom in all_atoms: # ignore atoms in the same residue - if atom.res_num == group.atom.res_num and atom.chain_id == group.atom.chain_id: + if (atom.res_num == group.atom.res_num + and atom.chain_id == group.atom.chain_id): continue - sq_dist = squared_distance(group, atom) - # desolvation if sq_dist < parameters.desolv_cutoff_squared: - # use a default relative volume of 1.0 if the volume of the element is not found in parameters + # use a default relative volume of 1.0 if the volume of the element + # is not found in parameters # insert check for methyl groups - if atom.element == 'C' and atom.name not in ['CA','C']: - dv = parameters.VanDerWaalsVolume['C4'] + if atom.element == 'C' and atom.name not in ['CA', 'C']: + dvol = parameters.VanDerWaalsVolume['C4'] else: - dv = parameters.VanDerWaalsVolume.get(atom.element, 1.0) - - dv_inc = dv/max(min_distance_4th, sq_dist*sq_dist) -# dv_inc = dv/(sq_dist*sq_dist) - dv/(parameters.desolv_cutoff_squared*parameters.desolv_cutoff_squared) + dvol = parameters.VanDerWaalsVolume.get(atom.element, 1.0) + dv_inc = dvol/max(min_dist_4th, sq_dist*sq_dist) volume += dv_inc # buried if sq_dist < parameters.buried_cutoff_squared: - group.Nmass += 1 - - group.buried = calculate_weight(parameters, group.Nmass) + group.num_volume += 1 + group.buried = calculate_weight(parameters, group.num_volume) scale_factor = calculate_scale_factor(parameters, group.buried) volume_after_allowance = max(0.00, volume-parameters.desolvationAllowance) - - group.Emass = group.charge * parameters.desolvationPrefactor * volume_after_allowance * scale_factor - # Emass, Nmass - # Elocl, Nlocl -> reorganisation energy (count backbone hydorgen bond acceptors, C=O) - - - - #info('%s %5.2f %5.2f %4d'%(group, group.buried, group.Emass, group.Nmass)) - return - - - -def contactDesolvation(parameters, group): - """ - calculates the desolvation according to the Contact Model, the old default - """ - - local_radius = {'ASP': 4.5, - 'GLU': 4.5, - 'HIS': 4.5, - 'CYS': 3.5, - 'TYR': 3.5, - 'LYS': 4.5, - 'ARG': 5.0, - 'C-': 4.5, - 'N+': 4.5} - - all_atoms = group.atom.conformation_container.get_non_hydrogen_atoms() - if residue.res_name in version.desolvationRadii: - local_cutoff = version.desolvationRadii[residue.res_name] - else: - local_cutoff = 0.00 - residue.Nmass = 0 - residue.Nlocl = 0 - - for atom in all_atoms: - if atom.res_num != group.atom.res_num or atom.chain_id != group.atom.chain_id: - dX = atom.x - residue.x - dY = atom.y - residue.y - dZ = atom.z - residue.z - distance = math.sqrt(dX*dX + dY*dY + dZ*dZ) - if distance < local_cutoff: - group.Nlocl += 1 - if distance < parameters.buried_cutoff: - group.Nmass += 1 - if residue.Nmass > 400: - group.location = "BURIED " - else: - group.location = "SURFACE" - group.Emass = group.charge * parameters.desolvationPrefactor * max(0.00, group.Nmass-parameters.desolvationAllowance) - group.Elocl = group.charge * parameters.desolvationLocal * group.Nlocl - # Buried ratio - new feature in propka3.0 - # Note, there will be an unforseen problem: e.g. if one residue has Nmass > Nmax and - # the other Nmass < Nmax, the Npair will not be Nmass1 + Nmass2! - residue.buried = calculateWeight(residue.Nmass) - - return 0.00, 0.00, 0.00, 0.00 - - - - - + group.energy_volume = ( + group.charge * parameters.desolvationPrefactor + * volume_after_allowance * scale_factor) def calculate_scale_factor(parameters, weight): + """Calculate desolvation scaling factor. + + Args: + parameters: parameters for desolvation calculation + weight: weight for scaling factor + Returns: + scaling factor + """ scale_factor = 1.0 - (1.0 - parameters.desolvationSurfaceScalingFactor)*(1.0 - weight) return scale_factor -def calculate_weight(parameters, Nmass): +def calculate_weight(parameters, num_volume): + """Calculate the atom-based desolvation weight. + + TODO - figure out why a similar function exists in version.py + + Args: + parameters: parameters for desolvation calculation + num_volume: number of heavy atoms within desolvation calculation volume + Returns: + desolvation weight """ - calculating the atom-based desolvation weight - """ - weight = float(Nmass - parameters.Nmin)/float(parameters.Nmax - parameters.Nmin) + weight = ( + float(num_volume - parameters.Nmin) + / float(parameters.Nmax - parameters.Nmin)) weight = min(1.0, weight) weight = max(0.0, weight) - return weight +def calculate_pair_weight(parameters, num_volume1, num_volume2): + """Calculate the atom-pair based desolvation weight. -def squared_distance(atom1, atom2): -# if atom1 in atom2.squared_distances: -# return atom2.squared_distances[atom1] - - dx = atom2.x - atom1.x - dy = atom2.y - atom1.y - dz = atom2.z - atom1.z - - res = dx*dx+dy*dy+dz*dz - - return res - - -def distance(atom1, atom2): - return math.sqrt(squared_distance(atom1,atom2)) - - - -def get_smallest_distance(atoms1, atoms2): - res_distance =1e6 - res_atom1 = None - res_atom2 = None - - for atom1 in atoms1: - for atom2 in atoms2: - dist = squared_distance(atom1, atom2) - if dist < res_distance: - res_distance = dist - res_atom1 = atom1 - res_atom2 = atom2 - - return [res_atom1, math.sqrt(res_distance), res_atom2] - - -def calculatePairWeight(parameters, Nmass1, Nmass2): + Args: + num_volume1: number of heavy atoms within first desolvation volume + num_volume2: number of heavy atoms within second desolvation volume + Returns: + desolvation weight """ - calculating the atom-pair based desolvation weight - """ - Nmass = Nmass1 + Nmass2 - Nmin = 2*parameters.Nmin - Nmax = 2*parameters.Nmax - weight = float(Nmass - Nmin)/float(Nmax - Nmin) + num_volume = num_volume1 + num_volume2 + num_min = 2*parameters.Nmin + num_max = 2*parameters.Nmax + weight = float(num_volume - num_min)/float(num_max - num_min) weight = min(1.0, weight) weight = max(0.0, weight) - return weight -def HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle=1.0): +def hydrogen_bond_energy(dist, dpka_max, cutoffs, f_angle=1.0): + """Calculate hydrogen-bond interaction pKa shift. + + Args: + dist: distance for hydrogen bond + dpka_max: maximum pKa value shift + cutoffs: array with max and min distance values + f_angle: angle scaling factor + Returns: + pKa shift value """ - returns a hydrogen-bond interaction pKa shift - """ - if distance < cutoff[0]: + if dist < cutoffs[0]: value = 1.00 - elif distance > cutoff[1]: + elif dist > cutoffs[1]: value = 0.00 else: - value = 1.0-(distance-cutoff[0])/(cutoff[1]-cutoff[0]) - - dpKa = dpka_max*value*f_angle - - return abs(dpKa) + value = 1.0 - (dist - cutoffs[0])/(cutoffs[1] - cutoffs[0]) + dpka = dpka_max*value*f_angle + return abs(dpka) +def angle_distance_factors(atom1=None, atom2=None, atom3=None, center=None): + """Calculate distance and angle factors for three atoms for backbone interactions. + + NOTE - you need to use atom1 to be the e.g. ASP atom if distance is reset at + return: [O1 -- H2-N3]. -def AngleFactorX(atom1=None, atom2=None, atom3=None, center=None): - """ - Calculates the distance and angle-factor from three atoms for back-bone interactions, - IMPORTANT: you need to use atom1 to be the e.g. ASP atom if distance is reset at return: [O1 -- H2-N3] Also generalized to be able to be used for residue 'centers' for C=O COO interactions. + + Args: + atom1: first atom for calculation (could be None) + atom2: second atom for calculation + atom3: third atom for calculation + center: center point between atoms 1 and 2 + Returns + [distance factor between atoms 1 and 2, + angle factor, + distance factor between atoms 2 and 3] """ - - dX_32 = atom2.x - atom3.x - dY_32 = atom2.y - atom3.y - dZ_32 = atom2.z - atom3.z - - distance_23 = math.sqrt( dX_32*dX_32 + dY_32*dY_32 + dZ_32*dZ_32 ) - - dX_32 = dX_32/distance_23 - dY_32 = dY_32/distance_23 - dZ_32 = dZ_32/distance_23 - - if atom1 == None: - dX_21 = center[0] - atom2.x - dY_21 = center[1] - atom2.y - dZ_21 = center[2] - atom2.z - else: - dX_21 = atom1.x - atom2.x - dY_21 = atom1.y - atom2.y - dZ_21 = atom1.z - atom2.z - - distance_12 = math.sqrt( dX_21*dX_21 + dY_21*dY_21 + dZ_21*dZ_21 ) - - dX_21 = dX_21/distance_12 - dY_21 = dY_21/distance_12 - dZ_21 = dZ_21/distance_12 - - f_angle = dX_21*dX_32 + dY_21*dY_32 + dZ_21*dZ_32 - - - return distance_12, f_angle, distance_23 - - - -def hydrogen_bond_interaction(group1, group2, version): - - # find the smallest distance between interacting atoms - atoms1 = group1.get_interaction_atoms(group2) - atoms2 = group2.get_interaction_atoms(group1) - [closest_atom1, distance, closest_atom2] = propka.calculations.get_smallest_distance(atoms1, atoms2) - - if None in [closest_atom1, closest_atom2]: - warning('Side chain interaction failed for %s and %s' % (group1.label, group2.label)) - return None - - # get the parameters - [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1,closest_atom2) - - if dpka_max==None or None in cutoff: - return None - - # check that the closest atoms are close enough - if distance >= cutoff[1]: - return None - - # check that bond distance criteria is met - bond_distance_too_short = group1.atom.is_atom_within_bond_distance(group2.atom, - version.parameters.min_bond_distance_for_hydrogen_bonds,1) - if bond_distance_too_short: - return None - - # set the angle factor + # The angle factor # # ---closest_atom1/2 # . # . # the_hydrogen---closest_atom2/1--- + dx_32 = atom2.x - atom3.x + dy_32 = atom2.y - atom3.y + dz_32 = atom2.z - atom3.z + dist_23 = math.sqrt(dx_32 * dx_32 + dy_32 * dy_32 + dz_32 * dz_32) + dx_32 = dx_32/dist_23 + dy_32 = dy_32/dist_23 + dz_32 = dz_32/dist_23 + if atom1 is None: + dx_21 = center[0] - atom2.x + dy_21 = center[1] - atom2.y + dz_21 = center[2] - atom2.z + else: + dx_21 = atom1.x - atom2.x + dy_21 = atom1.y - atom2.y + dz_21 = atom1.z - atom2.z + dist_12 = math.sqrt(dx_21 * dx_21 + dy_21 * dy_21 + dz_21 * dz_21) + dx_21 = dx_21/dist_12 + dy_21 = dy_21/dist_12 + dz_21 = dz_21/dist_12 + f_angle = dx_21*dx_32 + dy_21*dy_32 + dz_21*dz_32 + return dist_12, f_angle, dist_23 + + +def hydrogen_bond_interaction(group1, group2, version): + """Calculate energy for hydrogen bond interactions between two groups. + + Args: + group1: first interacting group + group2: second interacting group + version: an object that contains version-specific parameters + Returns: + hydrogen bond interaction energy + """ + # find the smallest distance between interacting atoms + atoms1 = group1.get_interaction_atoms(group2) + atoms2 = group2.get_interaction_atoms(group1) + [closest_atom1, dist, closest_atom2] = get_smallest_distance(atoms1, atoms2) + if None in [closest_atom1, closest_atom2]: + warning( + 'Side chain interaction failed for {0:s} and {1:s}'.format( + group1.label, group2.label)) + return None + # get the parameters + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1, + closest_atom2) + if (dpka_max is None) or (None in cutoff): + return None + # check that the closest atoms are close enough + if dist >= cutoff[1]: + return None + # check that bond distance criteria is met + min_hbond_dist = version.parameters.min_bond_distance_for_hydrogen_bonds + if group1.atom.is_atom_within_bond_distance(group2.atom, min_hbond_dist, 1): + return None + # set angle factor f_angle = 1.0 if group2.type in version.parameters.angular_dependent_sidechain_interactions: if closest_atom2.element == 'H': heavy_atom = closest_atom2.bonded_atoms[0] - hydrogen = closest_atom2 - distance, f_angle, nada = propka.calculations.AngleFactorX(closest_atom1, hydrogen, heavy_atom) + hydrogen = closest_atom2 + dist, f_angle, _ = angle_distance_factors(closest_atom1, hydrogen, + heavy_atom) else: - # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to - # the titratable atom than the hydrogen. In either case we set the angle factor - # to 0 + # Either the structure is corrupt (no hydrogen), or the heavy atom + # is closer to the titratable atom than the hydrogen. In either + # case, we set the angle factor to 0 f_angle = 0.0 - elif group1.type in version.parameters.angular_dependent_sidechain_interactions: if closest_atom1.element == 'H': heavy_atom = closest_atom1.bonded_atoms[0] - hydrogen = closest_atom1 - distance, f_angle, nada = propka.calculations.AngleFactorX(closest_atom2, hydrogen, heavy_atom) + hydrogen = closest_atom1 + dist, f_angle, _ = angle_distance_factors(closest_atom2, hydrogen, + heavy_atom) else: - # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to - # the titratable atom than the hydrogen. In either case we set the angle factor - # to 0 + # Either the structure is corrupt (no hydrogen), or the heavy atom + # is closer to the titratable atom than the hydrogen. In either + # case, we set the angle factor to 0 f_angle = 0.0 - - weight = version.calculatePairWeight(group1.Nmass, group2.Nmass) - - exception, value = version.checkExceptions(group1, group2) - #exception = False # circumventing exception - if exception == True: - """ do nothing, value should have been assigned """ - #info(" exception for %s %s %6.2lf" % (group1.label, group2.label, value)) + weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume) + exception, value = version.check_exceptions(group1, group2) + if exception: + # Do nothing, value should have been assigned. + pass else: - value = version.calculateSideChainEnergy(distance, dpka_max, cutoff, weight, f_angle) - - # info('distance',distance) - # info('dpka_max',dpka_max) - # info('cutoff',cutoff) - # info('f_angle',f_angle) - # info('weight',weight) - # info('value',value) - # info('===============================================') - + value = version.calculate_side_chain_energy( + dist, dpka_max, cutoff, weight, f_angle) return value +def electrostatic_interaction(group1, group2, dist, version): + """Calculate electrostatic interaction betwee two groups. -def HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle=1.0): + Args: + group1: first interacting group + group2: second interacting group + dist: distance between groups + version: version-specific object with parameters and functions + Returns: + electrostatic interaction energy or None (if no interaction is appropriate) """ - returns a hydrogen-bond interaction pKa shift - """ - if distance < cutoff[0]: - value = 1.00 - elif distance > cutoff[1]: - value = 0.00 - else: - value = 1.0-(distance-cutoff[0])/(cutoff[1]-cutoff[0]) - - dpKa = dpka_max*value*f_angle - - return abs(dpKa) - - - - -def electrostatic_interaction(group1, group2, distance, version): - # check if we should do coulomb interaction at all - if not version.checkCoulombPair(group1, group2, distance): + if not version.check_coulomb_pair(group1, group2, dist): return None - - weight = version.calculatePairWeight(group1.Nmass, group2.Nmass) - value = version.calculateCoulombEnergy(distance, weight) - + weight = version.calculate_pair_weight(group1.num_volume, group2.num_volume) + value = version.calculate_coulomb_energy(dist, weight) return value -def checkCoulombPair(parameters, group1, group2, distance): - """ - Checks if this Coulomb interaction should be done - a propka2.0 hack - """ - Npair = group1.Nmass + group2.Nmass - do_coulomb = True +def check_coulomb_pair(parameters, group1, group2, dist): + """Checks if this Coulomb interaction should be done. - # check if both groups are titratable (ions are taken care of in determinants::setIonDeterminants) + NOTE - this is a propka2.0 hack + TODO - figure out why a similar function exists in version.py + + Args: + parameters: parameters for Coulomb calculations + group1: first interacting group + group2: second interacting group + dist: distance between groups + Returns: + Boolean + """ + num_volume = group1.num_volume + group2.num_volume + do_coulomb = True + # check if both groups are titratable (ions are taken care of in + # determinants::set_ion_determinants) if not (group1.titratable and group2.titratable): do_coulomb = False - # check if the distance is not too big - if distance > parameters.coulomb_cutoff2: + if dist > parameters.coulomb_cutoff2: do_coulomb = False - # check that desolvation is ok - if Npair < parameters.Nmin: + if num_volume < parameters.Nmin: do_coulomb = False - return do_coulomb -def CoulombEnergy(distance, weight, parameters): - """ - calculates the Coulomb interaction pKa shift based on Coulombs law - eps = 60.0 for the moment; to be scaled with 'weight' - """ - #diel = 80.0 - 60.0*weight +def coulomb_energy(dist, weight, parameters): + """Calculates the Coulomb interaction pKa shift based on Coulomb's law. - diel = 160 - (160 -30)*weight - R = max(distance, parameters.coulomb_cutoff1) - scale = (R - parameters.coulomb_cutoff2)/(parameters.coulomb_cutoff1 -parameters.coulomb_cutoff2) + Args: + dist: distance for electrostatic interaction + weight: scaling of dielectric constant + parameters: parameter object for calculation + Returns: + pKa shift + """ + diel = UNK_DIELECTRIC1 - (UNK_DIELECTRIC1 - UNK_DIELECTRIC2)*weight + dist = max(dist, parameters.coulomb_cutoff1) + scale = ( + (dist - parameters.coulomb_cutoff2) + / (parameters.coulomb_cutoff1 - parameters.coulomb_cutoff2)) scale = max(0.0, scale) scale = min(1.0, scale) - - dpka = 244.12/(diel*R) *scale - + dpka = UNK_PKA_SCALING1/(diel*dist)*scale return abs(dpka) +def backbone_reorganization(_, conformation): + """Perform calculations related to backbone reorganizations. -def BackBoneReorganization(parameters, conformation): - """ - adding test stuff + NOTE - this was described in the code as "adding test stuff" + NOTE - this function does not appear to be used + TODO - figure out why a similar function exists in version.py + + Args: + _: not used + conformation: specific molecule conformation """ titratable_groups = conformation.get_backbone_reorganisation_groups() - BBC_groups = conformation.get_backbone_CO_groups() + bbc_groups = conformation.get_backbone_co_groups() for titratable_group in titratable_groups: weight = titratable_group.buried - dpKa = 0.00 - for BBC_group in BBC_groups: + dpka = 0.00 + for bbc_group in bbc_groups: center = [titratable_group.x, titratable_group.y, titratable_group.z] - distance, f_angle, nada = AngleFactorX(atom2=BBC_group.get_interaction_atoms(titratable_group)[0], - atom3=BBC_group.atom, - center=center) - if distance < 6.0 and f_angle > 0.001: - value = 1.0-(distance-3.0)/(6.0-3.0) - dpKa += 0.80*min(1.0, value) - - titratable_group.Elocl = dpKa*weight - return + atom2 = bbc_group.get_interaction_atoms(titratable_group)[0] + dist, f_angle, _ = angle_distance_factors(atom2=atom2, + atom3=bbc_group.atom, + center=center) + if dist < UNK_BACKBONE_DISTANCE1 and f_angle > UNK_FANGLE_MIN: + value = ( + 1.0 - (dist-UNK_BACKBONE_DISTANCE2) + / (UNK_BACKBONE_DISTANCE1-UNK_BACKBONE_DISTANCE2)) + dpka += UNK_PKA_SCALING2 * min(1.0, value) + titratable_group.energy_local = dpka*weight -# -# Exception methods -# +def check_exceptions(version, group1, group2): + """Checks for atypical behavior in interactions between two groups. + Checks are made based on group type. -def checkExceptions(version, group1, group2): + TODO - figure out why a similar function exists in version.py + + Args: + version: version object + group1: first group for check + group2: second group for check + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ - checks for exceptions for this version - using defaults - """ - resType1 = group1.type - resType2 = group2.type - - if (resType1 == "COO" and resType2 == "ARG"): - exception, value = checkCooArgException(group1, group2, version) - elif (resType1 == "ARG" and resType2 == "COO"): - exception, value = checkCooArgException(group2, group1, version) - elif (resType1 == "COO" and resType2 == "COO"): - exception, value = checkCooCooException(group1, group2, version) - elif (resType1 == "CYS" and resType2 == "CYS"): - exception, value = checkCysCysException(group1, group2, version) - elif (resType1 == "COO" and resType2 == "HIS") or \ - (resType1 == "HIS" and resType2 == "COO"): - exception, value = checkCooHisException(group1, group2, version) - elif (resType1 == "OCO" and resType2 == "HIS") or \ - (resType1 == "HIS" and resType2 == "OCO"): - exception, value = checkOcoHisException(group1, group2, version) - elif (resType1 == "CYS" and resType2 == "HIS") or \ - (resType1 == "HIS" and resType2 == "CYS"): - exception, value = checkCysHisException(group1, group2, version) + res_type1 = group1.type + res_type2 = group2.type + if (res_type1 == "COO") and (res_type2 == "ARG"): + exception, value = check_coo_arg_exception(group1, group2, version) + elif (res_type1 == "ARG") and (res_type2 == "COO"): + exception, value = check_coo_arg_exception(group2, group1, version) + elif (res_type1 == "COO") and (res_type2 == "COO"): + exception, value = check_coo_coo_exception(group1, group2, version) + elif (res_type1 == "CYS") and (res_type2 == "CYS"): + exception, value = check_cys_cys_exception(group1, group2, version) + elif ((res_type1 == "COO") and (res_type2 == "HIS") + or (res_type1 == "HIS") and (res_type2 == "COO")): + exception, value = check_coo_his_exception(group1, group2, version) + elif ((res_type1 == "OCO") and (res_type2 == "HIS") + or (res_type1 == "HIS") and (res_type2 == "OCO")): + exception, value = check_oco_his_exception(group1, group2, version) + elif ((res_type1 == "CYS") and (res_type2 == "HIS") + or (res_type1 == "HIS") and (res_type2 == "CYS")): + exception, value = check_cys_his_exception(group1, group2, version) else: # do nothing, no exception for this pair - exception = False; value = None - + exception = False + value = None return exception, value +def check_coo_arg_exception(group_coo, group_arg, version): + """Check for COO-ARG interaction atypical behavior. -def checkCooArgException(group_coo, group_arg, version): - """ - checking Coo-Arg exception: uses the two shortes unique distances (involving 2+2 atoms) - """ + Uses the two shortest unique distances (involving 2+2 atoms) - str = "xxx" + Args: + group_coo: COO group + group_arg: ARG group + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) + """ exception = True value_tot = 0.00 - - #dpka_max = parameters.sidechain_interaction.get_value(group_coo.type,group_arg.type) - #cutoff = parameters.sidechain_cutoffs.get_value(group_coo.type,group_arg.type) - # needs to be this way since you want to find shortest distance first - #info("--- exception for %s %s ---" % (group_coo.label, group_arg.label)) atoms_coo = [] atoms_coo.extend(group_coo.get_interaction_atoms(group_arg)) atoms_arg = [] atoms_arg.extend(group_arg.get_interaction_atoms(group_coo)) - - - for iter in ["shortest", "runner-up"]: + for _ in ["shortest", "runner-up"]: # find the closest interaction pair - [closest_coo_atom, distance, closest_arg_atom] = get_smallest_distance(atoms_coo, atoms_arg) - [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_coo_atom,closest_arg_atom) - # calculate and sum up interaction energy + [closest_coo_atom, dist, closest_arg_atom] = get_smallest_distance(atoms_coo, + atoms_arg) + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_coo_atom, + closest_arg_atom) + # calculate and sum up interaction energy f_angle = 1.00 if group_arg.type in version.parameters.angular_dependent_sidechain_interactions: atom3 = closest_arg_atom.bonded_atoms[0] - distance, f_angle, nada = AngleFactorX(closest_coo_atom, closest_arg_atom, atom3) - - value = HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle) - #info(iter, closest_coo_atom, closest_arg_atom,distance,value) + dist, f_angle, _ = angle_distance_factors(closest_coo_atom, + closest_arg_atom, + atom3) + value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle) value_tot += value # remove closest atoms before we attemp to find the runner-up pair atoms_coo.remove(closest_coo_atom) atoms_arg.remove(closest_arg_atom) - - return exception, value_tot -def checkCooCooException(group1, group2, version): - """ - checking Coo-Coo hydrogen-bond exception +def check_coo_coo_exception(group1, group2, version): + """Check for COO-COO hydrogen-bond atypical interaction behavior. + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ exception = True - [closest_atom1, distance, closest_atom2] = get_smallest_distance(group1.get_interaction_atoms(group2), - group2.get_interaction_atoms(group1)) - - #dpka_max = parameters.sidechain_interaction.get_value(group1.type,group2.type) - #cutoff = parameters.sidechain_cutoffs.get_value(group1.type,group2.type) - [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1,closest_atom2) + interact_groups12 = group1.get_interaction_atoms(group2) + interact_groups21 = group2.get_interaction_atoms(group1) + [closest_atom1, dist, closest_atom2] = get_smallest_distance(interact_groups12, + interact_groups21) + [dpka_max, cutoff] = version.get_hydrogen_bond_parameters(closest_atom1, + closest_atom2) f_angle = 1.00 - value = HydrogenBondEnergy(distance, dpka_max, cutoff, f_angle) - weight = calculatePairWeight(version.parameters, group1.Nmass, group2.Nmass) + value = hydrogen_bond_energy(dist, dpka_max, cutoff, f_angle) + weight = calculate_pair_weight(version.parameters, group1.num_volume, group2.num_volume) value = value * (1.0 + weight) - return exception, value +def check_coo_his_exception(group1, group2, version): + """Check for COO-HIS atypical interaction behavior -def checkCooHisException(group1, group2, version): - """ - checking Coo-His exception + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if checkBuried(group1.Nmass, group2.Nmass): + if check_buried(group1.num_volume, group2.num_volume): exception = True - return exception, version.parameters.COO_HIS_exception -def checkOcoHisException(group1, group2, version): - """ - checking Coo-His exception +def check_oco_his_exception(group1, group2, version): + """Check for OCO-HIS atypical interaction behavior + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if checkBuried(group1.Nmass, group2.Nmass): + if check_buried(group1.num_volume, group2.num_volume): exception = True - return exception, version.parameters.OCO_HIS_exception -def checkCysHisException(group1, group2, version): - """ - checking Cys-His exception +def check_cys_his_exception(group1, group2, version): + """Check for CYS-HIS atypical interaction behavior + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if checkBuried(group1.Nmass, group2.Nmass): + if check_buried(group1.num_volume, group2.num_volume): exception = True - return exception, version.parameters.CYS_HIS_exception -def checkCysCysException(group1, group2, version): - """ - checking Cys-Cys exception +def check_cys_cys_exception(group1, group2, version): + """Check for CYS-CYS atypical interaction behavior + + Args: + group1: first group for check + group2: second group for check + version: version object + Returns: + 1. Boolean indicating atypical behavior, + 2. value associated with atypical interaction (None if Boolean is False) """ exception = False - if checkBuried(group1.Nmass, group2.Nmass): + if check_buried(group1.num_volume, group2.num_volume): exception = True - return exception, version.parameters.CYS_CYS_exception +def check_buried(num_volume1, num_volume2): + """Check to see if an interaction is buried - - -def checkBuried(Nmass1, Nmass2): + Args: + num_volume1: number of buried heavy atoms in volume 1 + num_volume2: number of buried heavy atoms in volume 2 + Returns: + True if interaction is buried, False otherwise """ - returns True if an interaction is buried - """ - - if (Nmass1 + Nmass2 <= 900) and (Nmass1 <= 400 or Nmass2 <= 400): + if ((num_volume1 + num_volume2 <= COMBINED_NUM_BURIED_MAX) + and (num_volume1 <= SEPARATE_NUM_BURIED_MAX + or num_volume2 <= SEPARATE_NUM_BURIED_MAX)): return False - else: - return True + return True diff --git a/propka/conformation_container.py b/propka/conformation_container.py index ce1a9c7..d44e30c 100644 --- a/propka/conformation_container.py +++ b/propka/conformation_container.py @@ -1,451 +1,610 @@ -# -# Container for molecular conformations -# +"""Container for molecular conformations""" +import functools +import propka.ligand +from propka.output import make_interaction_map +from propka.determinant import Determinant +from propka.coupled_groups import NCCG +from propka.determinants import set_backbone_determinants, set_ion_determinants +from propka.determinants import set_determinants +from propka.group import Group, is_group +from propka.lib import info -from __future__ import division -from __future__ import print_function -import propka.group, propka.determinants, propka.determinant, propka.ligand, propka.output, propka.coupled_groups, functools -from propka.lib import info, warning +# A large number that gets multipled with the integer obtained from applying +# ord() to the atom chain ID. Used in calculating atom keys for sorting. +UNICODE_MULTIPLIER = 1e7 +# A number that gets mutiplied with an atom's residue number. Used in +# calculating keys for atom sorting. +RESIDUE_MULTIPLIER = 1000 + + +class ConformationContainer: + """Container for molecular conformations""" -class Conformation_container: def __init__(self, name='', parameters=None, molecular_container=None): + """Initialize conformation container. + + Args: + name: name for conformation + parameters: parmameters for conformation + molecular_container: container for molecule + """ self.molecular_container = molecular_container - self.name=name - self.parameters=parameters + self.name = name + self.parameters = parameters self.atoms = [] self.groups = [] self.chains = [] self.current_iter_item = 0 - + # TODO - what is marvin_pkas_calculated? self.marvin_pkas_calculated = False self.non_covalently_coupled_groups = False - return - - - # - # Group related methods - # def extract_groups(self): - """ Generates at list of molecular groups needed for calculating pKa values """ + """Generate molecular groups needed for calculating pKa values.""" for atom in self.get_non_hydrogen_atoms(): # has this atom been checked for groups? if atom.groups_extracted == 0: - group = propka.group.is_group(self.parameters, atom) + group = is_group(self.parameters, atom) else: group = atom.group - # if the atom has been checked in a another conformation, check if it has a - # group that should be used in this conformation as well + # if the atom has been checked in a another conformation, check + # if it has a group that should be used in this conformation as well if group: self.setup_and_add_group(group) - return - def additional_setup_when_reading_input_file(self): - - # if a group is coupled and we are reading a .propka_input file, + """Generate interaction map and charge centers.""" + # if a group is coupled and we are reading a .propka_input file, then # some more configuration might be needed - - # print coupling map - map = propka.output.make_interaction_map('Covalent coupling map for %s'%self, - self.get_covalently_coupled_groups(), - lambda g1,g2: g1 in g2.covalently_coupled_groups) - info(map) - + map_ = make_interaction_map( + 'Covalent coupling map for {0:s}'.format(str(self)), + self.get_covalently_coupled_groups(), + lambda g1, g2: g1 in g2.covalently_coupled_groups) + info(map_) # check if we should set a common charge centre as well if self.parameters.common_charge_centre: self.set_common_charge_centres() - return - def set_common_charge_centres(self): - for system in self.get_coupled_systems(self.get_covalently_coupled_groups(), propka.group.Group.get_covalently_coupled_groups): + """Assign charge centers to groups.""" + for system in self.get_coupled_systems( + self.get_covalently_coupled_groups(), + Group.get_covalently_coupled_groups): # make a list of the charge centre coordinates all_coordinates = list(map(lambda g: [g.x, g.y, g.z], system)) # find the common charge center - ccc = functools.reduce(lambda g1,g2: [g1[0]+g2[0], g1[1]+g2[1], g1[2]+g2[2]], all_coordinates) + ccc = functools.reduce( + lambda g1, g2: [g1[0]+g2[0], g1[1]+g2[1], g1[2]+g2[2]], + all_coordinates) ccc = list(map(lambda c: c/len(system), ccc)) # set the ccc for all coupled groups in this system - for g in system: - [g.x, g.y, g.z] = ccc - g.common_charge_centre = True - return - - - + for group in system: + [group.x, group.y, group.z] = ccc + group.common_charge_centre = True def find_covalently_coupled_groups(self): - """ Finds covalently coupled groups and sets common charge centres if needed """ + """Find covalently coupled groups and set common charge centres.""" for group in self.get_titratable_groups(): # Find covalently bonded groups - bonded_groups = self.find_bonded_titratable_groups(group.atom, 1, group.atom) - - # couple groups - for cg in bonded_groups: - if cg in group.covalently_coupled_groups: + bonded_groups = self.find_bonded_titratable_groups( + group.atom, 1, group.atom) + # coupled groups + for bond_group in bonded_groups: + if bond_group in group.covalently_coupled_groups: continue - if cg.atom.sybyl_type == group.atom.sybyl_type: - group.couple_covalently(cg) - + if bond_group.atom.sybyl_type == group.atom.sybyl_type: + group.couple_covalently(bond_group) # check if we should set a common charge centre as well if self.parameters.common_charge_centre: self.set_common_charge_centres() - # print coupling map - map = propka.output.make_interaction_map('Covalent coupling map for %s'%self, - #self.get_titratable_groups(), - self.get_covalently_coupled_groups(), - lambda g1,g2: g1 in g2.covalently_coupled_groups) - info(map) - - - return - + map_ = make_interaction_map( + 'Covalent coupling map for {0:s}'.format(str(self)), + self.get_covalently_coupled_groups(), + lambda g1, g2: g1 in g2.covalently_coupled_groups) + info(map_) def find_non_covalently_coupled_groups(self, verbose=False): + """Find non-covalently coupled groups and set common charge centres. + + Args: + verbose: verbose output + """ # check if non-covalent coupling has already been set up in an input file - if len(list(filter(lambda g: len(g.non_covalently_coupled_groups)>0, self.get_titratable_groups())))>0: + if len(list(filter(lambda g: len(g.non_covalently_coupled_groups) > 0, + self.get_titratable_groups()))) > 0: self.non_covalently_coupled_groups = True - - propka.coupled_groups.nccg.identify_non_covalently_coupled_groups(self,verbose=verbose) - + NCCG.identify_non_covalently_coupled_groups(self, verbose=verbose) # re-do the check - if len(list(filter(lambda g: len(g.non_covalently_coupled_groups)>0, self.get_titratable_groups())))>0: + if len(list(filter(lambda g: len(g.non_covalently_coupled_groups) > 0, + self.get_titratable_groups()))) > 0: self.non_covalently_coupled_groups = True - return + def find_bonded_titratable_groups(self, atom, num_bonds, original_atom): + """Find bonded titrable groups. - - def find_bonded_titratable_groups(self, atom, no_bonds, original_atom): + Args: + atom: atom to check for bonds + num_bonds: number of bonds for coupling + original_atom: another atom to check for bonds + Returns: + a set of bonded atom groups + """ res = set() - for ba in atom.bonded_atoms: + for bond_atom in atom.bonded_atoms: # skip the original atom - if ba == original_atom: + if bond_atom == original_atom: continue # check if this atom has a titratable group - if ba.group and ba.group.titratable and no_bonds <= self.parameters.coupling_max_number_of_bonds: - res.add(ba.group) + if (bond_atom.group and bond_atom.group.titratable + and num_bonds + <= self.parameters.coupling_max_number_of_bonds): + res.add(bond_atom.group) # check for titratable groups bonded to this atom - if no_bonds < self.parameters.coupling_max_number_of_bonds: - res |= self.find_bonded_titratable_groups(ba,no_bonds+1, original_atom) - + if num_bonds < self.parameters.coupling_max_number_of_bonds: + res |= self.find_bonded_titratable_groups( + bond_atom, num_bonds+1, original_atom) return res - def setup_and_add_group(self, group): - """ Checks if we want to include this group in the calculations """ + """Check if we want to include this group in the calculations. + Args: + group: group to check + """ # Is it recognized as a group at all? if not group: return - # Other checks (include ligands, which chains etc.) - # if all ok, accept the group self.init_group(group) self.groups.append(group) def init_group(self, group): - """ - Initialize the given Group object. + """Initialize the given Group object. + + Args: + group: group object to initialize """ # set up the group - group.parameters=self.parameters + group.parameters = self.parameters group.setup() - # If --titrate_only option is set, make non-specified residues un-titratable: + # If --titrate_only option is set, make non-specified residues + # un-titratable: titrate_only = self.molecular_container.options.titrate_only if titrate_only is not None: - at = group.atom - if not (at.chain_id, at.res_num, at.icode) in titrate_only: + atom = group.atom + if not (atom.chain_id, atom.res_num, atom.icode) in titrate_only: group.titratable = False if group.residue_type == 'CYS': group.exclude_cys_from_results = True - - # - # pka calculation methods - # - def calculate_pka(self, version, options): + """Calculate pKas for conformation container. + + Args: + version: version object + options: option object + """ info('\nCalculating pKas for', self) - # calculate desolvation - for group in self.get_titratable_groups()+self.get_ions(): + for group in self.get_titratable_groups() + self.get_ions(): version.calculate_desolvation(group) - # calculate backbone interactions - propka.determinants.setBackBoneDeterminants(self.get_titratable_groups(), self.get_backbone_groups(), version) - + set_backbone_determinants( + self.get_titratable_groups(), self.get_backbone_groups(), version) # setting ion determinants - propka.determinants.setIonDeterminants(self, version) - + set_ion_determinants(self, version) # calculating the back-bone reorganization/desolvation term - version.calculateBackBoneReorganization(self) - - # setting remaining non-iterative and iterative side-chain & Coulomb interaction determinants - propka.determinants.setDeterminants(self.get_sidechain_groups(), version=version, options=options) - + version.calculate_backbone_reorganization(self) + # setting remaining non-iterative and iterative side-chain & Coulomb + # interaction determinants + set_determinants( + self.get_sidechain_groups(), version=version, options=options) # calculating the total pKa values - for group in self.groups: group.calculate_total_pka() - + for group in self.groups: + group.calculate_total_pka() # take coupling effects into account penalised_labels = self.coupling_effects() - - if self.parameters.remove_penalised_group and len(penalised_labels)>0: + if (self.parameters.remove_penalised_group + and len(penalised_labels) > 0): info('Removing penalised groups!!!') - - for g in self.get_titratable_groups(): - g.remove_determinants(penalised_labels) - + for group in self.get_titratable_groups(): + group.remove_determinants(penalised_labels) # re-calculating the total pKa values - for group in self.groups: group.calculate_total_pka() - - - return - + for group in self.groups: + group.calculate_total_pka() def coupling_effects(self): - # - # Bases: The group with the highest pKa (the most stable one in the - # charged form) will be the first one to adopt a proton as pH - # is lowered and this group is allowed to titrate. - # The remaining groups are penalised - # - # Acids: The group with the highest pKa (the least stable one in the - # charged form) will be the last group to loose the proton as - # pH is raised and will be penalised. - # The remaining groups are allowed to titrate. - # + """Penalize groups based on coupling effects. + + Bases: The group with the highest pKa (the most stable one in the + charged form) will be the first one to adopt a proton as pH is lowered + and this group is allowed to titrate. The remaining groups are + penalised. + + Acids: The group with the highest pKa (the least stable one in the + charged form) will be the last group to loose the proton as pH is + raised and will be penalised. The remaining groups are allowed to + titrate. + """ penalised_labels = [] - - for all_groups in self.get_coupled_systems(self.get_covalently_coupled_groups(), - propka.group.Group.get_covalently_coupled_groups): - + for all_groups in self.get_coupled_systems( + self.get_covalently_coupled_groups(), + Group.get_covalently_coupled_groups): # check if we should share determinants if self.parameters.shared_determinants: self.share_determinants(all_groups) - # find the group that has the highest pKa value - first_group = max(all_groups, key=lambda g:g.pka_value) - + first_group = max(all_groups, key=lambda g: g.pka_value) # In case of acids if first_group.charge < 0: - first_group.coupled_titrating_group = min(all_groups, key=lambda g:g.pka_value) - penalised_labels.append(first_group.label) # group with the highest pKa is penalised - + first_group.coupled_titrating_group = min( + all_groups, key=lambda g: g.pka_value) + # group with the highest pKa is penalised + penalised_labels.append(first_group.label) # In case of bases else: - for a in all_groups: - if a == first_group: - continue # group with the highest pKa is allowed to titrate... - a.coupled_titrating_group = first_group - penalised_labels.append(a.label) #... and the rest is penalised - + for group in all_groups: + if group == first_group: + # group with the highest pKa is allowed to titrate... + continue + group.coupled_titrating_group = first_group + #... and the rest are penalised + penalised_labels.append(group.label) return penalised_labels + @staticmethod + def share_determinants(groups): + """Share sidechain, backbone, and Coloumb determinants between groups. - def share_determinants(self, groups): - + Args: + groups: groups to share between + """ # make a list of the determinants to share - types = ['sidechain','backbone','coulomb'] - for type in types: + types = ['sidechain', 'backbone', 'coulomb'] + for type_ in types: # find maximum value for each determinant max_dets = {} - for g in groups: - for d in g.determinants[type]: + for group in groups: + for det in group.determinants[type_]: # update max dets - if d.group not in max_dets.keys(): - max_dets[d.group] = d.value + if det.group not in max_dets.keys(): + max_dets[det.group] = det.value else: - max_dets[d.group] = max(d.value, max_dets[d.group], key= lambda v: abs(v)) - + max_dets[det.group] = max(det.value, + max_dets[det.group], + key=lambda v: abs(v)) # overwrite/add maximum value for each determinant - for det_group in max_dets.keys(): - new_determinant = propka.determinant.Determinant(det_group, max_dets[det_group]) - for g in groups: - g.set_determinant(new_determinant,type) - - - return - + for det_group in max_dets: + new_determinant = Determinant(det_group, max_dets[det_group]) + for group in groups: + group.set_determinant(new_determinant, type_) def get_coupled_systems(self, groups, get_coupled_groups): - """ This generator will yield one covalently coupled system at the time """ + """A generator that yields covalently coupled systems. + + Args: + groups: groups for generating coupled systems + get_coupled_groups: TODO - I don't know what this is + Yields: + covalently coupled systems + """ groups = set(groups) - while len(groups)>0: + while len(groups) > 0: # extract a system of coupled groups ... system = set() - self.get_a_coupled_system_of_groups(groups.pop(), system, get_coupled_groups) + self.get_a_coupled_system_of_groups( + groups.pop(), system, get_coupled_groups) # ... and remove them from the list groups -= system - yield system - return + def get_a_coupled_system_of_groups(self, new_group, coupled_groups, + get_coupled_groups): + """Set up coupled systems of groups. - - def get_a_coupled_system_of_groups(self, new_group, coupled_groups, get_coupled_groups): + Args: + new_group: added to coupled_groups + coupled_groups: existing coupled groups + get_coupled_groups: TODO - I don't know what this + """ coupled_groups.add(new_group) - [self.get_a_coupled_system_of_groups(c, coupled_groups, get_coupled_groups) for c in get_coupled_groups(new_group) if c not in coupled_groups] - return + for coupled_group in get_coupled_groups(new_group): + if coupled_group not in coupled_groups: + self.get_a_coupled_system_of_groups(coupled_group, + coupled_groups, + get_coupled_groups) + def calculate_folding_energy(self, ph=None, reference=None): + """Calculate folding energy over all groups in conformation container. - # - # Energy/summary-related methods - # - def calculate_folding_energy(self, pH=None, reference=None): + Args: + ph: pH for calculation + reference: reference state + Returns: + folding energy + TODO - need units + """ ddg = 0.0 for group in self.groups: - #info('Folding energy for %s at pH %f: %f'%(group,pH,group.calculate_folding_energy(self.parameters, pH=pH, reference=reference))) - ddg += group.calculate_folding_energy(self.parameters, pH=pH, reference=reference) - + ddg += group.calculate_folding_energy(self.parameters, ph=ph, + reference=reference) return ddg - def calculate_charge(self, parmaeters, pH=None): + def calculate_charge(self, parameters, ph=None): + """Calculate charge for folded and unfolded states. + + Args: + parameters: parameters for calculation + ph: pH for calculation + Returns: + 1. charge for unfolded state + 2. charge for folded state + """ unfolded = folded = 0.0 for group in self.get_titratable_groups(): - unfolded += group.calculate_charge(parmaeters, pH=pH, state='unfolded') - folded += group.calculate_charge(parmaeters, pH=pH, state='folded') - - return unfolded,folded - - - # - # conformation/bookkeeping/atom methods - # + unfolded += group.calculate_charge(parameters, ph=ph, + state='unfolded') + folded += group.calculate_charge(parameters, ph=ph, + state='folded') + return unfolded, folded def get_backbone_groups(self): - """ returns all backbone groups needed for the pKa calculations """ + """Get backbone groups needed for the pKa calculations. + + Returns: + list of groups + """ return [group for group in self.groups if 'BB' in group.type] def get_sidechain_groups(self): - """ returns all sidechain groups needed for the pKa calculations """ - return [group for group in self.groups if ('BB' not in group.type\ - and not group.atom.cysteine_bridge)] + """Get sidechain groups needed for the pKa calculations. + + Returns: + list of groups + """ + return [ + group for group in self.groups + if ('BB' not in group.type and not group.atom.cysteine_bridge)] def get_covalently_coupled_groups(self): - return [g for g in self.groups if len(g.covalently_coupled_groups)>0] + """Get covalently coupled groups needed for pKa calculations. + + Returns: + list of groups + """ + return [ + g for g in self.groups + if len(g.covalently_coupled_groups) > 0] def get_non_covalently_coupled_groups(self): - return [g for g in self.groups if len(g.non_covalently_coupled_groups)>0] + """Get non-covalently coupled groups needed for pKa calculations. - def get_backbone_NH_groups(self): - """ returns all NH backbone groups needed for the pKa calculations """ + Returns: + list of groups + """ + return [ + g for g in self.groups + if len(g.non_covalently_coupled_groups) > 0] + + def get_backbone_nh_groups(self): + """Get NH backbone groups needed for pKa calculations. + + Returns: + list of groups + """ return [group for group in self.groups if group.type == 'BBN'] - def get_backbone_CO_groups(self): - """ returns all CO backbone groups needed for the pKa calculations """ + def get_backbone_co_groups(self): + """Get CO backbone groups needed for pKa calculations. + + Returns: + list of groups + """ return [group for group in self.groups if group.type == 'BBC'] def get_groups_in_residue(self, residue): - return [group for group in self.groups if group.residue_type == residue] + """Get residue groups needed for pKa calculations. + + Args: + residue: specific residue with groups + Returns: + list of groups + """ + return [ + group for group in self.groups if group.residue_type == residue] def get_titratable_groups(self): + """Get all titratable groups needed for pKa calculations. + + Returns: + list of groups + """ return [group for group in self.groups if group.titratable] def get_groups_for_calculations(self): - """ - Returns a list of groups that should be included in results report. + """Get a list of groups that should be included in results report. + If --titrate_only option is specified, only residues that are titratable and are in that list are included; otherwise all titratable residues and CYS residues are included. + + Returns: + list of groups """ return [group for group in self.groups if group.use_in_calculations()] def get_acids(self): - return [group for group in self.groups if (group.residue_type in self.parameters.acid_list - and not group.atom.cysteine_bridge)] + """Get acid groups needed for pKa calculations. + + Returns: + list of groups + """ + return [ + group for group in self.groups + if (group.residue_type in self.parameters.acid_list + and not group.atom.cysteine_bridge)] def get_backbone_reorganisation_groups(self): - return [group for group in self.groups if (group.residue_type in self.parameters.backbone_reorganisation_list - and not group.atom.cysteine_bridge)] + """Get groups involved with backbone reorganization. + + Returns: + list of groups + """ + return [ + group for group in self.groups + if (group.residue_type + in self.parameters.backbone_reorganisation_list + and not group.atom.cysteine_bridge)] def get_ions(self): - return [group for group in self.groups if group.residue_type in self.parameters.ions.keys()] + """Get ion groups. - def get_group_names(self, list): - return [group for group in self.groups if group.type in list] + Returns: + list of groups + """ + return [ + group for group in self.groups + if group.residue_type in self.parameters.ions.keys()] + def get_group_names(self, group_list): + """Get names of groups in list. + + Args: + group_list: list to check + Returns: + list of groups + """ + return [group for group in self.groups if group.type in group_list] def get_ligand_atoms(self): - return [atom for atom in self.atoms if atom.type=='hetatm'] + """Get atoms associated with ligands. + + Returns: + list of atoms + """ + return [atom for atom in self.atoms if atom.type == 'hetatm'] def get_heavy_ligand_atoms(self): - return [atom for atom in self.atoms if atom.type=='hetatm' and atom.element != 'H'] + """Get heavy atoms associated with ligands. - def get_chain(self,chain): + Returns: + list of atoms + """ + return [ + atom for atom in self.atoms + if atom.type == 'hetatm' and atom.element != 'H'] + + def get_chain(self, chain): + """Get atoms associated with a specific chain. + + Args: + chain: chain to select + Returns: + list of atoms + """ return [atom for atom in self.atoms if atom.chain_id != chain] - def add_atom(self, atom): - #info(self,'adding',atom) + """Add atom to container. + + Args: + atom: atom to add + """ self.atoms.append(atom) if not atom.conformation_container: atom.conformation_container = self if not atom.molecular_container: atom.molecular_container = self.molecular_container - # store chain id for bookkeeping if not atom.chain_id in self.chains: self.chains.append(atom.chain_id) - return - def copy_atom(self, atom): - new_atom = atom.make_copy() + """Add a copy of the atom to container. + + Args: + atom: atom to copy and add + """ + new_atom = atom.make_copy() self.atoms.append(new_atom) new_atom.conformation_container = self - return - def get_non_hydrogen_atoms(self): - return [atom for atom in self.atoms if atom.element!='H'] + """Get atoms that are not hydrogens. + Returns: + list of atoms + """ + return [atom for atom in self.atoms if atom.element != 'H'] def top_up(self, other): - """ Tops up self with all atoms found in other but not in self """ - my_residue_labels = { a.residue_label for a in self.atoms } + """Adds any atoms found in `other` but not in this container. + + Tops up self with all atoms found in other but not in self. + + Args: + other: conformation container with atoms to add + """ + my_residue_labels = {a.residue_label for a in self.atoms} for atom in other.atoms: if not atom.residue_label in my_residue_labels: self.copy_atom(atom) - return def find_group(self, group): - for g in self.groups: - if g.atom.residue_label == group.atom.residue_label: - if g.type == group.type: - return g + """Find a group in the container. + + Args: + group: group to find + Returns: + False (if group not found) or group + """ + for group_ in self.groups: + if group_.atom.residue_label == group.atom.residue_label: + if group_.type == group.type: + return group_ return False - def set_ligand_atom_names(self): + """Set names for atoms in ligands.""" for atom in self.get_ligand_atoms(): propka.ligand.assign_sybyl_type(atom) - return - - def __str__(self): - return'Conformation container %s with %d atoms and %d groups'%(self.name,len(self),len(self.groups)) + """String that lists statistics of atoms and groups.""" + fmt = ( + "Conformation container {name} with {natoms:d} atoms and " + "{ngroups:d} groups") + str_ = fmt.format( + name=self.name, natoms=len(self), ngroups=len(self.groups)) + return str_ def __len__(self): + """Number of atoms in container.""" return len(self.atoms) def sort_atoms(self): + """Sort atoms by `self.sort_atoms_key()` and renumber.""" # sort the atoms ... self.atoms.sort(key=self.sort_atoms_key) # ... and re-number them for i in range(len(self.atoms)): self.atoms[i].numb = i+1 - return + @staticmethod + def sort_atoms_key(atom): + """Generate key for atom sorting. - def sort_atoms_key(self, atom): - key = ord(atom.chain_id)*1e7 - key += atom.res_num*1000 + Args: + atom: atom for key generation. + Returns: + key for atom + """ + key = ord(atom.chain_id) * UNICODE_MULTIPLIER + key += atom.res_num * RESIDUE_MULTIPLIER if len(atom.name) > len(atom.element): key += ord(atom.name[len(atom.element)]) - #info(atom,ord(atom.name[len(atom.element)]), '|%s||%s|'%(atom.name,atom.element)) return key diff --git a/propka/coupled_groups.py b/propka/coupled_groups.py index 3d0ac2d..bb47dfe 100644 --- a/propka/coupled_groups.py +++ b/propka/coupled_groups.py @@ -1,113 +1,118 @@ - -from __future__ import division -from __future__ import print_function - -import math, propka.output, propka.group, propka.lib, itertools -from propka.lib import info, warning +"""Describe coupling between groups.""" +import itertools +import propka.lib +from propka.group import Group +from propka.output import make_interaction_map +from propka.lib import info -class non_covalently_couple_groups: +class NonCovalentlyCoupledGroups: + """Groups that are coupled without covalent bonding.""" def __init__(self): - self.parameters = None - - # self.do_intrinsic = False - # self.do_pair_wise = False self.do_prot_stat = True - return - - - # - # Methods for finding coupled groups - # - def is_coupled_protonation_state_probability(self, group1, group2, energy_method, return_on_fail=True): + def is_coupled_protonation_state_probability(self, group1, group2, + energy_method, + return_on_fail=True): + """Check whether two groups are energetically coupled. + Args: + group1: first group for interaction + group2: second group for interaction + energy_method: function for calculating energy + return_on_fail: return if part of the calculation fails + Returns: + dictionary describing coupling + """ # check if the interaction energy is high enough - interaction_energy = max(self.get_interaction(group1,group2), self.get_interaction(group2,group1)) - - if interaction_energy<=self.parameters.min_interaction_energy and return_on_fail: - return {'coupling_factor':-1.0} - + interaction_energy = max(self.get_interaction(group1, group2), + self.get_interaction(group2, group1)) + if (interaction_energy <= self.parameters.min_interaction_energy + and return_on_fail): + return {'coupling_factor': -1.0} # calculate intrinsic pKa's, if not already done for group in [group1, group2]: - if not hasattr(group, 'intrinsic_pKa'): + if group.intrinsic_pka is None: group.calculate_intrinsic_pka() - - use_pH = self.parameters.pH + use_ph = self.parameters.pH if self.parameters.pH == 'variable': - use_pH = min(group1.pka_value, group2.pka_value) - - default_energy = energy_method(pH=use_pH, reference=self.parameters.reference) + use_ph = min(group1.pka_value, group2.pka_value) + default_energy = energy_method(ph=use_ph, + reference=self.parameters.reference) default_pka1 = group1.pka_value default_pka2 = group2.pka_value - # check that pka values are within relevant limits - if max(default_pka1, default_pka2) < self.parameters.min_pka or \ - min(default_pka1, default_pka2) > self.parameters.max_pka: + if (max(default_pka1, default_pka2) < self.parameters.min_pka + or min(default_pka1, default_pka2) > self.parameters.max_pka): if return_on_fail: - return {'coupling_factor':-1.0} - + return {'coupling_factor': -1.0} # Swap interactions and re-calculate pKa values self.swap_interactions([group1], [group2]) group1.calculate_total_pka() group2.calculate_total_pka() - # store swapped energy and pka's - swapped_energy = energy_method(pH=use_pH, reference=self.parameters.reference) + swapped_energy = energy_method( + ph=use_ph, reference=self.parameters.reference) swapped_pka1 = group1.pka_value swapped_pka2 = group2.pka_value - pka_shift1 = swapped_pka1 - default_pka1 pka_shift2 = swapped_pka2 - default_pka2 - # Swap back to original protonation state self.swap_interactions([group1], [group2]) group1.calculate_total_pka() group2.calculate_total_pka() - # check difference in free energy - if abs(default_energy - swapped_energy) > self.parameters.max_free_energy_diff and return_on_fail: - return {'coupling_factor':-1.0} - + if (abs(default_energy - swapped_energy) + > self.parameters.max_free_energy_diff and return_on_fail): + return {'coupling_factor': -1.0} # check pka shift - if max(abs(pka_shift1), abs(pka_shift2)) < self.parameters.min_swap_pka_shift and return_on_fail: - return {'coupling_factor':-1.0} - + if (max(abs(pka_shift1), abs(pka_shift2)) + < self.parameters.min_swap_pka_shift and return_on_fail): + return {'coupling_factor': -1.0} # check intrinsic pka diff - if abs(group1.intrinsic_pKa - group2.intrinsic_pKa) > self.parameters.max_intrinsic_pKa_diff and return_on_fail: - return {'coupling_factor':-1.0} - + if (abs(group1.intrinsic_pka - group2.intrinsic_pka) + > self.parameters.max_intrinsic_pka_diff and return_on_fail): + return {'coupling_factor': -1.0} # if everything is OK, calculate the coupling factor and return all info - factor = self.get_free_energy_diff_factor(default_energy, swapped_energy)*\ - self.get_pka_diff_factor(group1.intrinsic_pKa, group2.intrinsic_pKa)*\ - self.get_interaction_factor(interaction_energy) - - return {'coupling_factor':factor, - 'default_energy':default_energy, - 'swapped_energy':swapped_energy, - 'interaction_energy':interaction_energy, - 'swapped_pka1':swapped_pka1, - 'swapped_pka2':swapped_pka2, - 'pka_shift1':pka_shift1, - 'pka_shift2':pka_shift2, - 'pH':use_pH} - - - - # - # Methods for calculating the coupling factor - # + factor = ( + self.get_free_energy_diff_factor(default_energy, swapped_energy) + * self.get_pka_diff_factor(group1.intrinsic_pka, + group2.intrinsic_pka) + * self.get_interaction_factor(interaction_energy)) + return {'coupling_factor': factor, 'default_energy': default_energy, + 'swapped_energy': swapped_energy, + 'interaction_energy': interaction_energy, + 'swapped_pka1': swapped_pka1, 'swapped_pka2': swapped_pka2, + 'pka_shift1': pka_shift1, 'pka_shift2': pka_shift2, + 'pH': use_ph} def get_pka_diff_factor(self, pka1, pka2): + """Get scaling factor for difference between intrinsic pKa values. + + Args: + pka1: first pKa to compare + pka2: second pKa to compare + Returns: + float value of scaling factor + """ intrinsic_pka_diff = abs(pka1-pka2) res = 0.0 - if intrinsic_pka_diff <= self.parameters.max_intrinsic_pKa_diff: - res = 1-(intrinsic_pka_diff/self.parameters.max_intrinsic_pKa_diff)**2 - + if intrinsic_pka_diff <= self.parameters.max_intrinsic_pka_diff: + res = ( + 1-(intrinsic_pka_diff + /self.parameters.max_intrinsic_pka_diff)**2) return res def get_free_energy_diff_factor(self, energy1, energy2): + """Get scaling factor for difference between free energies. + + Args: + energy1: first energy to compare + energy2: second energy to compare + Returns: + float value of scaling factor + """ free_energy_diff = abs(energy1-energy2) res = 0.0 if free_energy_diff <= self.parameters.max_free_energy_diff: @@ -115,208 +120,267 @@ class non_covalently_couple_groups: return res def get_interaction_factor(self, interaction_energy): + """Get scaling factor related to interaction energy. + + Args: + interaction_energy: interaction energy + Returns: + float value of scaling factor + """ res = 0.0 interaction_energy = abs(interaction_energy) if interaction_energy >= self.parameters.min_interaction_energy: - res = (interaction_energy-self.parameters.min_interaction_energy)/(1.0+interaction_energy-self.parameters.min_interaction_energy) - + res = ( + (interaction_energy-self.parameters.min_interaction_energy) + / (1.0+interaction_energy + -self.parameters.min_interaction_energy)) return res + def identify_non_covalently_coupled_groups(self, conformation, + verbose=True): + """Find coupled residues in protein. - - - def identify_non_covalently_coupled_groups(self, conformation, verbose=True): - """ Finds coupled residues in protein """ - + Args: + conformation: protein conformation to test + verbose: verbose output (boolean) + """ self.parameters = conformation.parameters if verbose: - info('') - info(' Warning: When using the -d option, pKa values based on \'swapped\' interactions') - info(' will be writting to the output .pka file') - info('') - info('-' * 103) - info(' Detecting non-covalently coupled residues') - info('-' * 103) - info(' Maximum pKa difference: %4.2f pKa units' % self.parameters.max_intrinsic_pKa_diff) - info(' Minimum interaction energy: %4.2f pKa units' % self.parameters.min_interaction_energy) - info(' Maximum free energy diff.: %4.2f pKa units' % self.parameters.max_free_energy_diff) - info(' Minimum swap pKa shift: %4.2f pKa units' % self.parameters.min_swap_pka_shift) - info(' pH: %6s ' % str(self.parameters.pH)) - info(' Reference: %s' % self.parameters.reference) - info(' Min pKa: %4.2f' % self.parameters.min_pka) - info(' Max pKa: %4.2f' % self.parameters.max_pka) - info('') - + info_fmt = ( + '\n' + ' Warning: When using the -d option, pKa values based on \n' + '\'swapped\' interactions\n' + ' will be writting to the output .pka file\n' + '\n' + '{sep}\n' + '\n' + ' Detecting non-covalently coupled residues\n' + '{sep}\n' + ' Maximum pKa difference: {c.max_intrinsic_pka_diff:>4.2f} pKa units\n' + ' Minimum interaction energy: {c.min_interaction_energy:>4.2f} pKa units\n' + ' Maximum free energy diff.: {c.max_free_energy_diff:>4.2f} pKa units\n' + ' Minimum swap pKa shift: {c.min_swap_pka_shift:>4.2f} pKa units\n' + ' pH: {c.pH:>6} \n' + ' Reference: {c.reference}\n' + ' Min pKa: {c.min_pka:>4.2f}\n' + ' Max pKa: {c.max_pka:>4.2f}\n' + '\n') + sep = "-" * 103 + info(info_fmt.format(sep=sep, c=self)) # find coupled residues titratable_groups = conformation.get_titratable_groups() - if not conformation.non_covalently_coupled_groups: - for g1 in titratable_groups: - for g2 in titratable_groups: - if g1==g2: + for group1 in titratable_groups: + for group2 in titratable_groups: + if group1 == group2: break - - if not g1 in g2.non_covalently_coupled_groups and self.do_prot_stat: - data = self.is_coupled_protonation_state_probability(g1, g2,conformation.calculate_folding_energy) - if data['coupling_factor'] >0.0: - g1.couple_non_covalently(g2) - + if (group1 not in group2.non_covalently_coupled_groups + and self.do_prot_stat): + data = ( + self + .is_coupled_protonation_state_probability( + group1, group2, + conformation.calculate_folding_energy)) + if data['coupling_factor'] > 0.0: + group1.couple_non_covalently(group2) if verbose: self.print_out_swaps(conformation) - return + def print_out_swaps(self, conformation): + """Print out something having to do with coupling interactions. - def print_out_swaps(self, conformation, verbose=True): - map = propka.output.make_interaction_map('Non-covalent coupling map for %s'%conformation, - conformation.get_non_covalently_coupled_groups(), - lambda g1,g2: g1 in g2.non_covalently_coupled_groups) - info(map) - - for system in conformation.get_coupled_systems(conformation.get_non_covalently_coupled_groups(),propka.group.Group.get_non_covalently_coupled_groups): + Args: + conformation: conformation to print + """ + map_ = make_interaction_map( + 'Non-covalent coupling map for {0:s}'.format(str(conformation)), + conformation.get_non_covalently_coupled_groups(), + lambda g1, g2: g1 in g2.non_covalently_coupled_groups) + info(map_) + for system in conformation.get_coupled_systems( + conformation.get_non_covalently_coupled_groups(), + Group.get_non_covalently_coupled_groups): self.print_system(conformation, list(system)) - return def print_system(self, conformation, system): + """Print out something about the system. - info('System containing %d groups:' % len(system)) - - # make list of interactions withi this system - interactions = list(itertools.combinations(system,2)) - + Args: + conformation: conformation to print + system: system to print + """ + info( + 'System containing {0:d} groups:'.format(len(system))) + # make list of interactions within this system + interactions = list(itertools.combinations(system, 2)) # print out coupling info for each interaction coup_info = '' for interaction in interactions: - data = self.is_coupled_protonation_state_probability(interaction[0], interaction[1],conformation.calculate_folding_energy, return_on_fail=False) - coup_info += self.make_data_to_string(data,interaction[0],interaction[1])+'\n\n' + data = ( + self.is_coupled_protonation_state_probability( + interaction[0], interaction[1], + conformation.calculate_folding_energy, + return_on_fail=False)) + coup_info += ( + self.make_data_to_string(data, interaction[0], interaction[1]) + + '\n\n') info(coup_info) - # make list of possible combinations of swap to try out combinations = propka.lib.generate_combinations(interactions) - # Make possible swap combinations swap_info = '' - swap_info += self.print_determinants_section(system,'Original') - + swap_info += self.print_determinants_section(system, 'Original') for combination in combinations: # Tell the user what is swap in this combination swap_info += 'Swapping the following interactions:\n' for interaction in combination: - swap_info += ' %s %s\n'%(interaction[0].label, interaction[1].label) - + swap_info += ' {0:s} {1:s}\n'.format( + interaction[0].label, interaction[1].label) # swap... for interaction in combination: - self.swap_interactions([interaction[0]],[interaction[1]]) - - swap_info += self.print_determinants_section(system,'Swapped') - # ...and swap back - #for interaction in combination: - # self.swap_interactions([interaction[0]], [interaction[1]]) - + self.swap_interactions([interaction[0]], [interaction[1]]) + swap_info += self.print_determinants_section(system, 'Swapped') info(swap_info) - return - # - # Interaction and swapping methods - # + @staticmethod + def get_interaction(group1, group2, include_side_chain_hbs=True): + """Get interaction energy between two groups. - def get_interaction(self, group1, group2, include_side_chain_hbs = True): + Args: + group1: first group for interaction + group2: second group for interaction + include_side_chain_hbs: include sidechain hydrogen bonds in energy + Returns: + interaction energy (float) + """ determinants = group1.determinants['coulomb'] if include_side_chain_hbs: - determinants = group1.determinants['sidechain'] + group1.determinants['coulomb'] - + determinants = ( + group1.determinants['sidechain'] + + group1.determinants['coulomb']) interaction_energy = 0.0 for det in determinants: if group2 == det.group: interaction_energy += det.value - return interaction_energy - - def print_determinants_section(self, system, tag): + """Print determinants of system. + + Args: + system: set of groups + tag: something to add to output + Returns: + string with summary + """ all_labels = [g.label for g in system] - s = ' '+'-'*113+'\n' + str_ = ' ' + '-' * 113 + '\n' for group in system: - s += self.tagged_format(' %-8s|' % tag, group.getDeterminantString(), all_labels) + str_ += self.tagged_format( + ' {0:<8s}|'.format(tag), group.get_determinant_string(), + all_labels) + return str_ + '\n' - return s+'\n' + def swap_interactions(self, groups1, groups2, include_side_chain_hbs=True): + """Swap interactions between two groups. - def swap_interactions(self, groups1, groups2, include_side_chain_hbs = True): - - for i in range(len(groups1)): - group1 = groups1[i] + Args: + group1: first group to swap + group2: second group to swap + """ + for i, group1 in enumerate(groups1): group2 = groups2[i] - # swap the interactions! - self.transfer_determinant(group1.determinants['coulomb'], group2.determinants['coulomb'], group1.label, group2.label) + self.transfer_determinant(group1.determinants['coulomb'], + group2.determinants['coulomb'], + group1.label, group2.label) if include_side_chain_hbs: - self.transfer_determinant(group1.determinants['sidechain'], group2.determinants['sidechain'], group1.label, group2.label) - - # re-calculate pKa values + self.transfer_determinant(group1.determinants['sidechain'], + group2.determinants['sidechain'], + group1.label, group2.label) + # re-calculate pKa values group1.calculate_total_pka() group2.calculate_total_pka() - return + @staticmethod + def transfer_determinant(determinants1, determinants2, + label1, label2): + """Transfer information between two sets of determinants. - - def transfer_determinant(self, determinants1, determinants2, label1, label2): + Args: + determinants1: determinant list + determinants2: determinant list + label1: label for list 1 + label2: label for list 2 + """ # find out what to transfer... from1to2 = [] from2to1 = [] for det in determinants1: if det.label == label2: from1to2.append(det) - for det in determinants2: if det.label == label1: from2to1.append(det) - # ...and transfer it! for det in from1to2: det.label = label1 determinants2.append(det) determinants1.remove(det) - for det in from2to1: det.label = label2 determinants1.append(det) determinants2.remove(det) - return + @staticmethod + def tagged_format(tag, str_, labels): + """Tag a string. - # - # Output methods - # - - def tagged_format(self, tag, s, labels): - s = "%s %s"%(tag,s) - s = s.replace('\n','\n%s '%tag) + Args: + tag: tag to add + str_: string to tag + labels: labels to replace + Returns: + tagged string + """ + str_ = "{0:s} {1:s}".format(tag, str_) + str_ = str_.replace('\n', '\n{0:s} '.format(tag)) for label in labels: - s = s.replace(label, '\033[31m%s\033[30m'%label) - return s+'\n' + str_ = str_.replace(label, '\033[31m{0:s}\033[30m'.format(label)) + return str_ + '\n' + + @staticmethod + def make_data_to_string(data, group1, group2): + """Describe interaction between groups. + + Args: + data: data about interactions + group1: first group + group2: second group + Returns: + formatted string with information. + """ + str_ = ( + " {label1} and {label2} coupled (prot.state): {coupl_fact:>5.2f}\n" + " Energy levels: {def_energy:>6.2f}, {swap_energy:>6.2f} " + "(difference: {diff_energy:>6.2f}) at pH {ph:>6.2f}\n" + " Interaction energy: {int_energy:>6.2f}\n" + " Intrinsic pka's: {pka1:>6.2f}, {pka2:>6.2f} " + "(difference: {diff_pka:>6.2f})\n" + " Swapped pKa's: {swap1:>6.2f}, {swap2:>6.2f} " + "(difference: {shift1:>6.2f}, {shift2:>6.2f})" + ).format( + label1=group1.label, label2=group2.label, + coupl_fact=data['coupling_factor'], + def_energy=data['default_energy'], + swap_energy=data['swapped_energy'], + diff_energy=data['default_energy']-data['swapped_energy'], + ph=data['pH'], int_energy=data['interaction_energy'], + pka1=group1.intrinsic_pka, pka2=group2.intrinsic_pka, + diff_pka=group1.intrinsic_pka-group2.intrinsic_pka, + swap1=data['swapped_pka1'], swap2=data['swapped_pka2'], + shift1=data['pka_shift1'], shift2=data['pka_shift2']) + return str_ - def make_data_to_string(self, data, group1, group2): - s = """ %s and %s coupled (prot.state): %5.2f - Energy levels: %6.2f, %6.2f (difference: %6.2f) at pH %6.2f - Interaction energy: %6.2f - Intrinsic pka's: %6.2f, %6.2f (difference: %6.2f) - Swapped pKa's: %6.2f, %6.2f (difference: %6.2f, %6.2f)"""%(group1.label, - group2.label, - data['coupling_factor'], - data['default_energy'], data['swapped_energy'], - data['default_energy'] - data['swapped_energy'], - data['pH'], - data['interaction_energy'], - group1.intrinsic_pKa, - group2.intrinsic_pKa, - group1.intrinsic_pKa-group2.intrinsic_pKa, - data['swapped_pka1'], - data['swapped_pka2'], - data['pka_shift1'], - data['pka_shift2']) - - return s - - -nccg = non_covalently_couple_groups() +NCCG = NonCovalentlyCoupledGroups() diff --git a/propka/determinant.py b/propka/determinant.py index 5085ca1..5370798 100644 --- a/propka/determinant.py +++ b/propka/determinant.py @@ -1,29 +1,37 @@ +"""Holds the Determinant class + +TODO - it is confusing to have both `determinant.py` and `determinants.py`. +Should these be merged? +""" -from __future__ import division -from __future__ import print_function class Determinant: - """ - Determinant class - set up for later structurization + """Determinant class. + + Appears to be a container for storing information and values about + groups that interact to influence titration states. + + TODO - figure out what this class does. """ def __init__(self, group, value): - """ - Contructer of determinant object - simple, but helps in creating structure! + """Initialize the object. + + Args: + group: group associated with Determinant object + value: value to assign to group """ self.group = group self.label = group.label self.value = value - return - def add(self, value): - """ - adding a value to determinant + """Increment determinant value. + + Args: + value: value to add to determinant """ self.value += value - return - def __str__(self): - return '%s: %8.2f'%(self.label,self.value) + return '{0:s}: {1:8.2f}'.format(self.label, self.value) diff --git a/propka/determinants.py b/propka/determinants.py index 3154875..9a82e33 100644 --- a/propka/determinants.py +++ b/propka/determinants.py @@ -1,19 +1,33 @@ +"""Functions to manipulate Determinant objects. -from __future__ import division -from __future__ import print_function - -import math, time - -import propka.iterative, propka.lib, propka.vector_algebra -import propka.calculations +TODO - it is confusing to have both `determinant.py` and `determinants.py`. +Should these be merged? +""" +import math +import propka.iterative +import propka.lib +import propka.vector_algebra +from propka.calculations import squared_distance, get_smallest_distance +from propka.calculations import angle_distance_factors, hydrogen_bond_energy from propka.determinant import Determinant -def setDeterminants(propka_groups, version=None, options=None): - """ - adding side-chain and coulomb determinants/perturbations to all residues - note, backbone determinants are set separately - """ +# Cutoff for angle factor +# TODO - this constant appears elsewhere in the package. +# It should be moved to a configuration file. +FANGLE_MIN = 0.001 + +def set_determinants(propka_groups, version=None, options=None): + """Add side-chain and coulomb determinants/perturbations to all residues. + + NOTE - backbone determinants are set separately + + Args: + propka_groups: groups to adjust + version: version object + options: options object + """ iterative_interactions = [] # --- NonIterative section ---# for group1 in propka_groups: @@ -23,172 +37,202 @@ def setDeterminants(propka_groups, version=None, options=None): # do not calculate interactions for coupled groups if group2 in group1.covalently_coupled_groups: break - distance = propka.calculations.distance(group1, group2) - if distance < version.parameters.coulomb_cutoff2: - interaction_type = version.parameters.interaction_matrix.get_value(group1.type,group2.type) + interaction_type = ( + version.parameters.interaction_matrix.get_value( + group1.type, group2.type)) if interaction_type == 'I': - propka.iterative.addtoDeterminantList(group1, group2, distance, iterative_interactions, version=version) + propka.iterative.add_to_determinant_list( + group1, group2, distance, iterative_interactions, + version=version) elif interaction_type == 'N': - addDeterminants(group1, group2, distance, version) - - + add_determinants(group1, group2, distance, version) # --- Iterative section ---# - propka.iterative.addDeterminants(iterative_interactions, version, options=options) + propka.iterative.add_determinants(iterative_interactions, version) -def addDeterminants(group1, group2, distance, version): +def add_determinants(group1, group2, distance, version): + """Add determinants and perturbations for distance(R1,R2) < coulomb_cutoff. + + Args: + group1: first group to add + group2: second group to add + distance: distance between groups + version: version object """ - adding determinants/perturbations, distance(R1, R2) < coulomb_cutoff always - """ - # side-chain determinant - addSidechainDeterminants(group1, group2, version) - + add_sidechain_determinants(group1, group2, version) # Coulomb determinant - addCoulombDeterminants(group1, group2, distance, version) + add_coulomb_determinants(group1, group2, distance, version) - return -def addSidechainDeterminants(group1, group2, version=None): +def add_sidechain_determinants(group1, group2, version=None): + """Add side-chain determinants and perturbations. + + NOTE - res_num1 > res_num2 + + Args: + group1: first group to add + group2: second group to add + version: version object """ - adding side-chain determinants/perturbations - Note, res_num1 > res_num2 - """ - hbond_interaction = version.hydrogen_bond_interaction(group1, group2) - if hbond_interaction: - if group1.charge == group2.charge: # acid pair or base pair if group1.model_pka < group2.model_pka: - newDeterminant1 = Determinant(group2, -hbond_interaction) - newDeterminant2 = Determinant(group1, hbond_interaction) + new_determinant1 = Determinant(group2, -hbond_interaction) + new_determinant2 = Determinant(group1, hbond_interaction) else: - newDeterminant1 = Determinant(group2, hbond_interaction) - newDeterminant2 = Determinant(group1, -hbond_interaction) + new_determinant1 = Determinant(group2, hbond_interaction) + new_determinant2 = Determinant(group1, -hbond_interaction) else: - newDeterminant1 = Determinant(group2, hbond_interaction*group1.charge) - newDeterminant2 = Determinant(group1, hbond_interaction*group2.charge) + new_determinant1 = Determinant( + group2, hbond_interaction*group1.charge) + new_determinant2 = Determinant( + group1, hbond_interaction*group2.charge) + group1.determinants['sidechain'].append(new_determinant1) + group2.determinants['sidechain'].append(new_determinant2) - group1.determinants['sidechain'].append(newDeterminant1) - group2.determinants['sidechain'].append(newDeterminant2) - return +def add_coulomb_determinants(group1, group2, distance, version): + """Add non-iterative Coulomb determinants and perturbations. -def addCoulombDeterminants(group1, group2, distance, version): + Args: + group1: first group to add + group2: second group to add + distance: distance between groups + version: version object """ - adding NonIterative Coulomb determinants/perturbations - """ - - coulomb_interaction = version.electrostatic_interaction(group1, group2, distance) - + coulomb_interaction = version.electrostatic_interaction( + group1, group2, distance) if coulomb_interaction: - Q1 = group1.charge - Q2 = group2.charge - + q1 = group1.charge + q2 = group2.charge # assigning the Coulombic interaction - if Q1 < 0.0 and Q2 < 0.0: - """ both are acids """ - addCoulombAcidPair(group1, group2, coulomb_interaction) - elif Q1 > 0.0 and Q2 > 0.0: - """ both are bases """ - addCoulombBasePair(group1, group2, coulomb_interaction) + if q1 < 0.0 and q2 < 0.0: + # both are acids + add_coulomb_acid_pair(group1, group2, coulomb_interaction) + elif q1 > 0.0 and q2 > 0.0: + # both are bases + add_coulomb_base_pair(group1, group2, coulomb_interaction) else: - """ one of each """ - addCoulombIonPair(group1, group2, coulomb_interaction) - - return + # one of each + add_coulomb_ion_pair(group1, group2, coulomb_interaction) -def addCoulombAcidPair(object1, object2, value): +def add_coulomb_acid_pair(object1, object2, value): + """Add the Coulomb interaction (an acid pair). + + The higher pKa is raised. + + Args: + object1: first part of pair + object2: second part of pair + value: determinant value """ - Adding the Coulomb interaction (an acid pair): - the higher pKa is raised - """ - if object1.model_pka > object2.model_pka: - newDeterminant = Determinant(object2, value) - object1.determinants['coulomb'].append(newDeterminant) + new_determinant = Determinant(object2, value) + object1.determinants['coulomb'].append(new_determinant) else: - newDeterminant = Determinant(object1, value) - object2.determinants['coulomb'].append(newDeterminant) + new_determinant = Determinant(object1, value) + object2.determinants['coulomb'].append(new_determinant) -def addCoulombBasePair(object1, object2, value): - """ - Adding the Coulomb interaction (a base pair): - the lower pKa is lowered +def add_coulomb_base_pair(object1, object2, value): + """Add the Coulomb interaction (a base pair). + + The lower pKa is lowered. + + Args: + object1: first part of pair + object2: second part of pair + value: determinant value """ if object1.model_pka < object2.model_pka: - newDeterminant = Determinant(object2, -value) - object1.determinants['coulomb'].append(newDeterminant) + new_determinant = Determinant(object2, -value) + object1.determinants['coulomb'].append(new_determinant) else: - newDeterminant = Determinant(object1, -value) - object2.determinants['coulomb'].append(newDeterminant) + new_determinant = Determinant(object1, -value) + object2.determinants['coulomb'].append(new_determinant) -def addCoulombIonPair(object1, object2, value): +def add_coulomb_ion_pair(object1, object2, value): + """Add the Coulomb interaction (an acid-base pair). + + The pKa of the acid is lowered & the pKa of the base is raised. + + Args: + object1: first part of pair + object2: second part of pair + value: determinant value """ - Adding the Coulomb interaction (an acid-base pair): - the pKa of the acid is lowered & the pKa of the base is raised - """ - # residue1 - Q1 = object1.charge - newDeterminant = Determinant(object2, Q1*value) - object1.determinants['coulomb'].append(newDeterminant) - + q1 = object1.charge + new_determinant = Determinant(object2, q1*value) + object1.determinants['coulomb'].append(new_determinant) # residue2 - Q2 = object2.charge - newDeterminant = Determinant(object1, Q2*value) - object2.determinants['coulomb'].append(newDeterminant) + q2 = object2.charge + new_determinant = Determinant(object1, q2*value) + object2.determinants['coulomb'].append(new_determinant) +def set_ion_determinants(conformation_container, version): + """Add ion determinants and perturbations. - -def setIonDeterminants(conformation_container, version): - """ - adding ion determinants/perturbations + Args: + conformation_container: conformation to set + version: version object """ for titratable_group in conformation_container.get_titratable_groups(): for ion_group in conformation_container.get_ions(): - squared_distance = propka.calculations.squared_distance(titratable_group, ion_group) - if squared_distance < version.parameters.coulomb_cutoff2_squared: - weight = version.calculatePairWeight(titratable_group.Nmass, ion_group.Nmass) - # the pKa of both acids and bases are shifted up by negative ions (and vice versa) - value = (-ion_group.charge) * version.calculateCoulombEnergy(math.sqrt(squared_distance), weight) - newDeterminant = Determinant(ion_group, value) - titratable_group.determinants['coulomb'].append(newDeterminant) + dist_sq = squared_distance(titratable_group, ion_group) + if dist_sq < version.parameters.coulomb_cutoff2_squared: + weight = version.calculate_pair_weight( + titratable_group.num_volume, ion_group.num_volume) + # the pKa of both acids and bases are shifted up by negative + # ions (and vice versa) + value = ( + -ion_group.charge + * version.calculate_coulomb_energy( + math.sqrt(dist_sq), weight)) + new_det = Determinant(ion_group, value) + titratable_group.determinants['coulomb'].append(new_det) - return -def setBackBoneDeterminants(titratable_groups, backbone_groups, version): +def set_backbone_determinants(titratable_groups, backbone_groups, version): + """Set determinants between titrable and backbone groups. + Args: + titratable_groups: list of titratable groups + backbone_groups: list of backbone groups + version: version object + """ for titratable_group in titratable_groups: - titratable_group_interaction_atoms = titratable_group.interaction_atoms_for_acids + titratable_group_interaction_atoms = ( + titratable_group.interaction_atoms_for_acids) if not titratable_group_interaction_atoms: continue - # find out which backbone groups this titratable is interacting with for backbone_group in backbone_groups: # find the interacting atoms - backbone_interaction_atoms = backbone_group.get_interaction_atoms(titratable_group) + backbone_interaction_atoms = ( + backbone_group.get_interaction_atoms(titratable_group)) if not backbone_interaction_atoms: continue - # find the smallest distance - [backbone_atom, distance, titratable_atom] = propka.calculations.get_smallest_distance(backbone_interaction_atoms, - titratable_group_interaction_atoms) + [backbone_atom, distance, titratable_atom] = ( + get_smallest_distance( + backbone_interaction_atoms, + titratable_group_interaction_atoms)) # get the parameters - parameters = version.get_backbone_hydrogen_bond_parameters(backbone_atom, titratable_atom) + parameters = ( + version.get_backbone_hydrogen_bond_parameters( + backbone_atom, titratable_atom)) if not parameters: continue - [dpKa_max, [cutoff1, cutoff2]] = parameters - - + [dpka_max, [cutoff1, cutoff2]] = parameters if distance < cutoff2: # calculate angle factor f_angle = 1.0 @@ -202,19 +246,20 @@ def setBackBoneDeterminants(titratable_groups, backbone_groups, version): # || # C if backbone_group.type == 'BBC': - if titratable_group.type in version.parameters.angular_dependent_sidechain_interactions: + if (titratable_group.type + in version.parameters.angular_dependent_sidechain_interactions): if titratable_atom.element == 'H': - heavy_atom = titratable_atom.bonded_atoms[0] + heavy_atom = titratable_atom.bonded_atoms[0] hydrogen_atom = titratable_atom - [d1, f_angle, d2] = propka.calculations.AngleFactorX(atom1=heavy_atom, - atom2=hydrogen_atom, - atom3=backbone_atom) + [_, f_angle, _] = angle_distance_factors( + atom1=heavy_atom, atom2=hydrogen_atom, + atom3=backbone_atom) else: - # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to - # the titratable atom than the hydrogen. In either case we set the angle factor - # to 0 + # Either the structure is corrupt (no hydrogen), + # or the heavy atom is closer to the titratable + # atom than the hydrogen. In either case we set the + # angle factor to 0 f_angle = 0.0 - # for BBN groups, the hydrogen is on the backbone group # # Titra. @@ -225,23 +270,22 @@ def setBackBoneDeterminants(titratable_groups, backbone_groups, version): # / \ if backbone_group.type == 'BBN': if backbone_atom.element == 'H': - backbone_N = backbone_atom.bonded_atoms[0] - backbone_H = backbone_atom - [d1, f_angle, d2] = propka.calculations.AngleFactorX(atom1=titratable_atom, - atom2=backbone_H, - atom3=backbone_N) + backbone_n = backbone_atom.bonded_atoms[0] + backbone_h = backbone_atom + [_, f_angle, _] = ( + angle_distance_factors( + atom1=titratable_atom, atom2=backbone_h, + atom3=backbone_n)) else: - # Either the structure is corrupt (no hydrogen), or the heavy atom is closer to - # the titratable atom than the hydrogen. In either case we set the angle factor - # to 0 + # Either the structure is corrupt (no hydrogen), or the + # heavy atom is closer to the titratable atom than the + # hydrogen. In either case we set the angle factor to 0 f_angle = 0.0 - - - if f_angle > 0.001: - value = titratable_group.charge * propka.calculations.HydrogenBondEnergy(distance, dpKa_max, [cutoff1,cutoff2], f_angle) - - newDeterminant = Determinant(backbone_group, value) - titratable_group.determinants['backbone'].append(newDeterminant) - - - return + if f_angle > FANGLE_MIN: + value = ( + titratable_group.charge + * hydrogen_bond_energy( + distance, dpka_max, [cutoff1, cutoff2], f_angle)) + new_determinant = Determinant(backbone_group, value) + titratable_group.determinants['backbone'].append( + new_determinant) diff --git a/propka/group.py b/propka/group.py index 14cc5ec..465c195 100644 --- a/propka/group.py +++ b/propka/group.py @@ -1,336 +1,322 @@ -# -# Class for storing groups important for propka calculations -# - -from __future__ import division -from __future__ import print_function - -import propka.ligand, propka.determinant, propka.ligand_pka_values, math, propka.protonate +"""Routines and classes for storing groups important to PROPKA calculations.""" +import math +import propka.ligand +import propka.protonate +from propka.ligand_pka_values import LigandPkaValues +from propka.determinant import Determinant from propka.lib import info, warning -my_protonator = propka.protonate.Protonate(verbose=False) - -expected_atoms_acid_interactions = { - 'COO':{'O':2}, - 'HIS':{'H':2, 'N':2}, - 'CYS':{'S':1}, - 'TYR':{'O':1}, - 'LYS':{'N':1}, - 'ARG':{'H':5, 'N':3}, - 'ROH':{'O':1}, - 'AMD':{'H':2, 'N':1}, - 'TRP':{'H':1, 'N':1}, - 'N+': {'N':1}, - 'C-': {'O':2}, - 'BBN':{'H':1, 'N':1,}, - 'BBC':{'O':1}, - 'NAR':{'H':1, 'N':1}, - 'NAM':{'H':1, 'N':1}, - 'F': {'F':1}, - 'Cl': {'Cl':1}, - 'OH': {'H':1, 'O':1}, - 'OP': {'O':1}, - 'O3': {'O':1}, - 'O2': {'O':1}, - 'SH': {'S':1}, - 'CG': {'H':5, 'N':3}, - 'C2N':{'H':4, 'N':2}, - 'OCO':{'O':2}, - 'N30':{'H':4, 'N':1}, - 'N31':{'H':3, 'N':1}, - 'N32':{'H':2, 'N':1}, - 'N33':{'H':1, 'N':1}, - 'NP1':{'H':2, 'N':1}, - 'N1' :{'N':1} -} - -expected_atoms_base_interactions = { - 'COO':{'O':2}, - 'HIS':{'N':2}, - 'CYS':{'S':1}, - 'TYR':{'O':1}, - 'LYS':{'N':1}, - 'ARG':{'N':3}, - 'ROH':{'O':1}, - 'AMD':{'O':1}, - 'TRP':{'N':1}, - 'N+': {'N':1}, - 'C-': {'O':2}, - 'BBN':{'H':1, 'N':1,}, - 'BBC':{'O':1}, - 'NAR':{'H':1, 'N':1}, - 'NAM':{'H':1, 'N':1}, - 'F': {'F':1}, - 'Cl': {'Cl':1}, - 'OH': {'H':1, 'O':1}, - 'OP': {'O':1}, - 'O3': {'O':1}, - 'O2': {'O':1}, - 'SH': {'S':1}, - 'CG': {'N':3}, - 'C2N':{'N':2}, - 'OCO':{'O':2}, - 'N30':{'N':1}, - 'N31':{'N':1}, - 'N32':{'N':1}, - 'N33':{'N':1}, - 'NP1':{'N':1}, - 'N1' :{'N':1} -} +# Constants that start with "UNK_" are a mystery to me +UNK_PKA_SCALING = -1.36 +PROTONATOR = propka.protonate.Protonate(verbose=False) +EXPECTED_ATOMS_ACID_INTERACTIONS = { + 'COO': {'O': 2}, 'HIS': {'H': 2, 'N': 2}, 'CYS': {'S': 1}, 'TYR': {'O': 1}, + 'LYS': {'N': 1}, 'ARG': {'H': 5, 'N': 3}, 'ROH': {'O': 1}, + 'AMD': {'H': 2, 'N': 1}, 'TRP': {'H': 1, 'N': 1}, 'N+': {'N': 1}, + 'C-': {'O': 2}, 'BBN': {'H': 1, 'N': 1,}, 'BBC': {'O': 1}, + 'NAR': {'H': 1, 'N': 1}, 'NAM': {'H': 1, 'N': 1}, 'F': {'F': 1}, + 'Cl': {'Cl': 1}, 'OH': {'H': 1, 'O': 1}, 'OP': {'O': 1}, 'O3': {'O': 1}, + 'O2': {'O': 1}, 'SH': {'S': 1}, 'CG': {'H': 5, 'N': 3}, + 'C2N': {'H': 4, 'N': 2}, 'OCO': {'O': 2}, 'N30': {'H': 4, 'N': 1}, + 'N31': {'H': 3, 'N': 1}, 'N32': {'H': 2, 'N': 1}, 'N33': {'H': 1, 'N': 1}, + 'NP1': {'H': 2, 'N': 1}, 'N1': {'N': 1}} +EXPECTED_ATOMS_BASE_INTERACTIONS = { + 'COO': {'O': 2}, 'HIS': {'N': 2}, 'CYS': {'S': 1}, 'TYR': {'O': 1}, + 'LYS': {'N': 1}, 'ARG': {'N': 3}, 'ROH': {'O': 1}, 'AMD': {'O': 1}, + 'TRP': {'N': 1}, 'N+': {'N': 1}, 'C-': {'O': 2}, 'BBN': {'H': 1, 'N': 1}, + 'BBC': {'O': 1}, 'NAR': {'H': 1, 'N': 1}, 'NAM': {'H': 1, 'N': 1}, + 'F': {'F': 1}, 'Cl': {'Cl': 1}, 'OH': {'H': 1, 'O': 1}, 'OP': {'O': 1}, + 'O3': {'O': 1}, 'O2': {'O': 1}, 'SH': {'S': 1}, 'CG': {'N': 3}, + 'C2N': {'N': 2}, 'OCO': {'O': 2}, 'N30': {'N': 1}, 'N31': {'N': 1}, + 'N32': {'N': 1}, 'N33': {'N': 1}, 'NP1': {'N': 1}, 'N1': {'N': 1}} class Group: + """Class for storing groups important to pKa calculations.""" + def __init__(self, atom): - #info('Made new %s group from %s'%(type,atom)) + """Initialize with an atom. + + Args: + atom: atom object + """ self.atom = atom self.type = '' atom.group = self - # set up data structures - self.determinants = {'sidechain':[],'backbone':[],'coulomb':[]} + self.determinants = {'sidechain': [], 'backbone': [], 'coulomb': []} self.pka_value = 0.0 self.model_pka = 0.0 - - self.Emass = 0.0 - self.Nmass = 0.0 - self.Elocl = 0.0 - self.Nlocl = 0.0 + # Energy associated with volume interactions + self.energy_volume = 0.0 + # Number of atoms associated with volume interactions + self.num_volume = 0.0 + # Energy associated with local interactions + self.energy_local = 0.0 + # Number of atoms associated with local interactions + self.num_local = 0.0 self.buried = 0.0 self.x = 0.0 self.y = 0.0 self.z = 0.0 self.charge = 0 + self.parameters = None + self.exclude_cys_from_results = None self.interaction_atoms_for_acids = [] self.interaction_atoms_for_bases = [] self.model_pka_set = False - + self.intrinsic_pka = None + self.titratable = None # information on covalent and non-covalent coupling self.non_covalently_coupled_groups = [] self.covalently_coupled_groups = [] self.coupled_titrating_group = None self.common_charge_centre = False - - self.residue_type = self.atom.res_name if self.atom.terminal: self.residue_type = self.atom.terminal - - if self.atom.type=='atom': - self.label = '%-3s%4d%2s'%(self.residue_type, atom.res_num, atom.chain_id) - elif self.atom.res_name in ['DA ','DC ','DG ','DT ']: - self.label = '%1s%1s%1s%4d%2s'%(self.residue_type[1], - atom.element, - atom.name.replace('\'','')[-1], - atom.res_num, - atom.chain_id) - -# self.label = '%1s%1s%1s%4d%2s'%(self.residue_type[1], atom.element,atom.name[-1], atom.res_num, atom.chain_id) + if self.atom.type == 'atom': + fmt = "{g.residue_type:<3s}{a.res_num:>4d}{a.chain_id:>2s}" + self.label = fmt.format(g=self, a=atom) + elif self.atom.res_name in ['DA ', 'DC ', 'DG ', 'DT ']: + fmt = "{type:1s}{elem:1s}{name:1s}{res_num:>4d}{chain:>2s}" + self.label = fmt.format( + type=self.residue_type[1], elem=atom.element, + name=atom.name.replace('\'', '')[-1], res_num=atom.res_num, + chain=atom.chain_id) else: - self.label = '%-3s%4s%2s'%(self.residue_type, atom.name, atom.chain_id) - - + fmt = "{type:<3s}{name:>4s}{chain:>2s}" + self.label = fmt.format( + type=self.residue_type, name=atom.name, chain=atom.chain_id) # container for squared distances self.squared_distances = {} - return - - - # - # Coupling-related methods - # def couple_covalently(self, other): - """ Couple this group with another group """ + """Couple this group with another group. + + Args: + other: other group for coupling + """ # do the coupling if not other in self.covalently_coupled_groups: self.covalently_coupled_groups.append(other) - if not self in other.covalently_coupled_groups: other.covalently_coupled_groups.append(self) - return - def couple_non_covalently(self, other): - """ Couple this group with another group """ + """Non-covalenthly couple this group with another group. + + Args: + other: other group for coupling + """ # do the coupling if not other in self.non_covalently_coupled_groups: self.non_covalently_coupled_groups.append(other) - if not self in other.non_covalently_coupled_groups: other.non_covalently_coupled_groups.append(self) - return + def get_covalently_coupled_groups(self): + """Get covalently coupled groups. - def get_covalently_coupled_groups(self): return self.covalently_coupled_groups - def get_non_covalently_coupled_groups(self): return self.non_covalently_coupled_groups + Returns: + list of covalently coupled groups. + """ + return self.covalently_coupled_groups + def get_non_covalently_coupled_groups(self): + """Get non-covalently coupled groups. + + Returns: + list of covalently coupled groups. + """ + return self.non_covalently_coupled_groups def share_determinants(self, others): + """Share determinants between this group and others. + Args: + others: list of other groups + """ # for each determinant type for other in others: if other == self: + the_other = other continue - - for type in ['sidechain','backbone','coulomb']: - for g in other.determinants[type]: self.share_determinant(g,type) - + for type_ in ['sidechain', 'backbone', 'coulomb']: + for det in other.determinants[type_]: + self.share_determinant(det, type_) # recalculate pka values self.calculate_total_pka() - other.calculate_total_pka() + the_other.calculate_total_pka() - return + def share_determinant(self, new_determinant, type_): + """Add determinant to this group's list of determinants. - - def share_determinant(self, new_determinant, type): + Args: + new_determinant: determinant to add + type_: type of determinant + """ added = False # first check if we already have a determinant with this label - for own_determinant in self.determinants[type]: + for own_determinant in self.determinants[type_]: if own_determinant.group == new_determinant.group: # if so, find the average value - avr = (own_determinant.value + new_determinant.value)/2.0 + avr = 0.5*(own_determinant.value + new_determinant.value) own_determinant.value = avr new_determinant.value = avr added = True - # otherwise we just add the determinant to our list if not added: - self.determinants[type].append(propka.determinant.Determinant(new_determinant.group, - new_determinant.value)) - - return + self.determinants[type_].append( + Determinant(new_determinant.group, new_determinant.value)) def make_covalently_coupled_line(self): + """Create line for covalent coupling. + Returns: + string + """ # first check if there are any coupled groups at all if len(self.covalently_coupled_groups) == 0: return '' - - line = 'CCOUPL%5d'%self.atom.numb - + line = 'CCOUPL{0:5d}'.format(self.atom.numb) # extract and sort numbers of coupled groups coupled = [] - for g in self.covalently_coupled_groups: - coupled.append(g.atom.numb) + for group in self.covalently_coupled_groups: + coupled.append(group.atom.numb) coupled.sort() - # write 'em out - for b in coupled: - line += '%5d'%b + for num in coupled: + line += '{0:5d}'.format(num) line += '\n' - return line def make_non_covalently_coupled_line(self): + """Create line for non-covalent coupling. + + Returns: + string + """ # first check if there are any coupled groups at all if len(self.non_covalently_coupled_groups) == 0: return '' - - line = 'NCOUPL%5d'%self.atom.numb - + line = 'NCOUPL{0:5d}'.format(self.atom.numb) # extract and sort numbers of coupled groups coupled = [] - for g in self.non_covalently_coupled_groups: - coupled.append(g.atom.numb) + for group in self.non_covalently_coupled_groups: + coupled.append(group.atom.numb) coupled.sort() - # write 'em out - for b in coupled: - line += '%5d'%b + for num in coupled: + line += '{0:5d}'.format(num) line += '\n' - - return line - # - # Bookkeeping methods - # def __eq__(self, other): - """ - Check if two groups should be considered identical - """ + """Needed for creating sets of groups.""" if self.atom.type == 'atom': # In case of protein atoms we trust the labels - return self.label==other.label + return self.label == other.label else: # For heterogene atoms we also need to check the residue number - return self.label==other.label and self.atom.res_num == other.atom.res_num + return ( + (self.label == other.label) + and (self.atom.res_num == other.atom.res_num)) def __hash__(self): - """ Needed together with __eq__ - otherwise we can't make sets of groups """ + """Needed for creating sets of groups.""" return id(self) def __iadd__(self, other): if self.type != other.type: - raise Exception('Cannot add groups of different types (%s and %s)'%(self.type,other.type)) - + str_ = ( + 'Cannot add groups of different types ' + '({0:s} and {1:s})'.format(self.type, other.type)) + raise Exception(str_) # add all values self.pka_value += other.pka_value - self.Nmass += other.Nmass - self.Emass += other.Emass - self.Nlocl += other.Nlocl - self.Elocl += other.Elocl + self.num_volume += other.num_volume + self.energy_volume += other.energy_volume + self.num_local += other.num_local + self.energy_local += other.energy_local self.buried += other.buried # and add all determinants - for type in ['sidechain','backbone','coulomb']: - for determinant in other.determinants[type]: - self.add_determinant(determinant, type) + # TODO - list ['sidechain', 'backbone', 'coulomb'] should be constant + # This list appears all over the code and should be moved to a constant + # higher in the package + for type_ in ['sidechain', 'backbone', 'coulomb']: + for determinant in other.determinants[type_]: + self.add_determinant(determinant, type_) return self + def add_determinant(self, new_determinant, type_): + """Add to current and creates non-present determinants. - def add_determinant(self, new_determinant, type): - """ Adds to current and creates non-present determinants""" + Args: + new_determinant: new determinant to add + type_: determinant type + """ # first check if we already have a determinant with this label - for own_determinant in self.determinants[type]: + for own_determinant in self.determinants[type_]: if own_determinant.group == new_determinant.group: # if so, add the value own_determinant.value += new_determinant.value return - # otherwise we just add the determinant to our list - self.determinants[type].append(propka.determinant.Determinant(new_determinant.group, - new_determinant.value)) + self.determinants[type_].append(Determinant(new_determinant.group, + new_determinant.value)) - return + def set_determinant(self, new_determinant, type_): + """Overwrite current and create non-present determinants. - def set_determinant(self, new_determinant, type): - """ Overwrites current and creates non-present determinants""" + Args: + new_determinant: new determinant to add + type_: determinant type + """ # first check if we already have a determinant with this label - for own_determinant in self.determinants[type]: + for own_determinant in self.determinants[type_]: if own_determinant.group == new_determinant.group: # if so, overwrite the value own_determinant.value = new_determinant.value return - # otherwise we just add the determinant to our list - self.determinants[type].append(propka.determinant.Determinant(new_determinant.group, - new_determinant.value)) - - return + self.determinants[type_].append(Determinant(new_determinant.group, + new_determinant.value)) def remove_determinants(self, labels): - """ removes all determinants with label in labels """ - for type in ['sidechain','backbone','coulomb']: - matches = list(filter(lambda d: d.label in labels, [d for d in self.determinants[type]])) - for m in matches: self.determinants[type].remove(m) + """Remove all determinants with specified labels. - return + Args: + labels: list of labels to remove + """ + for type_ in ['sidechain', 'backbone', 'coulomb']: + matches = list( + filter(lambda d: d.label + in labels, [d for d in self.determinants[type_]])) + for match in matches: + self.determinants[type_].remove(match) def __truediv__(self, value): value = float(value) # divide all values self.pka_value /= value - self.Nmass /= value - self.Emass /= value - self.Nlocl /= value - self.Elocl /= value + self.num_volume /= value + self.energy_volume /= value + self.num_local /= value + self.energy_local /= value self.buried /= value # and all determinants - for type in ['sidechain','backbone','coulomb']: - for determinant in self.determinants[type]: + for type_ in ['sidechain', 'backbone', 'coulomb']: + for determinant in self.determinants[type_]: determinant.value /= value return self def clone(self): + """Create a copy of this group. + + Returns: + Copy of this group. + """ res = Group(self.atom) res.type = self.type res.residue_type = self.residue_type @@ -344,1005 +330,1090 @@ class Group: return res def setup(self): + """Set up a group.""" # set the charges if self.type in self.parameters.charge.keys(): self.charge = self.parameters.charge[self.type] if self.residue_type in self.parameters.ions.keys(): self.charge = self.parameters.ions[self.residue_type] - - #info('ION setup',self,self.residue_type, self.charge) - # find the center and the interaction atoms self.setup_atoms() - # set the model pka value self.titratable = False if self.residue_type in self.parameters.model_pkas.keys(): if not self.model_pka_set: self.model_pka = self.parameters.model_pkas[self.residue_type] # check if we should apply a custom model pka - key = '%s-%s'%(self.atom.res_name.strip(), self.atom.name.strip()) + key = '{0:s}-{1:s}'.format( + self.atom.res_name.strip(), + self.atom.name.strip()) if key in self.parameters.custom_model_pkas.keys(): self.model_pka = self.parameters.custom_model_pkas[key] - self.model_pka_set = True - if self.model_pka_set and not self.atom.cysteine_bridge: self.titratable = True self.exclude_cys_from_results = False - return - def setup_atoms(self): - # This method is overwritten for some types of groups + """Set up atoms in group. + + This method is overwritten for some types of groups + """ # set the center at the position of the main atom self.set_center([self.atom]) # set the main atom as interaction atom self.set_interaction_atoms([self.atom], [self.atom]) - return - def set_interaction_atoms(self, interaction_atoms_for_acids, interaction_atoms_for_bases): - [a.set_group_type(self.type) for a in interaction_atoms_for_acids+interaction_atoms_for_bases] + def set_interaction_atoms(self, interaction_atoms_for_acids, + interaction_atoms_for_bases): + """Set interacting atoms and group types. + Args: + interaction_atoms_for_acids: list of atoms for acid interactions + interaction_atoms_for_base: list of atoms for base interactions + """ + for atom in interaction_atoms_for_acids + interaction_atoms_for_bases: + atom.set_group_type(self.type) self.interaction_atoms_for_acids = interaction_atoms_for_acids self.interaction_atoms_for_bases = interaction_atoms_for_bases - # check if all atoms have been identified ok = True - for [expect, found, t] in [[expected_atoms_acid_interactions, self.interaction_atoms_for_acids, 'acid'], - [expected_atoms_base_interactions, self.interaction_atoms_for_bases, 'base']]: + for [expect, found, _] in [[EXPECTED_ATOMS_ACID_INTERACTIONS, + self.interaction_atoms_for_acids, 'acid'], + [EXPECTED_ATOMS_BASE_INTERACTIONS, + self.interaction_atoms_for_bases, 'base']]: if self.type in expect.keys(): - for e in expect[self.type].keys(): - if len([a for a in found if a.element==e]) != expect[self.type][e]: + for elem in expect[self.type].keys(): + if (len([a for a in found if a.element == elem]) + != expect[self.type][elem]): ok = False - if not ok: - warning('Missing atoms or failed protonation for %s (%s) -- please check the structure' % (self.label, self.type)) - warning('%s' % self) - Na = sum([expected_atoms_acid_interactions[self.type][e] for e in expected_atoms_acid_interactions[self.type].keys()]) - Nb = sum([expected_atoms_base_interactions[self.type][e] for e in expected_atoms_base_interactions[self.type].keys()]) - - warning('Expected %d interaction atoms for acids, found:' % Na) + str_ = 'Missing atoms or failed protonation for ' + str_ += '{0:s} ({1:s}) -- please check the structure'.format( + self.label, self.type) + warning(str_) + warning('{0:s}'.format(str(self))) + num_acid = sum( + [EXPECTED_ATOMS_ACID_INTERACTIONS[self.type][e] + for e in EXPECTED_ATOMS_ACID_INTERACTIONS[self.type].keys()]) + num_base = sum( + [EXPECTED_ATOMS_BASE_INTERACTIONS[self.type][e] + for e in EXPECTED_ATOMS_BASE_INTERACTIONS[self.type].keys()]) + warning( + 'Expected {0:d} interaction atoms for acids, found:'.format( + num_acid)) for i in range(len(self.interaction_atoms_for_acids)): - warning(' %s' % self.interaction_atoms_for_acids[i]) - - warning('Expected %d interaction atoms for bases, found:' % Nb) + warning( + ' {0:s}'.format( + str(self.interaction_atoms_for_acids[i]))) + warning( + 'Expected {0:d} interaction atoms for bases, found:'.format( + num_base)) for i in range(len(self.interaction_atoms_for_bases)): - warning(' %s' % self.interaction_atoms_for_bases[i]) - - - #return - - return + warning( + ' {0:s}'.format( + str(self.interaction_atoms_for_bases[i]))) def get_interaction_atoms(self, interacting_group): + """Get atoms involved in interaction with other group. + + Args: + interacting_group: other group + Returns: + list of atoms + """ if interacting_group.residue_type in self.parameters.base_list: return self.interaction_atoms_for_bases else: - return self.interaction_atoms_for_acids #default is acid interaction atoms - cf. 3.0 + # default is acid interaction atoms - cf. 3.0 + return self.interaction_atoms_for_acids def set_center(self, atoms): + """Set center of group based on atoms. + + Args: + atoms: list of atoms + """ if not atoms: raise ValueError("At least one atom must be specified") - # reset center - self.x = 0.0; self.y = 0.0; self.z = 0.0 - - # find the average positon of atoms + self.x = 0.0 + self.y = 0.0 + self.z = 0.0 + # find the average position of atoms for atom in atoms: - self.x += atom.x; self.y += atom.y; self.z += atom.z - + self.x += atom.x + self.y += atom.y + self.z += atom.z self.x /= float(len(atoms)) self.y /= float(len(atoms)) self.z /= float(len(atoms)) - return + def get_determinant_string(self, remove_penalised_group=False): + """Create a string to identify this determinant. - def getDeterminantString(self, remove_penalised_group=False): + Args: + remove_penalised_group: Boolean flag to remove penalized groups + Returns: + string + """ if self.coupled_titrating_group and remove_penalised_group: return '' - number_of_sidechain = len(self.determinants['sidechain']) - number_of_backbone = len(self.determinants['backbone']) - number_of_coulomb = len(self.determinants['coulomb']) - - number_of_lines = max(1, number_of_sidechain, number_of_backbone, number_of_coulomb) - str = "" + number_of_backbone = len(self.determinants['backbone']) + number_of_coulomb = len(self.determinants['coulomb']) + number_of_lines = max(1, number_of_sidechain, number_of_backbone, + number_of_coulomb) + str_ = "" for line_number in range(number_of_lines): - str += "%s" % (self.label) + str_ += "{0:s}".format(self.label) if line_number == 0: - str += " %6.2lf" %(self.pka_value) - if len(self.non_covalently_coupled_groups)>0: - str+='*' + str_ += " {0:6.2f}".format(self.pka_value) + if len(self.non_covalently_coupled_groups) > 0: + str_ += '*' else: - str+=' ' - - # if self.atom.cysteine_bridge: - # str += " BONDED " - # else: - str += " %4d%2s " % (int(100.0*self.buried), "%") - - str += " %6.2lf %4d" % (self.Emass, self.Nmass) - str += " %6.2lf %4d" % (self.Elocl, self.Nlocl) + str_ += ' ' + str_ += " {0:4d}{1:>2s} ".format(int(100.0*self.buried), "%") + str_ += " {0:6.2f} {1:4d}".format( + self.energy_volume, int(self.num_volume)) + str_ += " {0:6.2f} {1:4d}".format( + self.energy_local, int(self.num_local)) else: - str += "%40s" % (" ") - + str_ += "{0:>40s}".format(" ") # add the determinants - for type in ['sidechain','backbone','coulomb']: - str += self.get_determinant_for_string(type,line_number) - + for type_ in ['sidechain', 'backbone', 'coulomb']: + str_ += self.get_determinant_for_string(type_, line_number) # adding end-of-line - str += "\n" + str_ += "\n" + str_ += "\n" + return str_ - str += "\n" + def get_determinant_for_string(self, type_, number): + """Return a string describing determinant. - return str - - def get_determinant_for_string(self, type, number): - if number >= len(self.determinants[type]): - empty_determinant = "%s%4d%2s" % ("XXX", 0, "X") - return "%8.2lf %s" % (0.0, empty_determinant) + Args: + type_: determinant type + number: determinant index number + Returns: + string + """ + if number >= len(self.determinants[type_]): + return " 0.00 XXX 0 X" else: - determinant = self.determinants[type][number] - return "%8.2lf %s" % (determinant.value, determinant.label) - + determinant = self.determinants[type_][number] + return "{0:8.2f} {1:s}".format( + determinant.value, determinant.label) def calculate_total_pka(self): + """Calculate total pKa based on determinants associated with this + group.""" # if this is a cysteine involved in a di-sulphide bond if self.atom.cysteine_bridge: self.pka_value = 99.99 return - - - self.pka_value = self.model_pka + self.Emass + self.Elocl - + self.pka_value = ( + self.model_pka + self.energy_volume + self.energy_local) for determinant_type in ['sidechain', 'backbone', 'coulomb']: for determinant in self.determinants[determinant_type]: self.pka_value += determinant.value - return - - - - def calculate_intrinsic_pka(self): + """Calculate the intrinsic pKa values from the desolvation + determinants, back-bone hydrogen bonds, and side-chain hydrogen bonds + to non-titratable residues. """ - Calculates the intrinsic pKa values from the desolvation determinants, back-bone hydrogen bonds, - and side-chain hydrogen bond to non-titratable residues - """ - back_bone = 0.0 + back_bone = 0.0 for determinant in self.determinants['backbone']: value = determinant.value back_bone += value - side_chain = 0.0 for determinant in self.determinants['sidechain']: - if determinant.label[0:3] not in ['ASP','GLU','LYS','ARG','HIS','CYS','TYR','C- ','N+ ']: + if determinant.label[0:3] not in [ + 'ASP', 'GLU', 'LYS', 'ARG', 'HIS', 'CYS', 'TYR', 'C- ', + 'N+ ']: value = determinant.value side_chain += value + self.intrinsic_pka = ( + self.model_pka + self.energy_volume + self.energy_local + + back_bone + side_chain) - self.intrinsic_pKa = self.model_pka + self.Emass + self.Elocl + back_bone + side_chain + def get_summary_string(self, remove_penalised_group=False): + """Create summary string for this group. - return - - - - - - def getSummaryString(self, remove_penalised_group=False): + Args: + remove_penalised_group: Boolean to ignore penalized groups + Returns: + string + """ if self.coupled_titrating_group and remove_penalised_group: return '' - ligand_type = '' if self.atom.type == 'hetatm': ligand_type = self.type - penalty = '' if self.coupled_titrating_group: - penalty = ' NB: Discarded due to coupling with %s'%self.coupled_titrating_group.label - - str = " %9s %8.2lf %10.2lf %18s %s\n" % (self.label, - self.pka_value, - self.model_pka,ligand_type, - penalty) - - return str + penalty = ( + ' NB: Discarded due to coupling with {0:s}'.format( + self.coupled_titrating_group.label)) + fmt = ( + " {g.label:>9s} {g.pka_value:8.2f} {g.model_pka:10.2f} " + "{type:>18s} {penalty:s}\n") + return fmt.format(g=self, type=ligand_type, penalty=penalty) def __str__(self): - return 'Group (%s) for %s'%(self.type,self.atom) + str_ = 'Group ({0:s}) for {1:s}'.format(self.type, str(self.atom)) + return str_ + def calculate_folding_energy(self, parameters, ph=None, reference=None): + """Return the electrostatic energy of this residue at specified pH. - - # - # Energy-related methods - # - - def calculate_folding_energy(self, parameters, pH=None, reference=None): + Args: + parameters: parameters for energy calculation + ph: pH value for calculation + reference: reference state for calculation + Returns: + float describing energy """ - returning the electrostatic energy of this residue at pH 'pH' - """ - if pH == None: - pH = parameters.pH - if reference == None: + if ph is None: + ph = parameters.pH + if reference is None: reference = parameters.reference - # If not titratable, the contribution is zero - if not self.titratable: return 0.00 - - # calculating the ddG(neutral --> low-pH) contribution - ddG_neutral = 0.00 + # calculating the ddg(neutral --> low-pH) contribution + ddg_neutral = 0.00 if reference == 'neutral' and self.charge > 0.00: pka_prime = self.pka_value for determinant in self.determinants['coulomb']: if determinant.value > 0.00: pka_prime -= determinant.value - ddG_neutral = -1.36*(pka_prime - self.model_pka) - - # calculating the ddG(low-pH --> pH) contribution + ddg_neutral = UNK_PKA_SCALING*(pka_prime - self.model_pka) + # calculating the ddg(low-pH --> pH) contribution # folded - x = pH - self.pka_value - y = 10**x - Q_pro = math.log10(1+y) - + dpka = ph - self.pka_value + conc_ratio = 10**dpka + q_pro = math.log10(1+conc_ratio) # unfolded - x = pH - self.model_pka - y = 10**x - Q_mod = math.log10(1+y) + dpka = ph - self.model_pka + conc_ratio = 10**dpka + q_mod = math.log10(1+conc_ratio) + ddg_low = UNK_PKA_SCALING*(q_pro - q_mod) + ddg = ddg_neutral + ddg_low + return ddg - ddG_low = -1.36*(Q_pro - Q_mod) - ddG = ddG_neutral + ddG_low - - return ddG - - def calculate_charge(self, parmaeters, pH=7.0, state='folded'): + def calculate_charge(self, _, ph=7.0, state='folded'): + """Calculate the charge of the specified state at the specified pH. + Args: + _: parameters for calculation + ph: pH value + state: "folded" or "unfolded" + Returns: + float with charge + """ if state == "unfolded": - x = self.charge * (self.model_pka - pH) + q_dpka = self.charge * (self.model_pka - ph) else: - x = self.charge * (self.pka_value - pH) - - y = 10**x - charge = self.charge*(y/(1.0+y)) - + q_dpka = self.charge * (self.pka_value - ph) + conc_ratio = 10**q_dpka + charge = self.charge*(conc_ratio/(1.0+conc_ratio)) return charge def use_in_calculations(self): + """Indicate whether group should be included in results report. + + If --titrate_only option is specified, only residues that are + titratable and are in that list are included; otherwise all titratable + residues and CYS residues are included. """ - Whether this group should be included in the results report. If - --titrate_only option is specified, only residues that are titratable - and are in that list are included; otherwise all titratable residues - and CYS residues are included. - """ - return self.titratable or (self.residue_type == 'CYS' and \ - not self.exclude_cys_from_results) + return self.titratable or ( + self.residue_type == 'CYS' and not self.exclude_cys_from_results) -class COO_group(Group): +class COOGroup(Group): + """Carboxyl group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'COO' def setup_atoms(self): + """Set up group.""" # Identify the two caroxyl oxygen atoms the_oxygens = self.atom.get_bonded_elements('O') - # set the center using the two oxygen carboxyl atoms (if present) if the_oxygens: self.set_center(the_oxygens) else: self.set_center([self.atom]) - # FIXME perhaps it would be better to ignore this group completely + # TODO - perhaps it would be better to ignore this group completely # if the oxygen is missing from this residue? - self.set_interaction_atoms(the_oxygens, the_oxygens) - return -class HIS_group(Group): +class HISGroup(Group): + """Histidine group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'HIS' def setup_atoms(self): + """Set up atoms in group.""" # Find the atoms in the histidine ring ring_atoms = propka.ligand.is_ring_member(self.atom) if len(ring_atoms) != 5: warning('His group does not seem to contain a ring', self) - # protonate ring - for r in ring_atoms: - my_protonator.protonate_atom(r) - + for ring_atom in ring_atoms: + PROTONATOR.protonate_atom(ring_atom) # set the center using the ring atoms if ring_atoms: self.set_center(ring_atoms) else: # Missing side-chain atoms self.set_center([self.atom]) - # FIXME perhaps it would be better to ignore this group completely? - + # TODO - perhaps it would be better to ignore this group completely? # find the hydrogens on the ring-nitrogens hydrogens = [] nitrogens = [ra for ra in ring_atoms if ra.element == 'N'] - for nitrogen in nitrogens: hydrogens.extend(nitrogen.get_bonded_elements('H')) - self.set_interaction_atoms(hydrogens+nitrogens, nitrogens) - return +class CYSGroup(Group): + """Cysteine group.""" -class CYS_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'CYS' -class TYR_group(Group): +class TYRGroup(Group): + """Tyrosine group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'TYR' -class LYS_group(Group): +class LYSGroup(Group): + """Lysine group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'LYS' -class ARG_group(Group): +class ARGGroup(Group): + """Arginine group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'ARG' def setup_atoms(self): + """Set up group.""" # set the center at the position of the main atom self.set_center([self.atom]) - # find all the hydrogens on the nitrogen atoms nitrogens = self.atom.get_bonded_elements('N') - for n in nitrogens: - my_protonator.protonate_atom(n) - + for nitrogen in nitrogens: + PROTONATOR.protonate_atom(nitrogen) hydrogens = [] for nitrogen in nitrogens: hydrogens.extend(nitrogen.get_bonded_elements('H')) self.set_interaction_atoms(nitrogens+hydrogens, nitrogens) - return +class ROHGroup(Group): + """Alcohol group.""" -class ROH_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'ROH' -class SER_group(Group): + +class SERGroup(Group): + """Serine group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'SER' -class AMD_group(Group): + +class AMDGroup(Group): + """Amide group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'AMD' def setup_atoms(self): + """Setup group.""" # Identify the oxygen and nitrogen amide atoms the_oxygen = self.atom.get_bonded_elements('O') the_nitrogen = self.atom.get_bonded_elements('N') - # add protons to the nitrogen - my_protonator.protonate_atom(the_nitrogen[0]) + PROTONATOR.protonate_atom(the_nitrogen[0]) the_hydrogens = the_nitrogen[0].get_bonded_elements('H') - # set the center using the oxygen and nitrogen amide atoms self.set_center(the_oxygen+the_nitrogen) - - self.set_interaction_atoms(the_nitrogen+the_hydrogens,the_oxygen) - - return + self.set_interaction_atoms(the_nitrogen + the_hydrogens, the_oxygen) -class TRP_group(Group): +class TRPGroup(Group): + """Tryptophan group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'TRP' def setup_atoms(self): + """Set up atoms in group.""" # set the center at the position of the main atom self.set_center([self.atom]) - # find the hydrogen on the nitrogen atom - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogen = self.atom.get_bonded_elements('H') self.set_interaction_atoms(the_hydrogen+[self.atom], [self.atom]) - return -class Nterm_group(Group): +class NtermGroup(Group): + """N-terminus group.""" def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N+' -class Cterm_group(Group): +class CtermGroup(Group): + """C-terminus group.""" def __init__(self, atom): - Group.__init__(self,atom) - self.type = 'COO' # this is to deal with the COO-C- parameter unification. + Group.__init__(self, atom) + # this is to deal with the COO-C- parameter unification. + self.type = 'COO' def setup_atoms(self): + """Set up atoms in group.""" # Identify the carbon and other oxygen carboxyl atoms the_carbons = self.atom.get_bonded_elements('C') if not the_carbons: self.set_center([self.atom]) - # FIXME perhaps it would be better to ignore this group completely + # TODO - perhaps it would be better to ignore this group completely # if the carbon is missing from this residue? else: the_other_oxygen = the_carbons[0].get_bonded_elements('O') the_other_oxygen.remove(self.atom) - # set the center and interaction atoms the_oxygens = [self.atom]+ the_other_oxygen self.set_center(the_oxygens) self.set_interaction_atoms(the_oxygens, the_oxygens) - return +class BBNGroup(Group): + """Backbone nitrogen group.""" - -class BBN_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'BBN' self.residue_type = 'BBN' - def setup_atoms(self): + """Set up atoms in group.""" # Identify the hydrogen - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogen = self.atom.get_bonded_elements('H') - # set the center using the nitrogen self.set_center([self.atom]) - self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) - return + self.set_interaction_atoms( + the_hydrogen+[self.atom], the_hydrogen+[self.atom]) -class BBC_group(Group): +class BBCGroup(Group): + """Backbone carbon group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'BBC' self.residue_type = 'BBC' def setup_atoms(self): + """Set up atoms in group.""" # Identify the oxygen the_oxygen = self.atom.get_bonded_elements('O') - # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_oxygen, the_oxygen) - return -class NAR_group(Group): +class NARGroup(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'NAR' self.residue_type = 'NAR' info('Found NAR group:', atom) - return - def setup_atoms(self): + """Set up atoms in group.""" # Identify the hydrogen - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogen = self.atom.get_bonded_elements('H') - # set the center using the nitrogen self.set_center([self.atom]) - self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) - return + self.set_interaction_atoms( + the_hydrogen+[self.atom], the_hydrogen+[self.atom]) +class NAMGroup(Group): + """Unknown group. + TODO - identify this group. + """ - -class NAM_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'NAM' self.residue_type = 'NAM' info('Found NAM group:', atom) - return - def setup_atoms(self): + """Set up atoms in this group.""" # Identify the hydrogen - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogen = self.atom.get_bonded_elements('H') - # set the center using the nitrogen self.set_center([self.atom]) - self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) - return + self.set_interaction_atoms( + the_hydrogen+[self.atom], the_hydrogen+[self.atom]) +class FGroup(Group): + """Fluoride group.""" -class F_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'F' self.residue_type = 'F' info('Found F group:', atom) - return -class Cl_group(Group): + +class ClGroup(Group): + """Chloride group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'Cl' self.residue_type = 'Cl' info('Found Cl group:', atom) - return -class OH_group(Group): + +class OHGroup(Group): + """Hydroxide group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'OH' self.residue_type = 'OH' info('Found OH group:', atom) - return - def setup_atoms(self): + """Set up atoms in this group.""" # Identify the hydrogen - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogen = self.atom.get_bonded_elements('H') - # set the center using the nitrogen self.set_center([self.atom]) - self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) - return + self.set_interaction_atoms( + the_hydrogen+[self.atom], the_hydrogen+[self.atom]) + + +class OPGroup(Group): + """Phosphate group.""" -class OP_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'OP' self.residue_type = 'OP' info('Found OP group:', atom) - return - def setup_atoms(self): + """Set up atoms in this group.""" # Identify the hydrogen - my_protonator.protonate_atom(self.atom) - #the_hydrogen = self.atom.get_bonded_elements('H') - + PROTONATOR.protonate_atom(self.atom) # set the center using the oxygen self.set_center([self.atom]) - #self.set_interaction_atoms(the_hydrogen+[self.atom], the_hydrogen+[self.atom]) self.set_interaction_atoms([self.atom], [self.atom]) - return -class O3_group(Group): +class O3Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'O3' self.residue_type = 'O3' info('Found O3 group:', atom) - return -class O2_group(Group): +class O2Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'O2' self.residue_type = 'O2' info('Found O2 group:', atom) - return -class SH_group(Group): + +class SHGroup(Group): + """Sulfhydryl group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'SH' self.residue_type = 'SH' info('Found SH group:', atom) - return -class CG_group(Group): - """Guadinium group""" +class CGGroup(Group): + """Guadinium group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'CG' self.residue_type = 'CG' info('Found CG group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens the_nitrogens = self.atom.get_bonded_elements('N') - # set the center using the nitrogen self.set_center([self.atom]) - the_hydrogens = [] - for n in the_nitrogens: - my_protonator.protonate_atom(n) - the_hydrogens += n.get_bonded_elements('H') - self.set_interaction_atoms(the_hydrogens+the_nitrogens, the_nitrogens) + for nitrogen in the_nitrogens: + PROTONATOR.protonate_atom(nitrogen) + the_hydrogens += nitrogen.get_bonded_elements('H') + self.set_interaction_atoms( + the_hydrogens+the_nitrogens, the_nitrogens) - return -class C2N_group(Group): - """Amidinium group""" +class C2NGroup(Group): + """Amidinium group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'C2N' self.residue_type = 'C2N' info('Found C2N group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens the_nitrogens = self.atom.get_bonded_elements('N') - the_nitrogens = [n for n in the_nitrogens if len(n.get_bonded_heavy_atoms())==1] - + the_nitrogens = [ + n for n in the_nitrogens if len(n.get_bonded_heavy_atoms()) == 1] # set the center using the nitrogen self.set_center([self.atom]) - the_hydrogens = [] - for n in the_nitrogens: - my_protonator.protonate_atom(n) - the_hydrogens += n.get_bonded_elements('H') - + for nitrogen in the_nitrogens: + PROTONATOR.protonate_atom(nitrogen) + the_hydrogens += nitrogen.get_bonded_elements('H') self.set_interaction_atoms(the_hydrogens+the_nitrogens, the_nitrogens) - return -class OCO_group(Group): + +class OCOGroup(Group): + """Carboxyl group.""" + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'OCO' self.residue_type = 'OCO' info('Found OCO group:', atom) - return def setup_atoms(self): - # Identify the two caroxyl oxygen atoms + """Set up atoms in group.""" + # Identify the two carboxyl oxygen atoms the_oxygens = self.atom.get_bonded_elements('O') - # set the center using the two oxygen carboxyl atoms self.set_center(the_oxygens) self.set_interaction_atoms(the_oxygens, the_oxygens) - return +class N30Group(Group): + """Unknown group. + + TODO - identify this group. + """ -class N30_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N30' self.residue_type = 'N30' info('Found N30 group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogens = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) - return -class N31_group(Group): + +class N31Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N31' self.residue_type = 'N31' info('Found N31 group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogens = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) - return -class N32_group(Group): + +class N32Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N32' self.residue_type = 'N32' info('Found N32 group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogens = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) - return -class N33_group(Group): + +class N33Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N33' self.residue_type = 'N33' info('Found N33 group:', atom) - return def setup_atoms(self): + """Set up atoms in this group.""" # Identify the nitrogens - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogens = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) - return -class NP1_group(Group): + +class NP1Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'NP1' self.residue_type = 'NP1' info('Found NP1 group:', atom) - return - def setup_atoms(self): + """Set up atoms in group.""" # Identify the nitrogens - my_protonator.protonate_atom(self.atom) + PROTONATOR.protonate_atom(self.atom) the_hydrogens = self.atom.get_bonded_elements('H') # set the center using the nitrogen self.set_center([self.atom]) self.set_interaction_atoms(the_hydrogens+[self.atom], [self.atom]) - return -class N1_group(Group): + +class N1Group(Group): + """Unknown group. + + TODO - identify this group. + """ + def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'N1' self.residue_type = 'N1' info('Found N1 group:', atom) - return +class IonGroup(Group): + """Ion group.""" -class Ion_group(Group): def __init__(self, atom): - Group.__init__(self,atom) + Group.__init__(self, atom) self.type = 'ION' self.residue_type = atom.res_name.strip() info('Found ion group:', atom) - return -class non_titratable_ligand_group(Group): +class NonTitratableLigandGroup(Group): + """Non-titratable ligand group.""" + def __init__(self, atom): Group.__init__(self, atom) self.type = 'LG' self.residue_type = 'LG' -# info('Non-titratable ligand group',atom) - return -class titratable_ligand_group(Group): + +class TitratableLigandGroup(Group): + """Titratable ligand group.""" + def __init__(self, atom): Group.__init__(self, atom) # set the charge and determine type (acid or base) self.charge = atom.charge - if self.charge <0: + if self.charge < 0: self.type = 'ALG' self.residue_type = 'ALG' elif self.charge > 0: self.type = 'BLG' self.residue_type = 'BLG' else: - raise Exception('Unable to determine type of ligand group - charge not set?') - - + raise Exception('Unable to determine type of ligand group - ' + 'charge not set?') # check if marvin model pka has been calculated # this is not true if we are reading an input file if atom.marvin_pka: self.model_pka = atom.marvin_pka - info('Titratable ligand group ', atom, self.model_pka, self.charge) + info('Titratable ligand group ', + atom, self.model_pka, self.charge) self.model_pka_set = True - return - def is_group(parameters, atom): - atom.groups_extracted = 1 + """Identify whether the atom belongs to a group. + Args: + parameters: parameters for check + atom: atom to check + Returns: + group for atom or None + """ + atom.groups_extracted = 1 # check if this atom belongs to a protein group protein_group = is_protein_group(parameters, atom) - if protein_group: return protein_group - + if protein_group: + return protein_group # check if this atom belongs to a ion group ion_group = is_ion_group(parameters, atom) - if ion_group: return ion_group - + if ion_group: + return ion_group # check if this atom belongs to a ligand group if parameters.ligand_typing == 'marvin': ligand_group = is_ligand_group_by_marvin_pkas(parameters, atom) elif parameters.ligand_typing == 'sybyl': - ligand_group = is_ligand_group_by_sybyl_types(parameters, atom) + ligand_group = None elif parameters.ligand_typing == 'groups': ligand_group = is_ligand_group_by_groups(parameters, atom) else: - raise Exception('Unknown ligand typing method \'%s\''%parameters.ligand_typing) - - if ligand_group: return ligand_group - - - + raise Exception( + 'Unknown ligand typing method \'{0.s}\''.format( + parameters.ligand_typing)) + if ligand_group: + return ligand_group return None -def is_protein_group(parameters,atom): +def is_protein_group(parameters, atom): + """Identify whether the atom belongs to a protein group. + + Args: + parameters: parameters for check + atom: atom to check + Returns: + group for atom or None + """ if atom.type != 'atom': return None - ### Check for termial groups if atom.terminal == 'N+': - return Nterm_group(atom) + return NtermGroup(atom) elif atom.terminal == 'C-': - return Cterm_group(atom) - + return CtermGroup(atom) ### Backbone if atom.type == 'atom' and atom.name == 'N': # ignore proline backbone nitrogens if atom.res_name != 'PRO': - return BBN_group(atom) + return BBNGroup(atom) if atom.type == 'atom' and atom.name == 'C': # ignore C- carboxyl if atom.count_bonded_elements('O') == 1: - return BBC_group(atom) - + return BBCGroup(atom) ### Filters for side chains based on PDB protein atom names - key = '%s-%s'%(atom.res_name, atom.name) - + key = '{0:s}-{1:s}'.format(atom.res_name, atom.name) if key in parameters.protein_group_mapping.keys(): - return eval('%s_group(atom)'%parameters.protein_group_mapping[key]) - - return None - -def is_ligand_group_by_sybyl_types(parameters, atom): - - + class_str = "{0:s}Group".format(parameters.protein_group_mapping[key]) + group_class = globals()[class_str] + return group_class(atom) return None -def is_ligand_group_by_groups(parameters, atom): +def is_ligand_group_by_groups(_, atom): + """Identify whether the atom belongs to a ligand group by checking groups. + + Args: + _: parameters for check + atom: atom to check + Returns: + group for atom or None + """ ### Ligand group filters if atom.type != 'hetatm': return None - - my_protonator.protonate_atom(atom) - + PROTONATOR.protonate_atom(atom) if atom.sybyl_type == 'N.ar': - if len(atom.get_bonded_heavy_atoms())==2: - return NAR_group(atom) - + if len(atom.get_bonded_heavy_atoms()) == 2: + return NARGroup(atom) if atom.sybyl_type == 'N.am': - return NAM_group(atom) - + return NAMGroup(atom) if atom.sybyl_type in ['N.3', 'N.4']: heavy_bonded = atom.get_bonded_heavy_atoms() if len(heavy_bonded) == 0: - return N30_group(atom) + return N30Group(atom) elif len(heavy_bonded) == 1: - return N31_group(atom) + return N31Group(atom) elif len(heavy_bonded) == 2: - return N32_group(atom) + return N32Group(atom) elif len(heavy_bonded) == 3: - return N33_group(atom) - + return N33Group(atom) if atom.sybyl_type == 'N.1': - return N1_group(atom) - + return N1Group(atom) if atom.sybyl_type == 'N.pl3': # make sure that this atom is not part of a guadinium or amidinium group bonded_carbons = atom.get_bonded_elements('C') if len(bonded_carbons) == 1: bonded_nitrogens = bonded_carbons[0].get_bonded_elements('N') if len(bonded_nitrogens) == 1: - return NP1_group(atom) - - + return NP1Group(atom) if atom.sybyl_type == 'C.2': # Guadinium and amidinium groups bonded_nitrogens = atom.get_bonded_elements('N') - npls = [n for n in bonded_nitrogens if (n.sybyl_type == 'N.pl3' and len(n.get_bonded_heavy_atoms())==1)] + npls = [ + n for n in bonded_nitrogens + if (n.sybyl_type == 'N.pl3' + and len(n.get_bonded_heavy_atoms()) == 1)] if len(npls) == 2: - n_with_max_two_heavy_atom_bonds = [n for n in bonded_nitrogens if len(n.get_bonded_heavy_atoms())<3] + n_with_max_two_heavy_atom_bonds = [ + n for n in bonded_nitrogens + if len(n.get_bonded_heavy_atoms()) < 3] if len(n_with_max_two_heavy_atom_bonds) == 2: - return C2N_group(atom) + return C2NGroup(atom) if len(bonded_nitrogens) == 3: - return CG_group(atom) + return CGGroup(atom) # carboxyl group bonded_oxygens = atom.get_bonded_elements('O') bonded_oxygens = [b for b in bonded_oxygens if 'O.co2' in b.sybyl_type] if len(bonded_oxygens) == 2: - return OCO_group(atom) - - + return OCOGroup(atom) if atom.sybyl_type == 'F': - return F_group(atom) - + return FGroup(atom) if atom.sybyl_type == 'Cl': - return Cl_group(atom) - + return ClGroup(atom) if atom.sybyl_type == 'O.3': if len(atom.get_bonded_heavy_atoms()) == 1: # phosphate group if atom.count_bonded_elements('P') == 1: - return OP_group(atom) + return OPGroup(atom) # hydroxyl group else: - return OH_group(atom) + return OHGroup(atom) # other SP3 oxygen else: - return O3_group(atom) - + return O3Group(atom) if atom.sybyl_type == 'O.2': - return O2_group(atom) - - + return O2Group(atom) if atom.sybyl_type == 'S.3': # sulphide group if len(atom.get_bonded_heavy_atoms()) == 1: - return SH_group(atom) - # other SP3 oxygen - #else: - # return S3_group(atom) - - + return SHGroup(atom) return None def is_ligand_group_by_marvin_pkas(parameters, atom): + """Identify whether the atom belongs to a ligand group by calculating + 'Marvin pKas'. + + Args: + parameters: parameters for check + atom: atom to check + Returns: + group for atom or None + """ if atom.type != 'hetatm': return None - # calculate Marvin ligand pkas for this conformation container # if not already done + # TODO - double-check testing coverage of these functions. if not atom.conformation_container.marvin_pkas_calculated: - lpka = propka.ligand_pka_values.ligand_pka_values(parameters) - lpka.get_marvin_pkas_for_molecular_container(atom.molecular_container, - min_pH=parameters.min_ligand_model_pka, - max_pH=parameters.max_ligand_model_pka) - - + lpka = LigandPkaValues(parameters) + lpka.get_marvin_pkas_for_molecular_container( + atom.molecular_container, min_ph=parameters.min_ligand_model_pka, + max_ph=parameters.max_ligand_model_pka) if atom.marvin_pka: - return titratable_ligand_group(atom) - - # Special case of oxygen in carboxyl group not assigned a pka value by marvin + return TitratableLigandGroup(atom) + # Special case of oxygen in carboxyl group not assigned pka value by marvin if atom.sybyl_type == 'O.co2': atom.charge = -1.0 - other_oxygen = [o for o in atom.get_bonded_elements('C')[0].get_bonded_elements('O') if not o==atom][0] + other_oxygen = [ + o for o + in atom.get_bonded_elements('C')[0].get_bonded_elements('O') + if not o == atom][0] atom.marvin_pka = other_oxygen.marvin_pka - return titratable_ligand_group(atom) - - + return TitratableLigandGroup(atom) if atom.element in parameters.hydrogen_bonds.elements: - return non_titratable_ligand_group(atom) - + return NonTitratableLigandGroup(atom) return None def is_ion_group(parameters, atom): + """Identify whether the atom belongs to an ion group. + Args: + parameters: parameters for check + atom: atom to check + Returns: + group for atom or None + """ if atom.res_name.strip() in parameters.ions.keys(): - return Ion_group(atom) - + return IonGroup(atom) return None diff --git a/propka/hybrid36.py b/propka/hybrid36.py index e0af2dc..b1e934a 100644 --- a/propka/hybrid36.py +++ b/propka/hybrid36.py @@ -1,21 +1,26 @@ -"""Provides an alternative PDB format that can transparently encode larger atom numbers. +"""Provides alternative PDB format that can encode larger atom numbers. http://cci.lbl.gov/hybrid_36/ """ import string -_hybrid36_upper_chars = set(string.ascii_uppercase) -_hybrid36_lower_chars = set(string.ascii_lowercase) -_hybrid36_digits = set(string.digits) -_hybrid36_upper_set = _hybrid36_upper_chars | _hybrid36_digits -_hybrid36_lower_set = _hybrid36_lower_chars | _hybrid36_digits + +_HYBRID36_UPPER_CHARS = set(string.ascii_uppercase) +_HYBRID36_LOWER_CHARS = set(string.ascii_lowercase) +_HYBRID36_DIGITS = set(string.digits) +_HYBRID36_UPPER_SET = _HYBRID36_UPPER_CHARS | _HYBRID36_DIGITS +_HYBRID36_LOWER_SET = _HYBRID36_LOWER_CHARS | _HYBRID36_DIGITS + def decode(input_string): - """ - Convert an input string of a number in hybrid-36 format to an integer. + """Convert an input string of a number in hybrid-36 format to an integer. + Args: + input_string: input string + Returns: + integer """ - value_error_message = "invalid literal for hybrid-36 conversion: '%s'" + value_error_message = "invalid literal for hybrid-36 conversion: '{0:s}'" original_input_string = input_string input_string = input_string.strip() @@ -27,30 +32,30 @@ def decode(input_string): else: sign = 1 - if not len(input_string): - raise ValueError(value_error_message % input_string) + if len(input_string) == 0: + raise ValueError(value_error_message.format(input_string)) # See http://cci.lbl.gov/hybrid_36/ for documentation on the format. num_chars = len(input_string) first_char = input_string[0] - if first_char in _hybrid36_digits: + if first_char in _HYBRID36_DIGITS: return sign * int(input_string) - elif first_char in _hybrid36_upper_chars: + elif first_char in _HYBRID36_UPPER_CHARS: reference = - (10 * 36 ** (num_chars - 1) - 10 ** num_chars) - _hybrid36_set = _hybrid36_upper_set - elif first_char in _hybrid36_lower_chars: + _hybrid36_set = _HYBRID36_UPPER_SET + elif first_char in _HYBRID36_LOWER_CHARS: reference = (16 * 36 ** (num_chars - 1) + 10 ** num_chars) - _hybrid36_set = _hybrid36_lower_set + _hybrid36_set = _HYBRID36_LOWER_SET else: - raise ValueError(value_error_message % original_input_string) + raise ValueError(value_error_message.format(original_input_string)) # Check the validity of the input string: ASCII characters should be # either all uppercase or all lowercase. - for c in input_string[1:]: - if c not in _hybrid36_set: - raise ValueError(value_error_message % original_input_string) + for char in input_string[1:]: + if char not in _hybrid36_set: + raise ValueError(value_error_message.format(original_input_string)) # Convert with the int function. return sign * (int(input_string, 36) + reference) diff --git a/propka/iterative.py b/propka/iterative.py index 3614d93..c3f999a 100644 --- a/propka/iterative.py +++ b/propka/iterative.py @@ -1,60 +1,68 @@ +"""Iterative functions for pKa calculations. -from __future__ import division -from __future__ import print_function - -import math, time - -import propka.lib as lib +These appear to mostly involve determinants. +""" from propka.determinant import Determinant -import propka.calculations -from propka.lib import info, warning, debug - -# Some library functions for the interative pKa determinants +from propka.lib import info, debug -def addtoDeterminantList(group1, group2, distance, iterative_interactions, version): +# TODO - these are undocumented constants +UNK_MIN_VALUE = 0.005 + + +def add_to_determinant_list(group1, group2, distance, iterative_interactions, + version): + """Add iterative determinantes to the list. + + [[R1, R2], [side-chain, coulomb], [A1, A2]], ... + + NOTE - sign is determined when the interaction is added to the iterative + object! + NOTE - distance < coulomb_cutoff here + + Args: + group1: first group in pair + group2: second group in pair + distance: distance between groups + iterative_interactions: interaction list to modify + version: version object """ - Adds 'iterative determinants' to list ..., [[R1, R2], [side-chain, coulomb], [A1, A2]], ... - Note, the sign is determined when the interaction is added to the iterative object! - Note, distance < coulomb_cutoff here - """ - - hbond_value = version.hydrogen_bond_interaction(group1, group2) + hbond_value = version.hydrogen_bond_interaction(group1, group2) coulomb_value = version.electrostatic_interaction(group1, group2, distance) - # adding the interaction to 'iterative_interactions' if hbond_value or coulomb_value: - pair = [group1, group2] - - values = [hbond_value, coulomb_value] + pair = [group1, group2] + values = [hbond_value, coulomb_value] while None in values: values[values.index(None)] = 0.0 - annihilation = [0., 0.] - interaction = [pair, values, annihilation] + interaction = [pair, values, annihilation] iterative_interactions.append(interaction) - return +def add_iterative_acid_pair(object1, object2, interaction): + """Add the Coulomb 'iterative' interaction (an acid pair). -def addIterativeAcidPair(object1, object2, interaction): + The higher pKa is raised with QQ+HB + The lower pKa is lowered with HB + + Args: + object1: first object in pair + object2: second object in pair + interaction: list with [values, annihilation] """ - Adding the Coulomb 'iterative' interaction (an acid pair): - the higher pKa is raised with QQ+HB - the lower pKa is lowered with HB - """ - values = interaction[1] + values = interaction[1] annihilation = interaction[2] - hbond_value = values[0] + hbond_value = values[0] coulomb_value = values[1] diff = coulomb_value + 2*hbond_value - comp1 = object1.pKa_old + annihilation[0] + diff - comp2 = object2.pKa_old + annihilation[1] + diff - annihilation[0] = 0. - annihilation[1] = 0. + comp1 = object1.pka_old + annihilation[0] + diff + comp2 = object2.pka_old + annihilation[1] + diff + annihilation[0] = 0.0 + annihilation[1] = 0.0 if comp1 > comp2: # side-chain - determinant = [object2, hbond_value] + determinant = [object2, hbond_value] object1.determinants['sidechain'].append(determinant) determinant = [object1, -hbond_value] object2.determinants['sidechain'].append(determinant) @@ -64,7 +72,7 @@ def addIterativeAcidPair(object1, object2, interaction): annihilation[0] = -diff else: # side-chain - determinant = [object1, hbond_value] + determinant = [object1, hbond_value] object2.determinants['sidechain'].append(determinant) determinant = [object2, -hbond_value] object1.determinants['sidechain'].append(determinant) @@ -74,26 +82,31 @@ def addIterativeAcidPair(object1, object2, interaction): annihilation[1] = -diff -def addIterativeBasePair(object1, object2, interaction): +def add_iterative_base_pair(object1, object2, interaction): + """Add the Coulomb 'iterative' interaction (a base pair). + + The lower pKa is lowered + + Args: + object1: first object in pair + object2: second object in pair + interaction: list with [values, annihilation] """ - Adding the Coulomb 'iterative' interaction (a base pair): - the lower pKa is lowered - """ - values = interaction[1] + values = interaction[1] annihilation = interaction[2] - hbond_value = values[0] + hbond_value = values[0] coulomb_value = values[1] diff = coulomb_value + 2*hbond_value diff = -diff - comp1 = object1.pKa_old + annihilation[0] + diff - comp2 = object2.pKa_old + annihilation[1] + diff - annihilation[0] = 0. - annihilation[1] = 0. + comp1 = object1.pka_old + annihilation[0] + diff + comp2 = object2.pka_old + annihilation[1] + diff + annihilation[0] = 0.0 + annihilation[1] = 0.0 if comp1 < comp2: # side-chain determinant = [object2, -hbond_value] object1.determinants['sidechain'].append(determinant) - determinant = [object1, hbond_value] + determinant = [object1, hbond_value] object2.determinants['sidechain'].append(determinant) # Coulomb determinant = [object2, -coulomb_value] @@ -103,7 +116,7 @@ def addIterativeBasePair(object1, object2, interaction): # side-chain determinant = [object1, -hbond_value] object2.determinants['sidechain'].append(determinant) - determinant = [object2, hbond_value] + determinant = [object2, hbond_value] object1.determinants['sidechain'].append(determinant) # Coulomb determinant = [object1, -coulomb_value] @@ -111,254 +124,249 @@ def addIterativeBasePair(object1, object2, interaction): annihilation[1] = -diff -def addIterativeIonPair(object1, object2, interaction, version): - """ - Adding the Coulomb 'iterative' interaction (an acid-base pair): +def add_iterative_ion_pair(object1, object2, interaction, version): + """Add the Coulomb 'iterative' interaction (an acid-base pair) + the pKa of the acid is lowered & the pKa of the base is raised + + Args: + object1: first object in pair + object2: second object in pair + interaction: list with [values, annihilation] + version: version object """ - values = interaction[1] + values = interaction[1] annihilation = interaction[2] - hbond_value = values[0] + hbond_value = values[0] coulomb_value = values[1] - Q1 = object1.Q - Q2 = object2.Q - comp1 = object1.pKa_old + annihilation[0] + Q1*coulomb_value - comp2 = object2.pKa_old + annihilation[1] + Q2*coulomb_value - if object1.res_name not in version.parameters.exclude_sidechain_interactions: - comp1 += Q1*hbond_value - if object2.res_name not in version.parameters.exclude_sidechain_interactions: - comp2 += Q2*hbond_value - - if Q1 == -1.0 and comp1 < comp2: - add_term = True # pKa(acid) < pKa(base) - elif Q1 == 1.0 and comp1 > comp2: - add_term = True # pKa(base) > pKa(acid) + q1 = object1.q + q2 = object2.q + comp1 = object1.pka_old + annihilation[0] + q1*coulomb_value + comp2 = object2.pka_old + annihilation[1] + q2*coulomb_value + if (object1.res_name + not in version.parameters.exclude_sidechain_interactions): + comp1 += q1*hbond_value + if (object2.res_name + not in version.parameters.exclude_sidechain_interactions): + comp2 += q2*hbond_value + if q1 == -1.0 and comp1 < comp2: + # pKa(acid) < pKa(base) + add_term = True + elif q1 == 1.0 and comp1 > comp2: + # pKa(base) > pKa(acid) + add_term = True else: - add_term = False - + add_term = False annihilation[0] = 0.00 annihilation[1] = 0.00 - - if add_term == True: - - # Coulomb - if coulomb_value > 0.005: - # residue1 - interaction = [object2, Q1*coulomb_value] - annihilation[0] += -Q1*coulomb_value - object1.determinants['coulomb'].append(interaction) - # residue2 - interaction = [object1, Q2*coulomb_value] - annihilation[1] += -Q2*coulomb_value - object2.determinants['coulomb'].append(interaction) - - # Side-chain - if hbond_value > 0.005: - # residue1 - if object1.res_name not in version.parameters.exclude_sidechain_interactions: - interaction = [object2, Q1*hbond_value] - annihilation[0] += -Q1*hbond_value - object1.determinants['sidechain'].append(interaction) - # residue2 - if object2.res_name not in version.parameters.exclude_sidechain_interactions: - interaction = [object1, Q2*hbond_value] - annihilation[1] += -Q2*hbond_value - object2.determinants['sidechain'].append(interaction) + if add_term: + # Coulomb + if coulomb_value > UNK_MIN_VALUE: + # residue1 + interaction = [object2, q1*coulomb_value] + annihilation[0] += -q1*coulomb_value + object1.determinants['coulomb'].append(interaction) + # residue2 + interaction = [object1, q2*coulomb_value] + annihilation[1] += -q2*coulomb_value + object2.determinants['coulomb'].append(interaction) + # Side-chain + if hbond_value > UNK_MIN_VALUE: + # residue1 + if (object1.res_name + not in version.parameters.exclude_sidechain_interactions): + interaction = [object2, q1*hbond_value] + annihilation[0] += -q1*hbond_value + object1.determinants['sidechain'].append(interaction) + # residue2 + if (object2.res_name + not in version.parameters.exclude_sidechain_interactions): + interaction = [object1, q2*hbond_value] + annihilation[1] += -q2*hbond_value + object2.determinants['sidechain'].append(interaction) -def addDeterminants(iterative_interactions, version, options=None): - """ +def add_determinants(iterative_interactions, version, _=None): + """Add determinants iteratively. + The iterative pKa scheme. Later it is all added in 'calculateTotalPKA' + + Args: + iterative_interactions: list of iterative interactions + version: version object + _: options object """ # --- setup --- iteratives = [] done_group = [] - - # creating iterative objects with references to their real group counterparts + # create iterative objects with references to their real group counterparts for interaction in iterative_interactions: pair = interaction[0] for group in pair: if group in done_group: - #print "done already" - """ do nothing - already have an iterative object for this group """ + # do nothing - already have an iterative object for this group + pass else: - newIterative = Iterative(group) - iteratives.append(newIterative) + new_iterative = Iterative(group) + iteratives.append(new_iterative) done_group.append(group) - # Initialize iterative scheme - debug("\n --- pKa iterations (%d groups, %d interactions) ---" % - (len(iteratives), len(iterative_interactions))) + debug( + "\n --- pKa iterations ({0:d} groups, {1:d} interactions) ---".format( + len(iteratives), len(iterative_interactions))) converged = False iteration = 0 # set non-iterative pka values as first step - for itres in iteratives: - itres.pKa_iter.append(itres.pKa_NonIterative) - - + for iter_ in iteratives: + iter_.pka_iter.append(iter_.pka_noniterative) # --- starting pKa iterations --- - while converged == False: + while not converged: + # initialize pka_new + iteration += 1 + for itres in iteratives: + itres.determinants = {'sidechain': [], 'backbone': [], + 'coulomb': []} + itres.pka_new = itres.pka_noniterative + # Adding interactions to temporary determinant container + for interaction in iterative_interactions: + pair = interaction[0] + object1, object2 = find_iterative(pair, iteratives) + q1 = object1.q + q2 = object2.q + if q1 < 0.0 and q2 < 0.0: + # both are acids + add_iterative_acid_pair(object1, object2, interaction) + elif q1 > 0.0 and q2 > 0.0: + # both are bases + add_iterative_base_pair(object1, object2, interaction) + else: + # one of each + add_iterative_ion_pair(object1, object2, interaction, version) + # Calculating pka_new values + for itres in iteratives: + for type_ in ['sidechain', 'backbone', 'coulomb']: + for determinant in itres.determinants[type_]: + itres.pka_new += determinant[1] - # initialize pKa_new - iteration += 1 - for itres in iteratives: - itres.determinants = {'sidechain':[],'backbone':[],'coulomb':[]} - itres.pKa_new = itres.pKa_NonIterative + # Check convergence + converged = True + for itres in iteratives: + if itres.pka_new == itres.pka_old: + itres.converged = True + else: + itres.converged = False + converged = False + # reset pka_old & storing pka_new in pka_iter + for itres in iteratives: + itres.pka_old = itres.pka_new + itres.pka_iter.append(itres.pka_new) - # Adding interactions to temporary determinant container - for interaction in iterative_interactions: - pair = interaction[0] - values = interaction[1] - annihilation = interaction[2] - #print "len(interaction) = %d" % (len(interaction)) - object1, object2 = findIterative(pair, iteratives) - Q1 = object1.Q - Q2 = object2.Q - if Q1 < 0.0 and Q2 < 0.0: - """ both are acids """ - addIterativeAcidPair(object1, object2, interaction) - elif Q1 > 0.0 and Q2 > 0.0: - """ both are bases """ - addIterativeBasePair(object1, object2, interaction) - else: - """ one of each """ - addIterativeIonPair(object1, object2, interaction, version) - - - # Calculating pKa_new values - for itres in iteratives: - for type in ['sidechain','backbone','coulomb']: - for determinant in itres.determinants[type]: - itres.pKa_new += determinant[1] - - # Check convergence - converged = True - for itres in iteratives: - if itres.pKa_new == itres.pKa_old: - itres.converged = True - else: - itres.converged = False - converged = False - - # reset pKa_old & storing pKa_new in pKa_iter - for itres in iteratives: - itres.pKa_old = itres.pKa_new - itres.pKa_iter.append(itres.pKa_new) - - if iteration == 10: - info("did not converge in %d iterations" % (iteration)) - break - - # --- Iterations finished --- - + if iteration == 10: + info("did not converge in {0:d} iterations".format(iteration)) + break # printing pKa iterations # formerly was conditioned on if options.verbosity >= 2 - now unnecessary - str = "%12s" % (" ") - for index in range(0, iteration+1 ): - str += "%8d" % (index) - debug(str) + str_ = ' ' + for index in range(iteration+1): + str_ += "{0:>8d}".format(index) + debug(str_) for itres in iteratives: - str = "%s " % (itres.label) - for pKa in itres.pKa_iter: - str += "%8.2lf" % (pKa) - if itres.converged == False: - str += " *" - debug(str) - + str_ = "{0:s} ".format(itres.label) + for pka in itres.pka_iter: + str_ += "{0:>8.2f}".format(pka) + if not itres.converged: + str_ += " *" + debug(str_) # creating real determinants and adding them to group object for itres in iteratives: - for type in ['sidechain','backbone','coulomb']: - for interaction in itres.determinants[type]: + for type_ in ['sidechain', 'backbone', 'coulomb']: + for interaction in itres.determinants[type_]: #info('done',itres.group.label,interaction[0],interaction[1]) value = interaction[1] - if value > 0.005 or value < -0.005: - g = interaction[0] - newDeterminant = Determinant(g, value) - itres.group.determinants[type].append(newDeterminant) + if value > UNK_MIN_VALUE or value < -UNK_MIN_VALUE: + group = interaction[0] + new_det = Determinant(group, value) + itres.group.determinants[type_].append(new_det) +def find_iterative(pair, iteratives): + """Find the 'iteratives' that correspond to the groups in 'pair'. -def findIterative(pair, iteratives): - """ - Function to find the two 'iteratives' that corresponds to the groups in 'pair' + Args: + pair: groups to match + iteratives: list of iteratives to search + Returns: + 1. first matched iterative + 2. second matched iterative """ for iterative in iteratives: if iterative.group == pair[0]: iterative0 = iterative elif iterative.group == pair[1]: iterative1 = iterative - return iterative0, iterative1 - class Iterative: - """ - Iterative class - pKa values and references of iterative groups - Note, this class has a fake determinant list, true determinants are - made after the iterations are finished. + """Iterative class - pKa values and references of iterative groups. + + NOTE - this class has a fake determinant list, true determinants are made + after the iterations are finished. """ def __init__(self, group): - """ - Contructer of the iterative object - """ + """Initialize object with group. - #print "creating 'iterative object' for %s" % (group.label) - - self.label = group.label - self.atom = group.atom - self.res_name = group.residue_type - self.Q = group.charge - self.pKa_old = None - self.pKa_new = None - self.pKa_iter = [] - self.pKa_NonIterative = 0.00 - self.determinants = {'sidechain':[],'backbone':[],'coulomb':[]} + Args: + group: group to use for initialization. + """ + self.label = group.label + self.atom = group.atom + self.res_name = group.residue_type + self.q = group.charge + self.pka_old = None + self.pka_new = None + self.pka_iter = [] + self.pka_noniterative = 0.00 + self.determinants = {'sidechain': [], 'backbone': [], 'coulomb': []} self.group = group self.converged = True - # Calculate the Non-Iterative part of pKa from the group object # Side chain side_chain = 0.00 for determinant in group.determinants['sidechain']: value = determinant.value side_chain += value - # Back bone - back_bone = 0.00 + back_bone = 0.00 for determinant in group.determinants['backbone']: value = determinant.value - back_bone += value - + back_bone += value # Coulomb - coulomb = 0.00 + coulomb = 0.00 for determinant in group.determinants['coulomb']: value = determinant.value - coulomb += value - - self.pKa_NonIterative = group.model_pka - self.pKa_NonIterative += group.Emass - self.pKa_NonIterative += group.Elocl - self.pKa_NonIterative += side_chain - self.pKa_NonIterative += back_bone - self.pKa_NonIterative += coulomb - - self.pKa_old = self.pKa_NonIterative - + coulomb += value + self.pka_noniterative = group.model_pka + self.pka_noniterative += group.energy_volume + self.pka_noniterative += group.energy_local + self.pka_noniterative += side_chain + self.pka_noniterative += back_bone + self.pka_noniterative += coulomb + self.pka_old = self.pka_noniterative def __eq__(self, other): - """ - Check if two groups should be considered identical - """ + """Needed to use objects in sets.""" if self.atom.type == 'atom': # In case of protein atoms we trust the labels - return self.label==other.label + return self.label == other.label else: # For heterogene atoms we also need to check the residue number - return self.label==other.label and self.atom.res_num == other.atom.res_num + return ( + self.label == other.label + and self.atom.res_num == other.atom.res_num) def __hash__(self): - """ Needed together with __eq__ - otherwise we can't make sets of groups """ + """Needed to use objects in sets.""" return id(self) diff --git a/propka/lib.py b/propka/lib.py index 1988fd8..48eab4e 100644 --- a/propka/lib.py +++ b/propka/lib.py @@ -1,112 +1,142 @@ -from __future__ import division -from __future__ import print_function - +"""Implements many of the main functions used to call PROPKA.""" import sys -import pkg_resources import logging import argparse +import pkg_resources -logger = logging.getLogger("propka") -stdout_handler = logging.StreamHandler(sys.stdout) -stdout_handler.setFormatter(logging.Formatter("%(message)s")) -logger.addHandler(stdout_handler) +_LOGGER = logging.getLogger("propka") +_STDOUT_HANDLER = logging.StreamHandler(sys.stdout) +_STDOUT_HANDLER.setFormatter(logging.Formatter("%(message)s")) +_LOGGER.addHandler(_STDOUT_HANDLER) -# -# file I/O -# -def open_file_for_reading(filename): - """Open file or file-like stream *filename* for reading. - *filename* may be a string and then it is opened but if it is a - file-like object (such as an open :class:`file` or - :class:`StringIO.StringIO` --- really anything with ``next()``, - ``read()``, ``readlines()``, ``readline``, ``close`` methods) then - the object is just passed through (the stream is attempted to be - reset to the beginning with ``fseek(0)``). +def open_file_for_reading(input_file): + """Open file or file-like stream for reading. + + TODO - convert this to a context manager + + Args: + input_file: path to file or file-like object. If file-like object, + then will attempt fseek(0). """ - if (hasattr(filename, 'next') or hasattr(filename, '__next__')) \ - and hasattr(filename, 'read') \ - and hasattr(filename, 'readline') and hasattr(filename, 'readlines') \ - and hasattr(filename, 'close'): - # already a stream - try: - filename.fseek(0) - except AttributeError: - pass - return filename + try: + input_file.fseek(0) + return input_file + except AttributeError: + pass try: - f = open(filename,'r') + file_ = open(input_file, 'rt') except: - raise IOError('Cannot find file %s' %filename) - return f + raise IOError('Cannot find file {0:s}'.format(input_file)) + return file_ -def open_file_for_writing(filename): - """Open file or file-like stream for writing""" - if hasattr(filename, 'write') and hasattr(filename, 'writeline') and hasattr(filename, 'writelines') \ - and hasattr(filename, 'close'): - # already a stream - try: - mode = filename.mode - except AttributeError: - mode = "w" + +def open_file_for_writing(input_file): + """Open file or file-like stream for writing. + + TODO - convert this to a context manager. + + Args: + input_file: path to file or file-like object. If file-like object, + then will attempt to get file mode. + """ + try: + mode = input_file.mode if not ("w" in mode or "a" in mode or "+" in mode): raise IOError("File/stream not open for writing") - return filename - + return input_file + except AttributeError: + pass try: - f = open(filename,'w') - except: - raise Exception('Could not open %s'%filename) - return f + file_ = open(input_file, 'wt') + except FileNotFoundError: + raise Exception('Could not open {0:s}'.format(input_file)) + return file_ + -# -# bookkeeping etc. -# def conformation_sorter(conf): + """TODO - figure out what this function does.""" model = int(conf[:-1]) altloc = conf[-1:] return model*100+ord(altloc) + def split_atoms_into_molecules(atoms): + """Maps atoms into molecules. + + Args: + atoms: list of atoms + Returns: + list of molecules + """ molecules = [] - - while len(atoms)>0: + while len(atoms) > 0: initial_atom = atoms.pop() - molecules.append( make_molecule(initial_atom,atoms)) - + molecules.append(make_molecule(initial_atom, atoms)) return molecules + def make_molecule(atom, atoms): + """Make a molecule from atoms. + + Args: + atom: one of the atoms + atoms: a list of the remaining atoms + Return: + list of atoms + """ bonded_atoms = [a for a in atoms if atom in a.bonded_atoms] res_atoms = [atom,] - - for ba in bonded_atoms: - if ba in atoms: - atoms.remove(ba) - res_atoms.extend(make_molecule(ba, atoms)) - + for bond_atom in bonded_atoms: + if bond_atom in atoms: + atoms.remove(bond_atom) + res_atoms.extend(make_molecule(bond_atom, atoms)) return res_atoms -def make_grid(min,max,step): - x = min - while x <= max: +def make_grid(min_, max_, step): + """Make a grid across the specified tange. + + TODO - figure out if this duplicates existing generators like `range` or + numpy function. + + Args: + min_: minimum value of grid + max_: maximum value of grid + step: grid step size + """ + x = min_ + while x <= max_: yield x x += step - return + def generate_combinations(interactions): + """Generate combinations of interactions. + + Args: + interactions: list of interactions + Returns: + list of combinations + """ res = [[]] for interaction in interactions: res = make_combination(res, interaction) res.remove([]) - return res def make_combination(combis, interaction): + """Make a specific set of combinations. + + Args: + combis: list of combinations + interaction: interaction to add to combinations + Returns: + list of combinations + """ res = [] for combi in combis: res.append(combi+[interaction]) @@ -115,15 +145,20 @@ def make_combination(combis, interaction): def parse_res_string(res_str): - """ - Parse the residue string, in format "chain:resnum[inscode]", and return - a tuple of (chain, resnum, inscode). Raises ValueError if the input - string is invalid. + """Parse a residue string. + + Args: + res_string: residue string in format "chain:resnum[inscode]" + Returns: + a tuple of (chain, resnum, inscode). + Raises: + ValueError if the input string is invalid. """ try: chain, resnum_str = res_str.split(":") except ValueError: - raise ValueError("Invalid residue string (must contain 2 colon-separated values)") + raise ValueError("Invalid residue string (must contain 2 " + "colon-separated values)") try: resnum = int(resnum_str) except ValueError: @@ -142,8 +177,8 @@ def build_parser(parser=None): """Build an argument parser for PROPKA. Args: - parser_: existing parser. If this is not None, then the PROPKA parser will - be created as a subparser to this existing parser. Otherwise, a + parser: existing parser. If this is not None, then the PROPKA parser will + be created as a subparser to this existing parser. Otherwise, a new parser will be created. Returns: ArgumentParser object. @@ -151,74 +186,96 @@ def build_parser(parser=None): if parser is not None: group = parser.add_argument_group(title="PROPKA invoation options") else: - parser = argparse.ArgumentParser(description=("PROPKA predicts the pKa values of ionizable " - "groups in proteins and protein-ligand " - "complexes based in the 3D structure"), - formatter_class=argparse.ArgumentDefaultsHelpFormatter) + parser = argparse.ArgumentParser( + description=("PROPKA predicts the pKa values of ionizable " + "groups in proteins and protein-ligand " + "complexes based in the 3D structure"), + formatter_class=argparse.ArgumentDefaultsHelpFormatter) # This is duck-typing at its finest group = parser group.add_argument("input_pdb", help="read data from ") - - group.add_argument("-f", "--file", action="append", dest="filenames", default=[], - help="read data from , i.e. is added to arguments") - group.add_argument("-r", "--reference", dest="reference", default="neutral", - help=("setting which reference to use for stability calculations " - "[neutral/low-pH]")) - group.add_argument("-c", "--chain", action="append", dest="chains", - help=('creating the protein with only a specified chain. Specify ' - '" " for chains without ID [all]')) - group.add_argument("-i", "--titrate_only", dest="titrate_only", - help=('Treat only the specified residues as titratable. Value should ' - 'be a comma-separated list of "chain:resnum" values; for example: ' - '-i "A:10,A:11"')) - group.add_argument("-t", "--thermophile", action="append", dest="thermophiles", - help=("defining a thermophile filename; usually used in " - "'alignment-mutations'")) - group.add_argument("-a", "--alignment", action="append", dest="alignment", - help=("alignment file connecting and " - "[.pir]")) - group.add_argument("-m", "--mutation", action="append", dest="mutations", - help=("specifying mutation labels which is used to modify " - " according to, e.g. N25R/N181D")) - group.add_argument("-v", "--version", dest="version_label", default="Jan15", - help="specifying the sub-version of propka [Jan15/Dec19]") - group.add_argument("-p", "--parameters", dest="parameters", - default=pkg_resources.resource_filename(__name__, "propka.cfg"), - help="set the parameter file [%(default)s]") + group.add_argument( + "-f", "--file", action="append", dest="filenames", default=[], + help="read data from , i.e. is added to arguments") + group.add_argument( + "-r", "--reference", dest="reference", default="neutral", + help=("setting which reference to use for stability calculations " + "[neutral/low-pH]")) + group.add_argument( + "-c", "--chain", action="append", dest="chains", + help=('creating the protein with only a specified chain. Specify ' + '" " for chains without ID [all]')) + group.add_argument( + "-i", "--titrate_only", dest="titrate_only", + help=('Treat only the specified residues as titratable. Value should ' + 'be a comma-separated list of "chain:resnum" values; for example: ' + '-i "A:10,A:11"')) + group.add_argument( + "-t", "--thermophile", action="append", dest="thermophiles", + help=("defining a thermophile filename; usually used in " + "'alignment-mutations'")) + group.add_argument( + "-a", "--alignment", action="append", dest="alignment", + help=("alignment file connecting and " + "[.pir]")) + group.add_argument( + "-m", "--mutation", action="append", dest="mutations", + help=("specifying mutation labels which is used to modify " + " according to, e.g. N25R/N181D")) + group.add_argument( + "-v", "--version", dest="version_label", default="Jan15", + help="specifying the sub-version of propka [Jan15/Dec19]") + group.add_argument( + "-p", "--parameters", dest="parameters", + default=pkg_resources.resource_filename(__name__, "propka.cfg"), + help="set the parameter file [{default:s}]") try: - group.add_argument("--log-level", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], - help="logging level verbosity", default="INFO") + group.add_argument( + "--log-level", + choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], + help="logging level verbosity", default="INFO") except argparse.ArgumentError: # It is possible that --log-level has already been set by APBS pass - group.add_argument("-o", "--pH", dest="pH", type=float, default=7.0, - help="setting pH-value used in e.g. stability calculations [7.0]") - group.add_argument("-w", "--window", dest="window", nargs=3, type=float, - default=(0.0, 14.0, 1.0), - help=("setting the pH-window to show e.g. stability profiles " - "[0.0, 14.0, 1.0]")) - group.add_argument("-g", "--grid", dest="grid", nargs=3, type=float, - default=(0.0, 14.0, 0.1), - help=("setting the pH-grid to calculate e.g. stability " - "related properties [0.0, 14.0, 0.1]")) - group.add_argument("--mutator", dest="mutator", - help="setting approach for mutating [alignment/scwrl/jackal]") - group.add_argument("--mutator-option", dest="mutator_options", action="append", - help="setting property for mutator [e.g. type=\"side-chain\"]") - group.add_argument("-d", "--display-coupled-residues", dest="display_coupled_residues", - action="store_true", help=("Displays alternative pKa values due " - "to coupling of titratable groups")) - group.add_argument("-l", "--reuse-ligand-mol2-files", dest="reuse_ligand_mol2_file", - action="store_true", default=False, - help=("Reuses the ligand mol2 files allowing the user to alter " - "ligand bond orders")) - group.add_argument("-k", "--keep-protons", dest="keep_protons", action="store_true", - help="Keep protons in input file", default=False) - group.add_argument("-q", "--quiet", action="store_const", const="WARNING", - dest="log_level", help="suppress non-warning messages") - group.add_argument("--protonate-all", dest="protonate_all", action="store_true", - help="Protonate all atoms (will not influence pKa calculation)", - default=False) + group.add_argument( + "-o", "--pH", dest="pH", type=float, default=7.0, + help="setting pH-value used in e.g. stability calculations [7.0]") + group.add_argument( + "-w", "--window", dest="window", nargs=3, type=float, + default=(0.0, 14.0, 1.0), + help=("setting the pH-window to show e.g. stability profiles " + "[0.0, 14.0, 1.0]")) + group.add_argument( + "-g", "--grid", dest="grid", nargs=3, type=float, + default=(0.0, 14.0, 0.1), + help=("setting the pH-grid to calculate e.g. stability " + "related properties [0.0, 14.0, 0.1]")) + group.add_argument( + "--mutator", dest="mutator", + help="setting approach for mutating [alignment/scwrl/jackal]") + group.add_argument( + "--mutator-option", dest="mutator_options", action="append", + help="setting property for mutator [e.g. type=\"side-chain\"]") + group.add_argument( + "-d", "--display-coupled-residues", dest="display_coupled_residues", + action="store_true", + help=("Displays alternative pKa values due " + "to coupling of titratable groups")) + group.add_argument( + "-l", "--reuse-ligand-mol2-files", dest="reuse_ligand_mol2_file", + action="store_true", default=False, + help=("Reuses the ligand mol2 files allowing the user to alter " + "ligand bond orders")) + group.add_argument( + "-k", "--keep-protons", dest="keep_protons", action="store_true", + help="Keep protons in input file", default=False) + group.add_argument( + "-q", "--quiet", action="store_const", const="WARNING", + dest="log_level", help="suppress non-warning messages") + group.add_argument( + "--protonate-all", dest="protonate_all", action="store_true", + help="Protonate all atoms (will not influence pKa calculation)", + default=False) return parser @@ -234,13 +291,11 @@ def loadOptions(args=None): """ # loading the parser parser = build_parser() - # parsing and returning options and arguments options = parser.parse_args(args) # adding specified filenames to arguments options.filenames.append(options.input_pdb) - # Convert titrate_only string to a list of (chain, resnum) items: if options.titrate_only is not None: res_list = [] @@ -248,76 +303,109 @@ def loadOptions(args=None): try: chain, resnum, inscode = parse_res_string(res_str) except ValueError: - logger.critical('Invalid residue string: "%s"' % res_str) + _LOGGER.critical( + 'Invalid residue string: "{0:s}"'.format(res_str)) sys.exit(1) res_list.append((chain, resnum, inscode)) options.titrate_only = res_list - - # Set the no-print variable level = getattr(logging, options.log_level) - logger.setLevel(level) - + _LOGGER.setLevel(level) # done! return options -def makeTidyAtomLabel(name,element): - """ - Returns a 'tidier' atom label for printing the new pdbfile - """ +def make_tidy_atom_label(name, element): + """Returns a 'tidier' atom label for printing to the new PDB file. - if len(name)>4:# if longer than 4, just truncate the name - label=name[0:4] - elif len(name)==4:# if lenght is 4, otherwise use the name as it is + Args: + name: atom name + element: atom element + Returns: + string + """ + if len(name) > 4: # if longer than 4, just truncate the name + label = name[0:4] + elif len(name) == 4: # if length is 4, otherwise use the name as it is label = name else: # if less than 4 characters long, insert white space as needed - if len(element)==1: - label = ' %-3s'%name - else: # The element shoul occupy the two first chars - label = '%-4s'%name - + if len(element) == 1: + label = ' {0:<3s}'.format(name) + else: # The element should occupy the two first chars + label = '{0:<4s}'.format(name) return label - def get_sorted_configurations(configuration_keys): - """ - extract and sort configurations + """Extract and sort configurations. + + Args: + configuration_keys: list of configuration keys + Returns: + list of configurations """ configurations = list(configuration_keys) configurations.sort(key=configuration_compare) return configurations + def configuration_compare(conf): + """TODO - figure out what this function does.""" return 100*int(conf[1:-2]) + ord(conf[-1]) +def write_file(filename, lines): + """Writes a new file. - -def writeFile(filename, lines): + Args: + filename: name of file + lines: lines to write to file """ - Writes a new file - """ - f = open_file_for_writing(filename) - + file_ = open_file_for_writing(filename) for line in lines: - f.write( "%s\n" % (line) ) - f.close() - + file_.write("{0:s}\n".format(line)) + file_.close() def _args_to_str(arg_list): + """Summarize list of arguments in string. + + Args: + arg_list: list of arguments + Returns: + string + """ return " ".join(map(str, arg_list)) + def info(*args): - """Log a message. Level defaults to INFO unless overridden.""" - logger.info(_args_to_str(args)) + """Log a message to info. + + Level defaults to INFO unless overridden. + + Args: + args: argument list + """ + _LOGGER.info(_args_to_str(args)) + def debug(*args): - """Log a message on the DEBUG level.""" - logger.debug(_args_to_str(args)) + """Log a message to debug. + + Level defaults to DEBUG unless overridden. + + Args: + args: argument list + """ + _LOGGER.debug(_args_to_str(args)) + def warning(*args): - """Log a WARN message""" - logger.warning(_args_to_str(args)) + """Log a message to warning. + + Level defaults to WARNING unless overridden. + + Args: + args: argument list + """ + _LOGGER.warning(_args_to_str(args)) diff --git a/propka/ligand.py b/propka/ligand.py index ec571f7..3386793 100644 --- a/propka/ligand.py +++ b/propka/ligand.py @@ -1,16 +1,9 @@ -#!/usr/bin/python - -from __future__ import division -from __future__ import print_function - -import sys - -import propka.calculations -from propka.vector_algebra import * -from propka.lib import info, warning +"""Ligand classes and functions.""" +from propka.calculations import squared_distance +from propka.vector_algebra import Vector -all_sybyl_types = [ +ALL_SYBYL_TYPES = [ 'C.3', # carbon sp3 'H', # hydrogen 'C.2', # carbon sp2 @@ -66,344 +59,277 @@ all_sybyl_types = [ 'Sn'] # tin -#propka_input_types = ['1P','1N','2P','2N'] -#for type in all_sybyl_types: -# temp = type.replace('.','') -# if len(temp)>3: -# temp = temp[0:3] -# propka_input_types.append(temp) -# -#for t in propka_input_types: -# print (t) - - -propka_input_types = [ - '1P', - '1N', - '2P', - '2N', - 'C3', - 'H', - 'C2', - 'Hsp', - 'C1', - 'Ht3', - 'Car', - 'LP', - 'Cca', - 'Du', - 'N3', - 'DuC', - 'N2', - 'Any', - 'N1', - 'Hal', - 'Nar', - 'Het', - 'Nam', - 'Hev', - 'Npl', - 'Li', - 'N4', - 'Na', - 'O3', - 'Mg', - 'O2', - 'Al', - 'Oco', - 'Si', - 'Osp', - 'K', - 'Ot3', - 'Ca', - 'S3', - 'Crt', - 'S2', - 'Cro', - 'SO', - 'Mn', - 'SO2', - 'Fe', - 'P3', - 'Coo', - 'F', - 'Cu', - 'Cl', - 'Zn', - 'Br', - 'Se', - 'I', - 'Mo', - 'Sn'] - - -max_C_double_bond = 1.3 -max_C_triple_bond = 1.2 - -max_C_double_bond_squared = max_C_double_bond*max_C_double_bond -max_C_triple_bond_squared = max_C_triple_bond*max_C_triple_bond - - +PROPKA_INPUT_TYPES = ['1P', '1N', '2P', '2N', 'C3', 'H', 'C2', 'Hsp', 'C1', + 'Ht3', 'Car', 'LP', 'Cca', 'Du', 'N3', 'DuC', 'N2', + 'Any', 'N1', 'Hal', 'Nar', 'Het', 'Nam', 'Hev', 'Npl', + 'Li', 'N4', 'Na', 'O3', 'Mg', 'O2', 'Al', 'Oco', 'Si', + 'Osp', 'K', 'Ot3', 'Ca', 'S3', 'Crt', 'S2', 'Cro', 'SO', + 'Mn', 'SO2', 'Fe', 'P3', 'Coo', 'F', 'Cu', 'Cl', 'Zn', + 'Br', 'Se', 'I', 'Mo', 'Sn'] +MAX_C_DOUBLE_BOND = 1.3 +MAX_C_TRIPLE_BOND = 1.2 +MAX_C_DOUBLE_BOND_SQUARED = MAX_C_DOUBLE_BOND*MAX_C_DOUBLE_BOND +MAX_C_TRIPLE_BOND_SQUARED = MAX_C_TRIPLE_BOND*MAX_C_TRIPLE_BOND +PLANARITY_MARGIN = 0.20 def assign_sybyl_type(atom): + """Assign Sybyl type to atom. + + Args: + atom: atom to assign + """ # check if we already have assigned a name to this atom if atom.sybyl_assigned: - #info(atom.name,'already assigned') return - # find some properties of the atom ring_atoms = is_ring_member(atom) - aromatic = is_aromatic_ring(ring_atoms) - planar = is_planar(atom) + aromatic = is_aromatic_ring(ring_atoms) + planar = is_planar(atom) bonded_elements = {} - for i in range(len(atom.bonded_atoms)): - bonded_elements[i]=atom.bonded_atoms[i].element - - - + for i, bonded_atom in enumerate(atom.bonded_atoms): + bonded_elements[i] = bonded_atom.element # Aromatic carbon/nitrogen if aromatic: - for ra in ring_atoms: - if ra.element in ['C','N']: - set_type(ra, ra.element+'.ar') + for ring_atom in ring_atoms: + if ring_atom.element in ['C', 'N']: + set_type(ring_atom, ring_atom.element+'.ar') return - # check for amide - if atom.element in ['O','N','C']: - O=None - N=None - C=None + if atom.element in ['O', 'N', 'C']: + o_atom = None + n_atom = None + c_atom = None # oxygen, nitrogen - if atom.element in ['O','N']: - for b in atom.get_bonded_elements('C'): - for bb in b.bonded_atoms: - if (bb.element =='N' and atom.element == 'O'): - O=atom - C=b - N=bb - elif (bb.element =='O' and atom.element == 'N'): - N=atom - C=b - O=bb + if atom.element in ['O', 'N']: + for bonded_elem in atom.get_bonded_elements('C'): + for bonded_atom in bonded_elem.bonded_atoms: + if (bonded_atom.element == 'N' and atom.element == 'O'): + o_atom = atom + c_atom = bonded_elem + n_atom = bonded_atom + elif (bonded_atom.element == 'O' and atom.element == 'N'): + n_atom = atom + c_atom = bonded_elem + o_atom = bonded_atom # carbon if atom.element == 'C': nitrogens = atom.get_bonded_elements('N') oxygens = atom.get_bonded_elements('O') - if len(nitrogens)==1 and len(oxygens)==1: - C = atom - N = nitrogens[0] - O = oxygens[0] - - - if C and N and O: - # make sure that the Nitrogen is not aromatic and that it has two heavy atom bonds - if not is_aromatic_ring(is_ring_member(N)) and len(N.get_bonded_heavy_atoms())==2: - set_type(N,'N.am') - set_type(C,'C.2') - set_type(O,'O.2') + if len(nitrogens) == 1 and len(oxygens) == 1: + c_atom = atom + n_atom = nitrogens[0] + o_atom = oxygens[0] + if c_atom and n_atom and o_atom: + # make sure that the Nitrogen is not aromatic and that it has two + # heavy atom bonds + if (not is_aromatic_ring(is_ring_member(n_atom)) + and len(n_atom.get_bonded_heavy_atoms()) == 2): + set_type(n_atom, 'N.am') + set_type(c_atom, 'C.2') + set_type(o_atom, 'O.2') return - - - if atom.element=='C': + if atom.element == 'C': # check for carboxyl - if len(atom.bonded_atoms)==3 and list(bonded_elements.values()).count('O')==2: - i1 = list(bonded_elements.values()).index('O') - i2 = list(bonded_elements.values()).index('O',i1+1) - if len(atom.bonded_atoms[i1].bonded_atoms)==1 and len(atom.bonded_atoms[i2].bonded_atoms)==1: - set_type(atom.bonded_atoms[i1],'O.co2-') - set_type(atom.bonded_atoms[i2],'O.co2') - set_type(atom,'C.2') + if (len(atom.bonded_atoms) == 3 + and list(bonded_elements.values()).count('O') == 2): + index1 = list(bonded_elements.values()).index('O') + index2 = list(bonded_elements.values()).index('O', index1+1) + if (len(atom.bonded_atoms[index1].bonded_atoms) == 1 + and len(atom.bonded_atoms[index2].bonded_atoms) == 1): + set_type(atom.bonded_atoms[index1], 'O.co2-') + set_type(atom.bonded_atoms[index2], 'O.co2') + set_type(atom, 'C.2') return - - - # sp carbon - if len(atom.bonded_atoms)<=2: - for b in atom.bonded_atoms: - if propka.calculations.squared_distance(atom, b) < max_C_triple_bond_squared: - set_type(atom,'C.1') - set_type(b,b.element+'.1') + if len(atom.bonded_atoms) <= 2: + for bonded_atom in atom.bonded_atoms: + if (squared_distance(atom, bonded_atom) + < MAX_C_TRIPLE_BOND_SQUARED): + set_type(atom, 'C.1') + set_type(bonded_atom, bonded_atom.element + '.1') if atom.sybyl_assigned: return - # sp2 carbon if planar: - set_type(atom,'C.2') + set_type(atom, 'C.2') # check for N.pl3 - for b in atom.bonded_atoms: - if b.element=='N': - if len(b.bonded_atoms)<3 or is_planar(b): - set_type(b,'N.pl3') + for bonded_atom in atom.bonded_atoms: + if bonded_atom.element == 'N': + if (len(bonded_atom.bonded_atoms) < 3 + or is_planar(bonded_atom)): + set_type(bonded_atom, 'N.pl3') return - # sp3 carbon set_type(atom, 'C.3') return - # Nitrogen if atom.element == 'N': # check for planar N - if len(atom.bonded_atoms)==1: + if len(atom.bonded_atoms) == 1: if is_planar(atom.bonded_atoms[0]): - set_type(atom,'N.pl3') + set_type(atom, 'N.pl3') return - if planar: - set_type(atom,'N.pl3') + set_type(atom, 'N.pl3') return - - set_type(atom,'N.3') + set_type(atom, 'N.3') return - # Oxygen if atom.element == 'O': - set_type(atom,'O.3') - + set_type(atom, 'O.3') if len(atom.bonded_atoms) == 1: # check for carboxyl if atom.bonded_atoms[0].element == 'C': the_carbon = atom.bonded_atoms[0] - if len(the_carbon.bonded_atoms)==3 and the_carbon.count_bonded_elements('O')==2: - [O1,O2] = the_carbon.get_bonded_elements('O') - - if len(O1.bonded_atoms)==1 and len(O2.bonded_atoms)==1: - set_type(O1,'O.co2-') - set_type(O2,'O.co2') - set_type(the_carbon,'C.2') + if (len(the_carbon.bonded_atoms) == 3 + and the_carbon.count_bonded_elements('O') == 2): + [oxy1, oxy2] = the_carbon.get_bonded_elements('O') + if (len(oxy1.bonded_atoms) == 1 + and len(oxy2.bonded_atoms) == 1): + set_type(oxy1, 'O.co2-') + set_type(oxy2, 'O.co2') + set_type(the_carbon, 'C.2') return - # check for X=O - if propka.calculations.squared_distance(atom, atom.bonded_atoms[0]) < max_C_double_bond_squared: - set_type(atom,'O.2') - if atom.bonded_atoms[0].element=='C': - set_type(atom.bonded_atoms[0],'C.2') + if (squared_distance(atom, atom.bonded_atoms[0]) + < MAX_C_DOUBLE_BOND_SQUARED): + set_type(atom, 'O.2') + if atom.bonded_atoms[0].element == 'C': + set_type(atom.bonded_atoms[0], 'C.2') return - - # Sulphur if atom.element == 'S': # check for SO2 - if list(bonded_elements.values()).count('O')==2: - i1 = list(bonded_elements.values()).index('O') - i2 = list(bonded_elements.values()).index('O',i1+1) - set_type(atom.bonded_atoms[i1],'O.2') - set_type(atom.bonded_atoms[i2],'O.2') - set_type(atom,'S.o2') + if list(bonded_elements.values()).count('O') == 2: + index1 = list(bonded_elements.values()).index('O') + index2 = list(bonded_elements.values()).index('O', index1+1) + set_type(atom.bonded_atoms[index1], 'O.2') + set_type(atom.bonded_atoms[index2], 'O.2') + set_type(atom, 'S.o2') return - # check for SO4 - if list(bonded_elements.values()).count('O')==4: - no_O2 = 0 + if list(bonded_elements.values()).count('O') == 4: + no_o2 = 0 for i in range(len(atom.bonded_atoms)): - if len(atom.bonded_atoms[i].bonded_atoms)==1 and no_O2<2: - set_type(atom.bonded_atoms[i],'O.2') - no_O2+=1 + if len(atom.bonded_atoms[i].bonded_atoms) == 1 and no_o2 < 2: + set_type(atom.bonded_atoms[i], 'O.2') + no_o2 += 1 else: - set_type(atom.bonded_atoms[i],'O.3') - - set_type(atom,'S.3') - - + set_type(atom.bonded_atoms[i], 'O.3') + set_type(atom, 'S.3') return - - # Phosphorus if atom.element == 'P': - set_type(atom,'P.3') - + set_type(atom, 'P.3') # check for phosphate group bonded_oxygens = atom.get_bonded_elements('O') - for o in bonded_oxygens: set_type(o,'O.3') -# if len(bonded_oxygens)>=3: -# # find oxygens only bonded to current phosphorus -# bonded_oxygens_1 = [o for o in bonded_oxygens if len(o.get_bonded_heavy_atoms())==1] -# # find the closest oxygen ... -# closest_oxygen = min(bonded_oxygens_1, -# key= lambda o:propka.calculations.squared_distance(atom,o)) -# # ... and set it to O.2 -# set_type(closest_oxygen,'O.2') - + for o_atom in bonded_oxygens: + set_type(o_atom, 'O.3') return - - - element = atom.element.capitalize() - set_type(atom,element) - # info('Using element as type for %s'%atom.element) - - return + set_type(atom, element) def is_ring_member(atom): - return identify_ring(atom,atom,0,[]) + """Determine if atom is a member of a ring. + + Args: + atom: atom to test + Returns: + list of atoms + """ + return identify_ring(atom, atom, 0, []) + def identify_ring(this_atom, original_atom, number, past_atoms): - number+=1 - past_atoms=past_atoms+[this_atom] + """Identify the atoms in a ring + + Args: + this_atom: atom to test + original_atom: some other atom + number: number of atoms + past_atoms: atoms that have already been found + Returns: + list of atoms + """ + number += 1 + past_atoms = past_atoms + [this_atom] return_atoms = [] if number > 10: return return_atoms - for atom in this_atom.get_bonded_heavy_atoms(): - if atom == original_atom and number>2: + if atom == original_atom and number > 2: return past_atoms - if atom not in past_atoms: - these_return_atoms = identify_ring(atom, original_atom, number, past_atoms) + these_return_atoms = identify_ring(atom, original_atom, number, + past_atoms) if len(these_return_atoms) > 0: - if len(return_atoms)>len(these_return_atoms) or len(return_atoms)==0: + if (len(return_atoms) > len(these_return_atoms) + or len(return_atoms) == 0): return_atoms = these_return_atoms - return return_atoms +def set_type(atom, type_): + """Set atom type.. - -def set_type(atom,type): - #info(atom, '->',type) - atom.sybyl_type = type - atom.sybyl_assigned=True - return - - + Args: + atom: atom to set + type_: type value to set + """ + atom.sybyl_type = type_ + atom.sybyl_assigned = True def is_planar(atom): - """ Finds out if atom forms a plane together with its bonded atoms""" - atoms = [atom]+atom.bonded_atoms + """Finds out if atom forms a plane together with its bonded atoms. + + Args: + atom: atom to test + Returns: + Boolean + """ + atoms = [atom] + atom.bonded_atoms return are_atoms_planar(atoms) + def are_atoms_planar(atoms): - if len(atoms)==0: - return False - if len(atoms)<4: - return False - v1 = vector(atom1=atoms[0], atom2=atoms[1]) - v2 = vector(atom1=atoms[0], atom2=atoms[2]) - n = (v1**v2).rescale(1.0) + """Test whether a group of atoms are planar. - margin = 0.20 - for b in atoms[3:]: - v = vector(atom1=atoms[0], atom2=b).rescale(1.0) - #info(atoms[0],abs(v*n) ) - if abs(v*n)>margin: + Args: + atoms: list of atoms + Returns: + Boolean + """ + if len(atoms) == 0: + return False + if len(atoms) < 4: + return False + vec1 = Vector(atom1=atoms[0], atom2=atoms[1]) + vec2 = Vector(atom1=atoms[0], atom2=atoms[2]) + norm = (vec1**vec2).rescale(1.0) + margin = PLANARITY_MARGIN + for atom in atoms[3:]: + vec = Vector(atom1=atoms[0], atom2=atom).rescale(1.0) + if abs(vec*norm) > margin: return False - return True -def is_aromatic_ring(atoms): - if len(atoms)<5: - return False +def is_aromatic_ring(atoms): + """Determine whether group of atoms form aromatic ring. + + Args: + atoms: list of atoms to test + Returns: + Boolean + """ + if len(atoms) < 5: + return False for i in range(len(atoms)): if not are_atoms_planar(atoms[i:]+atoms[:i]): return False - return True - - - - diff --git a/propka/ligand_pka_values.py b/propka/ligand_pka_values.py index 512b7cd..d7a5423 100644 --- a/propka/ligand_pka_values.py +++ b/propka/ligand_pka_values.py @@ -1,15 +1,25 @@ -#!/usr/bin/env python - -from __future__ import division -from __future__ import print_function +"""Ligand pKa values""" +import os +import subprocess +import sys +import propka.molecular_container +import propka.calculations +import propka.parameters +import propka.pdb +import propka.lib from propka.lib import info, warning -import propka.molecular_container, propka.calculations, propka.calculations, propka.parameters, propka.pdb, propka.lib, os, subprocess, sys -class ligand_pka_values: +class LigandPkaValues: + """Ligand pKa value class.""" + def __init__(self, parameters): - self.parameters = parameters + """Initialize object with parameters. + Args: + parameters: parameters + """ + self.parameters = parameters # attempt to find Marvin executables in the path self.molconvert = self.find_in_path('molconvert') self.cxcalc = self.find_in_path('cxcalc') @@ -17,106 +27,173 @@ class ligand_pka_values: info(self.cxcalc) info(self.molconvert) - return + @staticmethod + def find_in_path(program): + """Find a program in the system path. - - def find_in_path(self, program): + Args: + program: program to find + Returns: + location of program + """ path = os.environ.get('PATH').split(os.pathsep) - - l = [i for i in filter(lambda loc: os.access(loc, os.F_OK), - map(lambda dir: os.path.join(dir, program),path))] - - if len(l) == 0: - info('Error: Could not find %s. Please make sure that it is found in the path.' % program) + locs = [ + i for i in filter(lambda loc: os.access(loc, os.F_OK), + map(lambda dir: os.path.join(dir, program), + path))] + if len(locs) == 0: + str_ = "'Error: Could not find {0:s}.".format(program) + str_ += ' Please make sure that it is found in the path.' + info(str_) sys.exit(-1) + return locs[0] - return l[0] + def get_marvin_pkas_for_pdb_file(self, pdbfile, num_pkas=10, min_ph=-10, + max_ph=20): + """Use Marvin executables to get pKas for a PDB file. - def get_marvin_pkas_for_pdb_file(self, file, no_pkas=10, min_pH =-10, max_pH=20): - molecule = propka.molecular_container.Molecular_container(file) - self.get_marvin_pkas_for_molecular_container(molecule, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) - return + Args: + pdbfile: PDB file + num_pkas: number of pKas to get + min_ph: minimum pH value + max_ph: maximum pH value + """ + molecule = propka.molecular_container.Molecular_container(pdbfile) + self.get_marvin_pkas_for_molecular_container( + molecule, num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph) - def get_marvin_pkas_for_molecular_container(self, molecule, no_pkas=10, min_pH =-10, max_pH=20): + def get_marvin_pkas_for_molecular_container(self, molecule, num_pkas=10, + min_ph=-10, max_ph=20): + """Use Marvin executables to calculate pKas for a molecular container. + + Args: + molecule: molecular container + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ for name in molecule.conformation_names: - filename = '%s_%s'%(molecule.name,name) - self.get_marvin_pkas_for_conformation_container(molecule.conformations[name], name=filename, reuse=molecule.options.reuse_ligand_mol2_file, - no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + filename = '{0:s}_{1:s}'.format(molecule.name, name) + self.get_marvin_pkas_for_conformation_container( + molecule.conformations[name], name=filename, + reuse=molecule.options.reuse_ligand_mol2_file, + num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph) - return + def get_marvin_pkas_for_conformation_container(self, conformation, + name='temp', reuse=False, + num_pkas=10, min_ph=-10, + max_ph=20): + """Use Marvin executables to calculate pKas for a conformation container. - def get_marvin_pkas_for_conformation_container(self, conformation, name = 'temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + Args: + conformation: conformation container + name: filename + reuse: flag to reuse the structure files + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ conformation.marvin_pkas_calculated = True - self.get_marvin_pkas_for_atoms(conformation.get_heavy_ligand_atoms(), name=name, reuse=reuse, - no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + self.get_marvin_pkas_for_atoms( + conformation.get_heavy_ligand_atoms(), name=name, reuse=reuse, + num_pkas=num_pkas, min_ph=min_ph, max_ph=max_ph) - return + def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False, + num_pkas=10, min_ph=-10, max_ph=20): + """Use Marvin executables to calculate pKas for a list of atoms. - def get_marvin_pkas_for_atoms(self, atoms, name='temp', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + Args: + atoms: list of atoms + name: filename + reuse: flag to reuse the structure files + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ # do one molecule at the time so we don't confuse marvin molecules = propka.lib.split_atoms_into_molecules(atoms) - for i in range(len(molecules)): - filename = '%s_%d.mol2'%(name, i+1) - self.get_marvin_pkas_for_molecule(molecules[i], filename=filename, reuse=reuse, no_pkas=no_pkas, min_pH =min_pH, max_pH=max_pH) + for i, molecule in enumerate(molecules): + filename = '{0:s}_{1:d}.mol2'.format(name, i+1) + self.get_marvin_pkas_for_molecule( + molecule, filename=filename, reuse=reuse, num_pkas=num_pkas, + min_ph=min_ph, max_ph=max_ph) - return + def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', + reuse=False, num_pkas=10, min_ph=-10, + max_ph=20): + """Use Marvin executables to calculate pKas for a molecule. - - def get_marvin_pkas_for_molecule(self, atoms, filename='__tmp_ligand.mol2', reuse=False, no_pkas=10, min_pH =-10, max_pH=20): + Args: + molecule: the molecule + name: filename + reuse: flag to reuse the structure files + num_pkas: number of pKas to calculate + min_ph: minimum pH value + max_ph: maximum pH value + """ # print out structure unless we are using user-modified structure if not reuse: propka.pdb.write_mol2_for_atoms(atoms, filename) # check that we actually have a file to work with if not os.path.isfile(filename): - warning('Didn\'t find a user-modified file \'%s\' - generating one' % filename) + errstr = ( + "Didn't find a user-modified file '{0:s}' " + "- generating one".format( + filename)) + warning(errstr) propka.pdb.write_mol2_for_atoms(atoms, filename) - - - - # Marvin - # calculate pKa values - options = 'pka -a %d -b %d --min %f --max %f -d large'%(no_pkas, no_pkas, min_pH, max_pH) - (output,errors) = subprocess.Popen([self.cxcalc, filename]+options.split(), - stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate() - - if len(errors)>0: - info('********************************************************************************************************') - info('* Warning: Marvin execution failed: *') - info('* %-100s *' % errors) - info('* *') - info('* Please edit the ligand mol2 file and re-run PropKa with the -l option: %29s *' % filename) - info('********************************************************************************************************') + # Marvin calculate pKa values + fmt = ( + 'pka -a {num1} -b {num2} --min {min_ph} ' + '--max {max_ph} -d large') + options = ( + fmt.format( + num1=num_pkas, num2=num_pkas, min_ph=min_ph, max_ph=max_ph)) + (output, errors) = subprocess.Popen( + [self.cxcalc, filename]+options.split(), stdout=subprocess.PIPE, + stderr=subprocess.PIPE).communicate() + if len(errors) > 0: + info('***********************************************************' + '*********************************************') + info('* Warning: Marvin execution failed: ' + ' *') + info('* {0:<100s} *'.format(errors)) + info('* ' + ' *') + info('* Please edit the ligand mol2 file and re-run PropKa with ' + 'the -l option: {0:>29s} *'.format(filename)) + info('***********************************************************' + '*********************************************') sys.exit(-1) - # extract calculated pkas - indices,pkas,types = self.extract_pkas(output) - + indices, pkas, types = self.extract_pkas(output) # store calculated pka values - for i in range(len(indices)): - atoms[indices[i]].marvin_pka = pkas[i] - atoms[indices[i]].charge = {'a':-1,'b':+1}[types[i]] - info('%s model pKa: %.2f' % (atoms[indices[i]], pkas[i])) + for i, index in enumerate(indices): + atoms[index].marvin_pka = pkas[i] + atoms[index].charge = {'a': -1, 'b': 1}[types[i]] + info('{0:s} model pKa: {1:<.2f}'.format(atoms[index], pkas[i])) - return + @staticmethod + def extract_pkas(output): + """Extract pKa value from output. - def extract_pkas(self, output): + Args: + output: output string to parse + Returns: + 1. Indices + 2. Values + 3. Types + """ # split output - [tags, values,empty_line] = output.decode().split('\n') - #info(tags) - #info(values) + [tags, values, _] = output.decode().split('\n') tags = tags.split('\t') values = values.split('\t') - # format values - types = [tags[i][0] for i in range(1,len(tags)-1) if len(values)>i and values[i] != ''] - indices = [int(a)-1 for a in values[-1].split(',') if a !=''] - values = [float(v.replace(',','.')) for v in values[1:-1] if v != ''] - + types = [ + tags[i][0] for i in range(1, len(tags)-1) + if len(values) > i and values[i] != ''] + indices = [int(a)-1 for a in values[-1].split(',') if a != ''] + values = [float(v.replace(',', '.')) for v in values[1:-1] if v != ''] if len(indices) != len(values) != len(types): raise Exception('Lengths of atoms and pka values mismatch') - return indices, values, types - - - - diff --git a/propka/molecular_container.py b/propka/molecular_container.py index d2d59cb..3d2409f 100644 --- a/propka/molecular_container.py +++ b/propka/molecular_container.py @@ -1,145 +1,148 @@ -#!/usr/bin/python -# -# Molecular container for storing all contents of pdb files -# -# -from __future__ import division -from __future__ import print_function - -import os, sys - -import propka.pdb, propka.version, propka.output, propka.conformation_container, propka.group, propka.lib +"""Molecular container for storing all contents of PDB files.""" +import os +import sys +import propka.pdb +import propka.version +import propka.output +import propka.group +import propka.lib +from propka.conformation_container import ConformationContainer from propka.lib import info, warning -class Molecular_container: - def __init__(self, input_file, options=None): - # printing out header before parsing input - propka.output.printHeader() +# TODO - these are constants whose origins are a little murky +UNK_PI_CUTOFF = 0.01 +# Maximum number of iterations for finding PI +MAX_ITERATION = 4 + + +class Molecular_container: + """Container for storing molecular contents of PDB files. + + TODO - this class name does not conform to PEP8 but has external use. + We should deprecate and change eventually. + """ + + def __init__(self, input_file, options=None): + """Initialize molecular container. + + Args: + input_file: molecular input file + options: options object + """ + # printing out header before parsing input + propka.output.print_header() # set up some values self.options = options self.input_file = input_file + # TODO - replace this indelicate os.path code with pathlib self.dir = os.path.split(input_file)[0] self.file = os.path.split(input_file)[1] self.name = self.file[0:self.file.rfind('.')] input_file_extension = input_file[input_file.rfind('.'):] - # set the version if options: parameters = propka.parameters.Parameters(self.options.parameters) else: parameters = propka.parameters.Parameters('propka.cfg') try: - exec('self.version = propka.version.%s(parameters)'%parameters.version) - except: - raise Exception('Error: Version %s does not exist'%parameters.version) - + version_class = getattr(propka.version, parameters.version) + self.version = version_class(parameters) + except AttributeError as err: + print(err) + errstr = 'Error: Version {0:s} does not exist'.format( + parameters.version) + raise Exception(errstr) # read the input file if input_file_extension[0:4] == '.pdb': - # input is a pdb file - # read in atoms and top up containers to make sure that all atoms are present in all conformations - [self.conformations, self.conformation_names] = propka.pdb.read_pdb(input_file, self.version.parameters,self) - if len(self.conformations)==0: - info('Error: The pdb file does not seems to contain any molecular conformations') + # input is a pdb file. read in atoms and top up containers to make + # sure that all atoms are present in all conformations + [self.conformations, self.conformation_names] = ( + propka.pdb.read_pdb(input_file, self.version.parameters, self)) + if len(self.conformations) == 0: + info('Error: The pdb file does not seems to contain any ' + 'molecular conformations') sys.exit(-1) - self.top_up_conformations() - # make a structure precheck - propka.pdb.protein_precheck(self.conformations, self.conformation_names) - + propka.pdb.protein_precheck(self.conformations, + self.conformation_names) # set up atom bonding and protonation self.version.setup_bonding_and_protonation(self) - # Extract groups self.extract_groups() - # sort atoms for name in self.conformation_names: self.conformations[name].sort_atoms() - # find coupled groups self.find_covalently_coupled_groups() - # write out the input file - filename = self.file.replace(input_file_extension,'.propka_input') + filename = self.file.replace(input_file_extension, '.propka_input') propka.pdb.write_input(self, filename) - - elif input_file_extension == '.propka_input': #input is a propka_input file - [self.conformations, self.conformation_names] = propka.pdb.read_input(input_file, self.version.parameters, self) - - # Extract groups - this merely sets up the groups found in the input file + [self.conformations, self.conformation_names] = ( + propka.pdb.read_input(input_file, self.version.parameters, + self)) + # Extract groups - this merely sets up the groups found in the + # input file self.extract_groups() - # do some additional set up self.additional_setup_when_reading_input_file() - else: - info('Unrecognized input file:%s' % input_file) + info('Unrecognized input file:{0:s}'.format(input_file)) sys.exit(-1) - - return def top_up_conformations(self): - """ Makes sure that all atoms are present in all conformations """ + """Makes sure that all atoms are present in all conformations.""" for name in self.conformation_names: - if name!='1A' and (len(self.conformations[name]) < len(self.conformations['1A'])): + if (name != '1A' and (len(self.conformations[name]) + < len(self.conformations['1A']))): self.conformations[name].top_up(self.conformations['1A']) - return - def find_covalently_coupled_groups(self): + """Find covalently coupled groups.""" info('-' * 103) for name in self.conformation_names: self.conformations[name].find_covalently_coupled_groups() - return - def find_non_covalently_coupled_groups(self): + """Find non-covalently coupled groups.""" info('-' * 103) + verbose = self.options.display_coupled_residues for name in self.conformation_names: - self.conformations[name].find_non_covalently_coupled_groups(verbose=self.options.display_coupled_residues) - - return + self.conformations[name].find_non_covalently_coupled_groups( + verbose=verbose) def extract_groups(self): - """ Identify the groups needed for pKa calculation """ + """Identify the groups needed for pKa calculation.""" for name in self.conformation_names: self.conformations[name].extract_groups() - return - def additional_setup_when_reading_input_file(self): + """Additional setup.""" for name in self.conformation_names: self.conformations[name].additional_setup_when_reading_input_file() - return - - def calculate_pka(self): + """Calculate pKa values.""" # calculate for each conformation for name in self.conformation_names: - self.conformations[name].calculate_pka(self.version, self.options) - + self.conformations[name].calculate_pka( + self.version, self.options) # find non-covalently coupled groups self.find_non_covalently_coupled_groups() - # find the average of the conformations self.average_of_conformations() - # print out the conformation-average results - propka.output.printResult(self, 'AVR', self.version.parameters) - - return + propka.output.print_result(self, 'AVR', self.version.parameters) def average_of_conformations(self): + """Generate an average of conformations.""" + parameters = self.conformations[self.conformation_names[0]].parameters # make a new configuration to hold the average values - avr_conformation = propka.conformation_container.Conformation_container(name='average', - parameters=self.conformations[self.conformation_names[0]].parameters, - molecular_container=self) - + avr_conformation = ConformationContainer( + name='average', parameters=parameters, molecular_container=self) container = self.conformations[self.conformation_names[0]] for group in container.get_groups_for_calculations(): # new group to hold average values @@ -150,104 +153,135 @@ class Molecular_container: if group_to_add: avr_group += group_to_add else: - warning('Group %s could not be found in conformation %s.' % (group.atom.residue_label, name)) + str_ = ( + 'Group {0:s} could not be found in ' + 'conformation {0:s}.'.format( + group.atom.residue_label, name)) + warning(str_) # ... and store the average value avr_group = avr_group / len(self.conformation_names) avr_conformation.groups.append(avr_group) - # store information on coupling in the average container - if len(list(filter(lambda c: c.non_covalently_coupled_groups, self.conformations.values()))): + if len(list(filter(lambda c: c.non_covalently_coupled_groups, + self.conformations.values()))): avr_conformation.non_covalently_coupled_groups = True - # store chain info - avr_conformation.chains = self.conformations[self.conformation_names[0]].chains - + avr_conformation.chains = self.conformations[ + self.conformation_names[0]].chains self.conformations['AVR'] = avr_conformation - return - def write_pka(self, filename=None, reference="neutral", direction="folding", options=None): - #for name in self.conformation_names: - # propka.output.writePKA(self, self.version.parameters, filename='%s_3.1_%s.pka'%(self.name, name), - # conformation=name,reference=reference, - # direction=direction, options=options) + def write_pka(self, filename=None, reference="neutral", + direction="folding", options=None): + """Write pKa information to a file. + Args: + filename: file to write to + reference: reference state + direction: folding vs. unfolding + options: options object + """ # write out the average conformation - filename=os.path.join('%s.pka'%(self.name)) - - # if the display_coupled_residues option is true, - # write the results out to an alternative pka file + filename = os.path.join('{0:s}.pka'.format(self.name)) + # if the display_coupled_residues option is true, write the results out + # to an alternative pka file if self.options.display_coupled_residues: - filename=os.path.join('%s_alt_state.pka'%(self.name)) + filename = os.path.join('{0:s}_alt_state.pka'.format(self.name)) + if (hasattr(self.version.parameters, 'output_file_tag') + and len(self.version.parameters.output_file_tag) > 0): + filename = os.path.join( + '{0:s}_{1:s}.pka'.format( + self.name, self.version.parameters.output_file_tag)) + propka.output.write_pka( + self, self.version.parameters, filename=filename, + conformation='AVR', reference=reference) - if hasattr(self.version.parameters, 'output_file_tag') and len(self.version.parameters.output_file_tag)>0: - filename=os.path.join('%s_%s.pka'%(self.name,self.version.parameters.output_file_tag)) + def get_folding_profile(self, conformation='AVR', reference="neutral", + grid=[0., 14., 0.1]): + """Get a folding profile. - propka.output.writePKA(self, self.version.parameters, filename=filename, - conformation='AVR',reference=reference, - direction=direction, options=options) - - return - - def getFoldingProfile(self, conformation='AVR',reference="neutral", direction="folding", grid=[0., 14., 0.1], options=None): + Args: + conformation: conformation to select + reference: reference state + direction: folding direction (folding) + grid: the grid of pH values [min, max, step_size] + options: options object + Returns: + TODO - figure out what these are + 1. profile + 2. opt + 3. range_80pct + 4. stability_range + """ # calculate stability profile profile = [] for ph in propka.lib.make_grid(*grid): - ddg = self.conformations[conformation].calculate_folding_energy( pH=ph, reference=reference) - #info(ph,ddg) + conf = self.conformations[conformation] + ddg = conf.calculate_folding_energy(ph=ph, reference=reference) profile.append([ph, ddg]) - # find optimum - opt =[None, 1e6] + opt = [None, 1e6] for point in profile: - opt = min(opt, point, key=lambda v:v[1]) - + opt = min(opt, point, key=lambda v: v[1]) # find values within 80 % of optimum range_80pct = [None, None] - values_within_80pct = [p[0] for p in profile if p[1]< 0.8*opt[1]] - if len(values_within_80pct)>0: + values_within_80pct = [p[0] for p in profile if p[1] < 0.8*opt[1]] + if len(values_within_80pct) > 0: range_80pct = [min(values_within_80pct), max(values_within_80pct)] - # find stability range stability_range = [None, None] - stable_values = [p[0] for p in profile if p[1]< 0.0] - - if len(stable_values)>0: + stable_values = [p[0] for p in profile if p[1] < 0.0] + if len(stable_values) > 0: stability_range = [min(stable_values), max(stable_values)] - return profile, opt, range_80pct, stability_range + def get_charge_profile(self, conformation='AVR', grid=[0., 14., .1]): + """Get charge profile for conformation as function of pH. - def getChargeProfile(self, conformation='AVR', grid=[0., 14., .1]): + Args: + conformation: conformation to test + grid: grid of pH values [min, max, step] + Returns: + list of charge state values + """ charge_profile = [] for ph in propka.lib.make_grid(*grid): - q_unfolded, q_folded = self.conformations[conformation].calculate_charge(self.version.parameters, pH=ph) + conf = self.conformations[conformation] + q_unfolded, q_folded = conf.calculate_charge( + self.version.parameters, ph=ph) charge_profile.append([ph, q_unfolded, q_folded]) - return charge_profile - def getPI(self, conformation='AVR', grid=[0., 14., 1], iteration=0): - #info('staring',grid, iteration) - # search - charge_profile = self.getChargeProfile(conformation=conformation, grid=grid) - pi = [] - pi_folded = pi_unfolded = [None, 1e6,1e6] - for point in charge_profile: - pi_folded = min(pi_folded, point, key=lambda v:abs(v[2])) - pi_unfolded = min(pi_unfolded, point, key=lambda v:abs(v[1])) + def get_pi(self, conformation='AVR', grid=[0., 14., 1], iteration=0): + """Get the isoelectric points for folded and unfolded states. - # If results are not good enough, do it again with a higher sampling resolution - pi_folded_value = pi_folded[0] + Args: + conformation: conformation to test + grid: grid of pH values [min, max, step] + iteration: iteration number of process + Returns: + 1. Folded state PI + 2. Unfolded state PI + """ + charge_profile = self.get_charge_profile( + conformation=conformation, grid=grid) + pi_folded = pi_unfolded = [None, 1e6, 1e6] + for point in charge_profile: + pi_folded = min(pi_folded, point, key=lambda v: abs(v[2])) + pi_unfolded = min(pi_unfolded, point, key=lambda v: abs(v[1])) + # If results are not good enough, do it again with a higher sampling + # resolution + pi_folded_value = pi_folded[0] pi_unfolded_value = pi_unfolded[0] step = grid[2] - if (pi_folded[2] > 0.01 or pi_unfolded[1] > 0.01) and iteration<4: - pi_folded_value, x = self.getPI(conformation=conformation, grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0], iteration=iteration+1) - x, pi_unfolded_value = self.getPI(conformation=conformation, grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0], iteration=iteration+1) - - + # TODO - need to warn if maximum number of iterations is exceeded + if ((pi_folded[2] > UNK_PI_CUTOFF + or pi_unfolded[1] > UNK_PI_CUTOFF) and iteration < MAX_ITERATION): + pi_folded_value, _ = self.get_pi( + conformation=conformation, + grid=[pi_folded[0]-step, pi_folded[0]+step, step/10.0], + iteration=iteration+1) + _, pi_unfolded_value = self.get_pi( + conformation=conformation, + grid=[pi_unfolded[0]-step, pi_unfolded[0]+step, step/10.0], + iteration=iteration+1) return pi_folded_value, pi_unfolded_value - - - -if __name__ == '__main__': - input_file = sys.argv[1] - mc = Molecular_container(input_file) diff --git a/propka/output.py b/propka/output.py index 531af02..61f001b 100644 --- a/propka/output.py +++ b/propka/output.py @@ -1,397 +1,463 @@ - -from __future__ import division -from __future__ import print_function - -import sys - -import propka.lib -from propka.lib import info, warning +"""Output routines.""" +from datetime import date +from propka.lib import info -def printHeader(): +def print_header(): + """Print header section of output.""" + str_ = "{0:s}\n".format(get_propka_header()) + str_ += "{0:s}\n".format(get_references_header()) + str_ += "{0:s}\n".format(get_warning_header()) + info(str_) + + +def write_pdb(protein, pdbfile=None, filename=None, include_hydrogens=False, + _=None): + """Write a residue to the new PDB file. + + Args: + protein: protein object + pdbfile: PDB file + filename: file to write to + include_hydrogens: Boolean indicating whether to include hydrogens + options: options object """ - prints the header section - """ - str = "%s\n" % ( getPropkaHeader() ) - str += "%s\n" % ( getReferencesHeader() ) - str += "%s\n" % ( getWarningHeader() ) - - info(str) - - -def writePDB(protein, file=None, filename=None, include_hydrogens=False, options=None): - """ - Write the residue to the new pdbfile - """ - - if file == None: - # opening file if not given - if filename == None: - filename = "%s.pdb" % (protein.name) - file = open(filename, 'w') - info("writing pdbfile %s" % (filename)) - close_file = True + if pdbfile is None: + # opening file if not given + if filename is None: + filename = "{0:s}.pdb".format(protein.name) + # TODO - this would be better as a context manager + pdbfile = open(filename, 'w') + info("writing pdbfile {0:s}".format(filename)) + close_file = True else: - # don't close the file, it was opened in a different place - close_file = False - + # don't close the file, it was opened in a different place + close_file = False numb = 0 for chain in protein.chains: - for residue in chain.residues: - if residue.res_name not in ["N+ ", "C- "]: - for atom in residue.atoms: - if include_hydrogens == False and atom.name[0] == "H": - """ don't print """ - else: - numb += 1 - line = atom.make_pdb_line2(numb=numb) - line += "\n" - file.write(line) - - if close_file == True: - file.close() + for residue in chain.residues: + if residue.res_name not in ["N+ ", "C- "]: + for atom in residue.atoms: + if (not include_hydrogens) and atom.name[0] == "H": + # don't print + pass + else: + numb += 1 + line = atom.make_pdb_line2(numb=numb) + line += "\n" + pdbfile.write(line) + if close_file: + pdbfile.close() -def writePKA(protein, parameters, filename=None, conformation ='1A',reference="neutral", direction="folding", verbose=False, options=None): - """ - Write the pka-file based on the given protein +def write_pka(protein, parameters, filename=None, conformation='1A', + reference="neutral", _="folding", verbose=False, + __=None): + """Write the pKa-file based on the given protein. + + Args: + protein: protein object + filename: output file name + conformation: TODO - figure this out + reference: reference state + _: "folding" or other + verbose: Boolean flag for verbosity + __: options object """ + # TODO - the code immediately overrides the verbose argument; why? verbose = True - if filename == None: - filename = "%s.pka" % (protein.name) - file = open(filename, 'w') - if verbose == True: - info("Writing %s" % (filename)) - + if filename is None: + filename = "{0:s}.pka".format(protein.name) + # TODO - this would be much better with a context manager + file_ = open(filename, 'w') + if verbose: + info("Writing {0:s}".format(filename)) # writing propka header - str = "%s\n" % ( getPropkaHeader() ) - str += "%s\n" % ( getReferencesHeader() ) - str += "%s\n" % ( getWarningHeader() ) - + str_ = "{0:s}\n".format(get_propka_header()) + str_ += "{0:s}\n".format(get_references_header()) + str_ += "{0:s}\n".format(get_warning_header()) # writing pKa determinant section - str += getDeterminantSection(protein,conformation, parameters) - + str_ += get_determinant_section(protein, conformation, parameters) # writing pKa summary section - str += getSummarySection(protein,conformation,parameters) - str += "%s\n" % ( getTheLine() ) - + str_ += get_summary_section(protein, conformation, parameters) + str_ += "{0:s}\n".format(get_the_line()) # printing Folding Profile - str += getFoldingProfileSection(protein, conformation=conformation, reference=reference, direction=direction, window=[0., 14., 1.0], options=options) - + str_ += get_folding_profile_section( + protein, conformation=conformation, reference=reference, + window=[0., 14., 1.0]) # printing Protein Charge Profile - str += getChargeProfileSection(protein, conformation=conformation) - + str_ += get_charge_profile_section(protein, conformation=conformation) # now, writing the pka text to file - file.write(str) - - file.close() + file_.write(str_) + file_.close() -def printTmProfile(protein, reference="neutral", window=[0., 14., 1.], Tm=[0.,0.], Tms=None, ref=None, verbose=False, options=None): +def print_tm_profile(protein, reference="neutral", window=[0., 14., 1.], + __=[0., 0.], tms=None, ref=None, _=False, + options=None): + """Print Tm profile. + + I think Tm refers to the denaturation temperature. + + Args: + protein: protein object + reference: reference state + window: pH window [min, max, step] + __: temperature range [min, max] + tms: TODO - figure this out + ref: TODO - figure this out (probably reference state?) + _: Boolean for verbosity + options: options object """ - prints Tm profile - """ - profile = protein.getTmProfile(reference=reference, grid=[0., 14., 0.1], Tms=Tms, ref=ref, options=options) - if profile == None: - str = "Could not determine Tm-profile\n" + profile = protein.getTmProfile( + reference=reference, grid=[0., 14., 0.1], tms=tms, ref=ref, + options=options) + if profile is None: + str_ = "Could not determine Tm-profile\n" else: - str = " suggested Tm-profile for %s\n" % (protein.name) - for (pH, Tm) in profile: - if pH >= window[0] and pH <= window[1] and (pH%window[2] < 0.01 or pH%window[2] > 0.99*window[2]): - str += "%6.2lf%10.2lf\n" % (pH, Tm) - info(str) + str_ = " suggested Tm-profile for {0:s}\n".format(protein.name) + for (ph, tm_) in profile: + if (ph >= window[0] and ph <= window[1] + and (ph % window[2] < 0.01 + or ph % window[2] > 0.99*window[2])): + str_ += "{0:>6.2f}{1:>10.2f}\n".format(ph, tm_) + info(str_) -def printResult(protein, conformation, parameters): +def print_result(protein, conformation, parameters): + """Prints all resulting output from determinants and down. + + Args: + protein: protein object + conformation: specific conformation + parameters: parameters """ - prints all resulting output from determinants and down - """ - printPKASection(protein, conformation, parameters) + print_pka_section(protein, conformation, parameters) -def printPKASection(protein, conformation, parameters): - """ - prints out the pka-section of the result +def print_pka_section(protein, conformation, parameters): + """Prints out pKa section of results. + + Args: + protein: protein object + conformation: specific conformation + parameters: parameters """ # geting the determinants section - str = getDeterminantSection(protein, conformation, parameters) - info(str) - - str = getSummarySection(protein,conformation,parameters) - info(str) + str_ = get_determinant_section(protein, conformation, parameters) + info(str_) + str_ = get_summary_section(protein, conformation, parameters) + info(str_) -def getDeterminantSection(protein, conformation, parameters): - """ - prints out the pka-section of the result +def get_determinant_section(protein, conformation, parameters): + """Returns string with determinant section of results. + + Args: + protein: protein object + conformation: specific conformation + parameters: parameters + Returns: + string """ # getting the same order as in propka2.0 - str = "%s\n" % ( getDeterminantsHeader() ) + str_ = "{0:s}\n".format(get_determinants_header()) # printing determinants for chain in protein.conformations[conformation].chains: for residue_type in parameters.write_out_order: - groups = [g for g in protein.conformations[conformation].groups if g.atom.chain_id == chain] + groups = [ + g for g in protein.conformations[conformation].groups + if g.atom.chain_id == chain] for group in groups: if group.residue_type == residue_type: - str += "%s" % ( group.getDeterminantString(parameters.remove_penalised_group) ) - + str_ += "{0:s}".format( + group.get_determinant_string( + parameters.remove_penalised_group)) # Add a warning in case of coupled residues - if protein.conformations[conformation].non_covalently_coupled_groups and not protein.options.display_coupled_residues: - str += 'Coupled residues (marked *) were detected. Please rerun PropKa with the --display-coupled-residues \nor -d option for detailed information.\n' - - return str + if (protein.conformations[conformation].non_covalently_coupled_groups + and not protein.options.display_coupled_residues): + str_ += 'Coupled residues (marked *) were detected.' + str_ += 'Please rerun PropKa with the --display-coupled-residues \n' + str_ += 'or -d option for detailed information.\n' + return str_ -def getSummarySection(protein, conformation, parameters): +def get_summary_section(protein, conformation, parameters): + """Returns string with summary section of the results. + + Args: + protein: protein object + conformation: specific conformation + parameters: parameters + Returns: + string """ - prints out the pka-section of the result - """ - str = "%s\n" % ( getSummaryHeader() ) + str_ = "{0:s}\n".format(get_summary_header()) # printing pKa summary for residue_type in parameters.write_out_order: for group in protein.conformations[conformation].groups: - if group.residue_type == residue_type: - str += "%s" % ( group.getSummaryString(parameters.remove_penalised_group) ) - - return str + if group.residue_type == residue_type: + str_ += "{0:s}".format( + group.get_summary_string( + parameters.remove_penalised_group)) + return str_ -def getFoldingProfileSection(protein, conformation='AVR', direction="folding", reference="neutral", window=[0., 14., 1.0], verbose=False, options=None): +def get_folding_profile_section( + protein, conformation='AVR', direction="folding", reference="neutral", + window=[0., 14., 1.0], _=False, __=None): + """Returns string with the folding profile section of the results. + + Args: + protein: protein object + conformation: specific conformation + direction: 'folding' or other + reference: reference state + window: pH window [min, max, step] + _: Boolean for verbose output + __: options object + Returns: + string """ - returns the protein-folding-profile section - """ - str = getTheLine() - str += "\n" - str += "Free energy of %9s (kcal/mol) as a function of pH (using %s reference)\n" % (direction, reference) - - profile, [pH_opt, dG_opt], [dG_min, dG_max], [pH_min, pH_max] = protein.getFoldingProfile(conformation=conformation, - reference=reference, - direction=direction, grid=[0., 14., 0.1], options=options) - if profile == None: - str += "Could not determine folding profile\n" + str_ = get_the_line() + str_ += "\n" + str_ += "Free energy of {0:>9s} (kcal/mol) as a function".format(direction) + str_ += " of pH (using {0:s} reference)\n".format(reference) + profile, [ph_opt, dg_opt], [dg_min, dg_max], [ph_min, ph_max] = ( + protein.get_folding_profile( + conformation=conformation, reference=reference, + grid=[0., 14., 0.1])) + if profile is None: + str_ += "Could not determine folding profile\n" else: - for (pH, dG) in profile: - if pH >= window[0] and pH <= window[1] and (pH%window[2] < 0.05 or pH%window[2] > 0.95): - str += "%6.2lf%10.2lf\n" % (pH, dG) - str += "\n" - - if pH_opt == None or dG_opt == None: - str += "Could not determine pH optimum\n" + for (ph, dg) in profile: + if ph >= window[0] and ph <= window[1]: + if ph % window[2] < 0.05 or ph % window[2] > 0.95: + str_ += "{0:>6.2f}{1:>10.2f}\n".format(ph, dg) + str_ += "\n" + if ph_opt is None or dg_opt is None: + str_ += "Could not determine pH optimum\n" else: - str += "The pH of optimum stability is %4.1lf for which the free energy is%6.1lf kcal/mol at 298K\n" % (pH_opt, dG_opt) - - if dG_min == None or dG_max == None: - str += "Could not determine pH values where the free energy is within 80 %s of minimum\n" % ("%") + str_ += "The pH of optimum stability is {0:>4.1f}".format(ph_opt) + str_ += ( + " for which the free energy is {0:>6.1f} kcal/mol at 298K\n".format( + dg_opt)) + if dg_min is None or dg_max is None: + str_ += "Could not determine pH values where the free energy" + str_ += " is within 80 % of minimum\n" else: - str += "The free energy is within 80 %s of maximum at pH %4.1lf to %4.1lf\n" % ("%", dG_min, dG_max) - - if pH_min == None or pH_max == None: - str += "Could not determine the pH-range where the free energy is negative\n\n" + str_ += "The free energy is within 80 % of maximum" + str_ += " at pH {0:>4.1f} to {1:>4.1f}\n".format(dg_min, dg_max) + if ph_min is None or ph_max is None: + str_ += "Could not determine the pH-range where the free" + str_ += " energy is negative\n\n" else: - str += "The free energy is negative in the range %4.1lf - %4.1lf\n\n" % (pH_min, pH_max) + str_ += "The free energy is negative in the range" + str_ += " {0:>4.1f} - {1:>4.1f}\n\n".format(ph_min, ph_max) + return str_ - return str +def get_charge_profile_section(protein, conformation='AVR', _=None): + """Returns string with the charge profile section of the results. - - -def getChargeProfileSection(protein, conformation='AVR', options=None): + Args: + protein: protein object + conformation: specific conformation + _: options object + Returns: + string """ - returns the protein-folding-profile section - """ - str = "Protein charge of folded and unfolded state as a function of pH\n" - - profile = protein.getChargeProfile(conformation=conformation,grid=[0., 14., 1.]) - if profile == None: - str += "Could not determine charge profile\n" + str_ = "Protein charge of folded and unfolded state as a function of pH\n" + profile = protein.get_charge_profile(conformation=conformation, + grid=[0., 14., 1.]) + if profile is None: + str_ += "Could not determine charge profile\n" else: - str += "%6s%10s%8s\n" % ("pH", "unfolded", "folded") - for (pH, Q_mod, Q_pro) in profile: - str += "%6.2lf%10.2lf%8.2lf\n" % (pH, Q_mod, Q_pro) - - - pI_pro, pI_mod = protein.getPI(conformation=conformation) - if pI_pro == None or pI_mod == None: - str += "Could not determine the pI\n\n" + str_ += ' pH unfolded folded\n' + for (ph, q_mod, q_pro) in profile: + str_ += "{ph:6.2f}{qm:10.2f}{qp:8.2f}\n".format( + ph=ph, qm=q_mod, qp=q_pro) + pi_pro, pi_mod = protein.get_pi(conformation=conformation) + if pi_pro is None or pi_mod is None: + str_ += "Could not determine the pI\n\n" else: - str += "The pI is %5.2lf (folded) and %5.2lf (unfolded)\n" % (pI_pro, pI_mod) + str_ += ("The pI is {0:>5.2f} (folded) and {1:>5.2f} (unfolded)\n") + return str_ - return str +def write_jackal_scap_file(mutation_data=None, filename="1xxx_scap.list", + _=None): + """Write a scap file for, i.e., generating a mutated protein - -def writeJackalScapFile(mutationData=None, filename="1xxx_scap.list", options=None): + TODO - figure out what this is """ - writing a scap file for, i.e., generating a mutated protein + with open(filename, 'w') as file_: + for chain_id, _, res_num, code2 in mutation_data: + str_ = "{chain:s}, {num:d}, {code:s}\n".format( + chain=chain_id, num=res_num, code=code2) + file_.write(str_) + + +def write_scwrl_sequence_file(sequence, filename="x-ray.seq", _=None): + """Write a scwrl sequence file for, e.g., generating a mutated protein + + TODO - figure out what this is """ - file = open(filename, 'w') - - for chain_id, code1, res_num, code2 in mutationData: - str = "%s, %d, %s\n" % (chain_id, res_num, code2) - file.write(str) - file.close() + with open(filename, 'w') as file_: + start = 0 + while len(sequence[start:]) > 60: + file_.write("{0:s}s\n".format(sequence[start:start+60])) + start += 60 + file_.write("{0:s}\n".format(sequence[start:])) -def writeScwrlSequenceFile(sequence, filename="x-ray.seq", options=None): +def get_propka_header(): + """Create the header. + + Returns: + string """ - writing a scwrl sequence file for, e.g., generating a mutated protein - """ - file = open(filename, 'w') - - start = 0 - while len(sequence[start:]) > 60: - file.write( "%s\n" % (sequence[start:start+60]) ) - start += 60 - file.write( "%s\n" % (sequence[start:]) ) - - file.close() - - - -# --- various header text --- # - - -def getPropkaHeader(): - """ - Creates the header - """ - from datetime import date today = date.today() + str_ = "propka3.1 {0!s:>93s}\n".format(today) + str_ += """ +------------------------------------------------------------------------------- +-- -- +-- PROPKA: A PROTEIN PKA PREDICTOR -- +-- -- +-- VERSION 1.0, 04/25/2004, IOWA CITY -- +-- BY HUI LI -- +-- -- +-- VERSION 2.0, 11/05/2007, IOWA CITY/COPENHAGEN -- +-- BY DELPHINE C. BAS AND DAVID M. ROGERS -- +-- -- +-- VERSION 3.0, 01/06/2011, COPENHAGEN -- +-- BY MATS H.M. OLSSON AND CHRESTEN R. SONDERGARD -- +-- -- +-- VERSION 3.1, 07/01/2011, COPENHAGEN -- +-- BY CHRESTEN R. SONDERGARD AND MATS H.M. OLSSON -- +-- -- +------------------------------------------------------------------------------- +""" + return str_ +def get_references_header(): + """Create the 'references' part of output file. - str = "propka3.1 %93s\n" % (today) - str += "-------------------------------------------------------------------------------------------------------\n" - str += "-- --\n" - str += "-- PROPKA: A PROTEIN PKA PREDICTOR --\n" - str += "-- --\n" - str += "-- VERSION 1.0, 04/25/2004, IOWA CITY --\n" - str += "-- BY HUI LI --\n" - str += "-- --\n" - str += "-- VERSION 2.0, 11/05/2007, IOWA CITY/COPENHAGEN --\n" - str += "-- BY DELPHINE C. BAS AND DAVID M. ROGERS --\n" - str += "-- --\n" - str += "-- VERSION 3.0, 01/06/2011, COPENHAGEN --\n" - str += "-- BY MATS H.M. OLSSON AND CHRESTEN R. SONDERGARD --\n" - str += "-- --\n" - str += "-- VERSION 3.1, 07/01/2011, COPENHAGEN --\n" - str += "-- BY CHRESTEN R. SONDERGARD AND MATS H.M. OLSSON --\n" - str += "-------------------------------------------------------------------------------------------------------\n" - str += "\n" - - - - return str - - -def getReferencesHeader(): + Returns: + string """ - Returns the 'references' part in output file + str_ = """ +------------------------------------------------------------------------------- +References: + +Very Fast Empirical Prediction and Rationalization of Protein pKa Values. +Hui Li, Andrew D. Robertson and Jan H. Jensen. PROTEINS: Structure, Function, +and Bioinformatics. 61:704-721 (2005) + +Very Fast Prediction and Rationalization of pKa Values for Protein-Ligand +Complexes. Delphine C. Bas, David M. Rogers and Jan H. Jensen. PROTEINS: +Structure, Function, and Bioinformatics 73:765-783 (2008) + +PROPKA3: Consistent Treatment of Internal and Surface Residues in Empirical +pKa predictions. Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski, +and Jan H. Jensen. Journal of Chemical Theory and Computation, 7(2):525-537 +(2011) + +Improved Treatment of Ligands and Coupling Effects in Empirical Calculation +and Rationalization of pKa Values. Chresten R. Sondergaard, Mats H.M. Olsson, +Michal Rostkowski, and Jan H. Jensen. Journal of Chemical Theory and +Computation, (2011) +------------------------------------------------------------------------------- +""" + return str_ + + +def get_warning_header(): + """Create the 'warning' part of the output file. + + TODO - this function is essentially a no-op. + + Returns: + string """ - - str = "" - str += "-------------------------------------------------------------------------------------------------------\n" - str += " References:\n" - str += "\n" - str += " Very Fast Empirical Prediction and Rationalization of Protein pKa Values\n" - str += " Hui Li, Andrew D. Robertson and Jan H. Jensen\n" - str += " PROTEINS: Structure, Function, and Bioinformatics 61:704-721 (2005)\n" - str += " \n" - str += " Very Fast Prediction and Rationalization of pKa Values for Protein-Ligand Complexes\n" - str += " Delphine C. Bas, David M. Rogers and Jan H. Jensen\n" - str += " PROTEINS: Structure, Function, and Bioinformatics 73:765-783 (2008)\n" - str += " \n" - str += " PROPKA3: Consistent Treatment of Internal and Surface Residues in Empirical pKa predictions\n" - str += " Mats H.M. Olsson, Chresten R. Sondergard, Michal Rostkowski, and Jan H. Jensen\n" - str += " Journal of Chemical Theory and Computation, 7(2):525-537 (2011)\n" - str += " \n" - str += " Improved Treatment of Ligands and Coupling Effects in Empirical Calculation\n" - str += " and Rationalization of pKa Values\n" - str += " Chresten R. Sondergaard, Mats H.M. Olsson, Michal Rostkowski, and Jan H. Jensen\n" - str += " Journal of Chemical Theory and Computation, (2011)\n" - str += " \n" - str += "-------------------------------------------------------------------------------------------------------\n" - - return str + str_ = "" + return str_ -def getWarningHeader(): +def get_determinants_header(): + """Create the Determinant header. + + Returns: + string """ - Returns the 'warning' part in output file + str_ = """ +--------- ----- ------ --------------------- -------------- -------------- -------------- + DESOLVATION EFFECTS SIDECHAIN BACKBONE COULOMBIC + RESIDUE pKa BURIED REGULAR RE HYDROGEN BOND HYDROGEN BOND INTERACTION +--------- ----- ------ --------- --------- -------------- -------------- -------------- +""" + return str_ + + +def get_summary_header(): + """Create the summary header. + + Returns: + string """ - - str = "" - - return str + str_ = get_the_line() + str_ += "\n" + str_ += "SUMMARY OF THIS PREDICTION\n" + str_ += " Group pKa model-pKa ligand atom-type" + return str_ -def getDeterminantsHeader(): +def get_the_line(): + """Draw the line-Johnny Cash would have been proud-or actually Aerosmith! + + NOTE - Johnny Cash walked the line. + + Returns: + string """ - Creates the Determinant header + str_ = "" + str_ += ("-" * 104) + return str_ + + +def make_interaction_map(name, list_, interaction): + """Print out an interaction map named 'name' of the groups in 'list' + based on the function 'interaction' + + Args: + list_: list of groups + interaction: some sort of function + Returns: + string """ - str = "" - str += "--------- ----- ------ --------------------- -------------- -------------- --------------\n" - str += " DESOLVATION EFFECTS SIDECHAIN BACKBONE COULOMBIC \n" - str += " RESIDUE pKa BURIED REGULAR RE HYDROGEN BOND HYDROGEN BOND INTERACTION \n" - str += "--------- ----- ------ --------- --------- -------------- -------------- --------------\n" - - return str - - -def getSummaryHeader(): - """ - returns the summary header - """ - str = getTheLine() - str += "\n" - str += "SUMMARY OF THIS PREDICTION\n" - str += " Group pKa model-pKa ligand atom-type" - - return str - - -def getTheLine(): - """ - draw the line - Johnny Cash would have been proud - or actually Aerosmith! - """ - str = "" - for i in range(0, 104): - str += "-" - - return str - - -# Interaction maps -def make_interaction_map(name, list, interaction): - """ Print out an interaction map named 'name' of the groups in 'list' - based on the function 'interaction' """ - # return an empty string, if the list is empty - if len(list)==0: + if len(list_) == 0: return '' - # for long list, use condensed formatting - if len(list)>10: + if len(list_) > 10: res = 'Condensed form:\n' - for i in range(len(list)): - for j in range(i,len(list)): - if interaction(list[i],list[j]): - res += 'Coupling: %9s - %9s\n'%(list[i].label,list[j].label) + for i, group1 in enumerate(list_): + for group2 in list_[i:]: + if interaction(group1, group2): + res += 'Coupling: {0:>9s} - {1:>9s}\n'.format( + group1.label, group2.label) return res - # Name and map header - res = '%s\n%12s'%(name,'') - for g in list: - res += '%9s | '%g.label - + res = '{0:s}\n{1:>12s}'.format(name, '') + for group in list_: + res += '{0:>9s} | '.format(group.label) # do the map - for g1 in list: - res += '\n%-12s'%(g1.label) - for g2 in list: + for group1 in list_: + res += '\n{0:<12s}'.format(group1.label) + for group2 in list_: tag = '' - if interaction(g1, g2): + if interaction(group1, group2): tag = ' X ' - res += '%10s| '%tag - + res += '{0:>10s}| '.format(tag) return res - diff --git a/propka/parameters.py b/propka/parameters.py index 143a59b..960cb81 100644 --- a/propka/parameters.py +++ b/propka/parameters.py @@ -1,232 +1,276 @@ - -from __future__ import division -from __future__ import print_function - -import math +"""Holds parameters and settings.""" +import pkg_resources import propka.lib as lib -import sys, os from propka.lib import info, warning -import pkg_resources # names and types of all key words in configuration file -matrices = ['interaction_matrix'] - -pair_wise_matrices = ['sidechain_cutoffs'] - -number_dictionaries = ['VanDerWaalsVolume','charge','model_pkas','ions', - 'valence_electrons','custom_model_pkas'] - -list_dictionaries = ['backbone_NH_hydrogen_bond','backbone_CO_hydrogen_bond'] - -string_dictionaries = ['protein_group_mapping'] - -string_lists = ['ignore_residues','angular_dependent_sidechain_interactions', - 'acid_list','base_list','exclude_sidechain_interactions', - 'backbone_reorganisation_list','write_out_order'] - -distances = ['desolv_cutoff','buried_cutoff','coulomb_cutoff1','coulomb_cutoff2'] - -parameters = ['Nmin','Nmax','desolvationSurfaceScalingFactor','desolvationPrefactor', - 'desolvationAllowance','coulomb_diel','COO_HIS_exception','OCO_HIS_exception', - 'CYS_HIS_exception','CYS_CYS_exception','min_ligand_model_pka','max_ligand_model_pka', - 'include_H_in_interactions','coupling_max_number_of_bonds', - 'min_bond_distance_for_hydrogen_bonds','coupling_penalty', - 'shared_determinants','common_charge_centre','hide_penalised_group', 'remove_penalised_group', - 'max_intrinsic_pKa_diff','min_interaction_energy','max_free_energy_diff','min_swap_pka_shift', - 'min_pka','max_pka','sidechain_interaction'] - -strings = ['version','output_file_tag','ligand_typing','pH','reference'] - - +MATRICES = ['interaction_matrix'] +PAIR_WISE_MATRICES = ['sidechain_cutoffs'] +NUMBER_DICTIONARIES = [ + 'VanDerWaalsVolume', 'charge', 'model_pkas', 'ions', 'valence_electrons', + 'custom_model_pkas'] +LIST_DICTIONARIES = ['backbone_NH_hydrogen_bond', 'backbone_CO_hydrogen_bond'] +STRING_DICTIONARIES = ['protein_group_mapping'] +STRING_LISTS = [ + 'ignore_residues', 'angular_dependent_sidechain_interactions', + 'acid_list', 'base_list', 'exclude_sidechain_interactions', + 'backbone_reorganisation_list', 'write_out_order'] +DISTANCES = ['desolv_cutoff', 'buried_cutoff', 'coulomb_cutoff1', + 'coulomb_cutoff2'] +PARAMETERS = [ + 'Nmin', 'Nmax', 'desolvationSurfaceScalingFactor', 'desolvationPrefactor', + 'desolvationAllowance', 'coulomb_diel', 'COO_HIS_exception', + 'OCO_HIS_exception', 'CYS_HIS_exception', 'CYS_CYS_exception', + 'min_ligand_model_pka', 'max_ligand_model_pka', + 'include_H_in_interactions', 'coupling_max_number_of_bonds', + 'min_bond_distance_for_hydrogen_bonds', 'coupling_penalty', + 'shared_determinants', 'common_charge_centre', 'hide_penalised_group', + 'remove_penalised_group', 'max_intrinsic_pka_diff', + 'min_interaction_energy', 'max_free_energy_diff', 'min_swap_pka_shift', + 'min_pka', 'max_pka', 'sidechain_interaction'] +STRINGS = ['version', 'output_file_tag', 'ligand_typing', 'pH', 'reference'] class Parameters: - def __init__(self, parameter_file): + """PROPKA parameter class.""" + def __init__(self, parameter_file): + """Initialize parameter class. + + Args: + parameter_file: file with parameters + """ + # TODO - need to define all members explicitly + self.model_pkas = {} + self.interaction_matrix = InteractionMatrix("interaction_matrix") + self.sidechain_cutoffs = None + # TODO - it would be nice to rename these; they're defined everywhere + self.COO_HIS_exception = None + self.OCO_HIS_exception = None + self.CYS_HIS_exception = None + self.CYS_CYS_exception = None + # These functions set up remaining data structures implicitly self.set_up_data_structures() self.read_parameters(parameter_file) - #self.print_interaction_parameters() - #self.print_interaction_parameters_latex() - #####self.print_interactions_latex() - #sys.exit(0) + def read_parameters(self, file_): + """Read parameters from file. - - return - - - def read_parameters(self, file): + Args: + file_: file to read + """ # try to locate the parameters file try: - ifile = pkg_resources.resource_filename(__name__, file) - input = lib.open_file_for_reading(ifile) - except: - input = lib.open_file_for_reading(file) - - for line in input: + ifile = pkg_resources.resource_filename(__name__, file_) + input_ = lib.open_file_for_reading(ifile) + except (IOError, FileNotFoundError, ValueError): + input_ = lib.open_file_for_reading(file_) + for line in input_: self.parse_line(line) - return - - def parse_line(self, line): + """Parse parameter file line.""" # first, remove comments comment_pos = line.find('#') if comment_pos != -1: line = line[:comment_pos] - # split the line into words words = line.split() if len(words) == 0: return - # parse the words - if len(words)==3 and words[0] in number_dictionaries: + if len(words) == 3 and words[0] in NUMBER_DICTIONARIES: self.parse_to_number_dictionary(words) - elif len(words)==2 and words[0] in string_lists: + elif len(words) == 2 and words[0] in STRING_LISTS: self.parse_to_string_list(words) - elif len(words)==2 and words[0] in distances: + elif len(words) == 2 and words[0] in DISTANCES: self.parse_distance(words) - elif len(words)==2 and words[0] in parameters: + elif len(words) == 2 and words[0] in PARAMETERS: self.parse_parameter(words) - elif len(words)==2 and words[0] in strings: + elif len(words) == 2 and words[0] in STRINGS: self.parse_string(words) - elif len(words)>2 and words[0] in list_dictionaries: + elif len(words) > 2 and words[0] in LIST_DICTIONARIES: self.parse_to_list_dictionary(words) - elif words[0] in matrices+pair_wise_matrices: + elif words[0] in MATRICES+PAIR_WISE_MATRICES: self.parse_to_matrix(words) - elif len(words)==3 and words[0] in string_dictionaries: + elif len(words) == 3 and words[0] in STRING_DICTIONARIES: self.parse_to_string_dictionary(words) - - #info(words) - - return - - def parse_to_number_dictionary(self, words): - exec('self.%s[\'%s\'] = %s'%tuple(words)) - return + """Parse field to number dictionary. + + Args: + words: strings to parse. + """ + dict_ = getattr(self, words[0]) + key = words[1] + value = words[2] + dict_[key] = float(value) def parse_to_string_dictionary(self, words): - exec('self.%s[\'%s\'] = \'%s\''%tuple(words)) - return + """Parse field to string dictionary. + + Args: + words: strings to parse + """ + dict_ = getattr(self, words[0]) + key = words[1] + value = words[2] + dict_[key] = value def parse_to_list_dictionary(self, words): - exec('if not \'%s\' in self.%s.keys(): self.%s[\'%s\'] = []'%(words[1],words[0],words[0],words[1])) - for word in words[2:]: - exec('self.%s[\'%s\'].append(%s)'%(words[0],words[1],word)) + """Parse field to list dictionary. - return + Args: + words: strings to parse. + """ + dict_ = getattr(self, words[0]) + key = words[1] + if not key in dict_: + dict_[key] = [] + for value in words[2:]: + if isinstance(value, list): + dict_[key].append([float(x) for x in value]) + dict_[key].append(float(value)) def parse_to_string_list(self, words): - exec('self.%s.append(\'%s\')'%tuple(words)) - return + """Parse field to string list. + + Args: + words: strings to parse + """ + list_ = getattr(self, words[0]) + value = words[1] + list_.append(value) def parse_to_matrix(self, words): - exec('self.%s.add(%s)'%(words[0],tuple(words[1:]))) - return + """Parse field to matrix. + + Args: + words: strings to parse + """ + matrix = getattr(self, words[0]) + value = tuple(words[1:]) + matrix.add(value) def parse_distance(self, words): - # float check needed - exec('self.%s = %s'%tuple(words)) - exec('self.%s_squared = pow(%s,2)'%tuple(words)) - return + """Parse field to distance. + + Args: + words: strings to parse + """ + value = float(words[1]) + setattr(self, words[0], value) + value_sq = value*value + attr = "{0:s}_squared".format(words[0]) + setattr(self, attr, value_sq) def parse_parameter(self, words): - exec('self.%s = %s'%tuple(words)) - return + """Parse field to parameters. + + Args: + words: strings to parse + """ + value = float(words[1]) + setattr(self, words[0], value) def parse_string(self, words): - #info('self.%s = \'%s\''%tuple(words)) - exec('self.%s = \'%s\''%tuple(words)) - return + """Parse field to strings. + Args: + words: strings to parse + """ + setattr(self, words[0], words[1]) def set_up_data_structures(self): - for key_word in number_dictionaries+list_dictionaries+string_dictionaries: - exec('self.%s = {}'%key_word) - for key_word in string_lists: - exec('self.%s = []'%key_word) - for key_word in strings: - exec('self.%s = \'\''%key_word) - for key_word in matrices: - exec('self.%s = Interaction_matrix(\'%s\')'%(key_word,key_word)) - for key_word in pair_wise_matrices: - exec('self.%s =Pair_wise_matrix(\'%s\')'%(key_word,key_word)) + """Set up internal data structures. - return + TODO - it would be better to make these assignments explicit in + __init__. + """ + for key_word in (NUMBER_DICTIONARIES + LIST_DICTIONARIES + + STRING_DICTIONARIES): + setattr(self, key_word, {}) + for key_word in STRING_LISTS: + setattr(self, key_word, []) + for key_word in STRINGS: + setattr(self, key_word, "") + for key_word in MATRICES: + matrix = InteractionMatrix(key_word) + setattr(self, key_word, matrix) + for key_word in PAIR_WISE_MATRICES: + matrix = PairwiseMatrix(key_word) + setattr(self, key_word, matrix) def print_interaction_parameters(self): + """Print interaction parameters.""" info('--------------- Model pKa values ----------------------') - for k in self.model_pkas.keys(): - info('%3s %8.2f' % (k, self.model_pkas[k])) + for k in self.model_pkas: + info('{0:>3s} {1:8.2f}'.format(k, self.model_pkas[k])) info('') info('--------------- Interactions --------------------------') - agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - - map = { - 'CG' :['ARG'], - 'C2N':['ARG'], - 'N30':['N+','LYS'], - 'N31':['N+','LYS'], - 'N32':['N+','LYS'], - 'N33':['N+','LYS'] , - 'NAR':['HIS'], - 'OCO':['COO'], - 'OP' :[],#['TYR','SER'], - 'SH' :['CYS'] , - 'NP1':[], - 'OH' :['ROH'], - 'O3' :[] , - 'CL' :[], - 'F' :[], - 'NAM':['AMD'], - 'N1' :[], - 'O2' :[]} - - - for g1 in agroups: - for g2 in lgroups: - - interaction = '%3s %3s %1s %4s %4s'%(g1,g2, - self.interaction_matrix[g1][g2], - self.sidechain_cutoffs.get_value(g1,g2)[0], - self.sidechain_cutoffs.get_value(g1,g2)[1]) - + agroups = [ + 'COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', + 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', + 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + lgroups = [ + 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', + 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] + map_ = { + 'CG': ['ARG'], 'C2N': ['ARG'], 'N30': ['N+', 'LYS'], + 'N31': ['N+', 'LYS'], 'N32': ['N+', 'LYS'], 'N33': ['N+', 'LYS'], + 'NAR': ['HIS'], 'OCO': ['COO'], 'OP': [], 'SH': ['CYS'], + 'NP1': [], 'OH': ['ROH'], 'O3': [], 'CL': [], 'F': [], + 'NAM': ['AMD'], 'N1': [], 'O2': []} + for group1 in agroups: + for group2 in lgroups: + fmt = "{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}" + interaction = fmt.format( + grp1=group1, grp2=group2, + mat=self.interaction_matrix[group1][group2], + val1=self.sidechain_cutoffs.get_value(group1, group2)[0], + val2=self.sidechain_cutoffs.get_value(group1, group2)[1]) map_interaction = '' - if g2 in map: - for m in map[g2]: - map_interaction += '|%3s %3s %1s %4s %4s'%(g1,m, - self.interaction_matrix[g1][m], - self.sidechain_cutoffs.get_value(g1,m)[0], - self.sidechain_cutoffs.get_value(g1,m)[1]) - if self.interaction_matrix[g1][m] != self.interaction_matrix[g1][g2]: + if group2 in map_: + for val in map_[group2]: + fmt = "|{grp1:>3s} {grp2:>3s} {mat:1s} {val1:4} {val2:4}" + map_interaction += fmt.format( + group1, val, self.interaction_matrix[group1][val], + self.sidechain_cutoffs.get_value(group1, val)[0], + self.sidechain_cutoffs.get_value(group1, val)[1]) + if (self.interaction_matrix[group1][val] + != self.interaction_matrix[group1][group2]): map_interaction += '* ' - if self.sidechain_cutoffs.get_value(g1,m)[0] != self.sidechain_cutoffs.get_value(g1,g2)[0] or \ - self.sidechain_cutoffs.get_value(g1,m)[1] != self.sidechain_cutoffs.get_value(g1,g2)[1]: + if (self.sidechain_cutoffs.get_value(group1, val)[0] + != self.sidechain_cutoffs.get_value( + group1, group2)[0] + or self.sidechain_cutoffs.get_value( + group1, val)[1] + != self.sidechain_cutoffs.get_value( + group1, group2)[1]): map_interaction += '! ' else: map_interaction += ' ' - if len(map[g2])==0 and (self.sidechain_cutoffs.get_value(g1,g2)[0] !=3 or self.sidechain_cutoffs.get_value(g1,g2)[1] != 4): + if (len(map_[group2]) == 0 + and (self.sidechain_cutoffs.get_value( + group1, group2)[0] + != 3 + or self.sidechain_cutoffs.get_value( + group1, group2)[1] + != 4)): map_interaction += '? ' - info(interaction, map_interaction) - - if g1==g2: + if group1 == group2: break info('-') - info('--------------- Exceptions ----------------------------') info('COO-HIS', self.COO_HIS_exception) info('OCO-HIS', self.OCO_HIS_exception) info('CYS-HIS', self.CYS_HIS_exception) info('CYS-CYS', self.CYS_CYS_exception) - info('--------------- Mapping -------------------------------') info(""" Titratable: @@ -251,284 +295,281 @@ NAM N1 O2 """) - return - - - - - def print_interaction_parameters_latex(self): -# info('--------------- Model pKa values ----------------------') -# for k in self.model_pkas.keys(): -# info('%3s %8.2f'%(k,self.model_pkas[k])) - -# info('') -# info('--------------- Interactions --------------------------') - agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - - map = { - 'CG' :['ARG'], - 'C2N':['ARG'], - 'N30':['N+','LYS'], - 'N31':['N+','LYS'], - 'N32':['N+','LYS'], - 'N33':['N+','LYS'] , - 'NAR':['HIS'], - 'OCO':['COO'], - 'OP' :[],#['TYR','SER'], - 'SH' :['CYS'] , - 'NP1':['AMD'], - 'OH' :['ROH'], - 'O3' :[] , - 'CL' :[], - 'F' :[], - 'NAM':[], - 'N1' :[], - 'O2' :[]} - - - s = """ -\\begin{longtable}{lllll} -\\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.} -\\label{tab:ligand_interaction_parameters}\\\\ - -\\toprule -Group1 & Group2 & Interaction & c1 &c2 \\\\ -\\midrule -\\endfirsthead - -\\multicolumn{5}{l}{\\emph{continued from the previous page}}\\\\ -\\toprule -Group1 & Group2 & Interaction & c1 &c2 \\\\ -\\midrule -\\endhead - -\\midrule -\\multicolumn{5}{r}{\\emph{continued on the next page}}\\\\ -\\endfoot - -\\bottomrule -\\endlastfoot - -"""%(self.sidechain_cutoffs.default) - for g1 in agroups: - for g2 in lgroups: - if self.interaction_matrix[g1][g2]=='-': + """Print interaction parameters in LaTeX format.""" + # TODO - if these lists and dictionaries are the same as above, then + # should be constants at the level of the module + agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', + 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', + 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', + 'N1', 'O2', 'OP', 'SH'] + lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', + 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', + 'SH'] + lines = [ + "", + "\\begin{{longtable}}{{lllll}}", + ("\\caption{{Ligand interaction parameters. For interactions not " + "listed, the default value of {0:s} is applied.}}").format( + self.sidechain_cutoffs.default), + "\\label{{tab:ligand_interaction_parameters}}\\\\", + "\\toprule", + "Group1 & Group2 & Interaction & c1 &c2 \\\\", + "\\midrule", + "\\endfirsthead", + "", + "\\multicolumn{{5}}{{l}}{\\emph{{continued from the previous page}}}\\\\", + "\\toprule", + "Group1 & Group2 & Interaction & c1 &c2 \\\\", + "\\midrule", + "\\endhead", + "", + "\\midrule", + "\\multicolumn{{5}}{{r}}{\\emph{{continued on the next page}}}\\\\", + "\\endfoot", + "", + "\\bottomrule", + "\\endlastfoot", + ""] + str_ = "\n".join(lines) + for group1 in agroups: + for group2 in lgroups: + if self.interaction_matrix[group1][group2] == '-': continue - if self.sidechain_cutoffs.get_value(g1,g2)==self.sidechain_cutoffs.default: + if (self.sidechain_cutoffs.get_value(group1, group2) + == self.sidechain_cutoffs.default): continue - - - s+= '%3s & %3s & %1s & %4s & %4s\\\\ \n'%(g1,g2, - self.interaction_matrix[g1][g2], - self.sidechain_cutoffs.get_value(g1,g2)[0], - self.sidechain_cutoffs.get_value(g1,g2)[1]) - - if g1==g2: + fmt = ( + "{grp1:>3s} & {grp2:>3s} & {mat:1s} & {val1:4} & " + "{val2:4}\\\\ \n") + str_ += fmt.format( + group1, group2, + self.interaction_matrix[group1][group2], + self.sidechain_cutoffs.get_value(group1, group2)[0], + self.sidechain_cutoffs.get_value(group1, group2)[1]) + if group1 == group2: break - - s += ' \\end{longtable}\n' - info(s) - return + str_ += ' \\end{{longtable}}\n' + info(str_) def print_interactions_latex(self): - agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - lgroups = ['CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - - - s = """ -\\begin{longtable}{%s} -\\caption{Ligand interaction parameters. For interactions not listed, the default value of %s is applied.} -\\label{tab:ligand_interaction_parameters}\\\\ - -\\toprule -Group1 & Group2 & Interaction & c1 &c2 \\\\ -\\midrule -\\endfirsthead - -\\multicolumn{5}{l}{\\emph{continued from the previous page}}\\\\ -\\toprule -Group1 & Group2 & Interaction & c1 &c2 \\\\ -\\midrule -\\endhead - -\\midrule -\\multicolumn{5}{r}{\\emph{continued on the next page}}\\\\ -\\endfoot - -\\bottomrule -\\endlastfoot - -"""%('l'*len(agroups),self.sidechain_cutoffs.default) - for g1 in agroups: - for g2 in agroups: - - s+= '%3s & %3s & %1s & %4s & %4s\\\\ \n'%(g1,g2, - self.interaction_matrix[g1][g2], - self.sidechain_cutoffs.get_value(g1,g2)[0], - self.sidechain_cutoffs.get_value(g1,g2)[1]) - - if g1==g2: + """Print interactions in LaTeX.""" + # TODO - are these the same lists as above? Convert to module constants. + agroups = ['COO', 'HIS', 'CYS', 'TYR', 'SER', 'N+', 'LYS', 'AMD', + 'ARG', 'TRP', 'ROH', 'CG', 'C2N', 'N30', 'N31', 'N32', + 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', + 'N1', 'O2', 'OP', 'SH'] + lines = [ + "", + "\\begin{longtable}{{{0:s}}}".format('l'*len(agroups)), + ("\\caption{{Ligand interaction parameters. For interactions not " + "listed, the default value of {0:s} is applied.}}").format( + str(self.sidechain_cutoffs.default)), + "\\label{{tab:ligand_interaction_parameters}}\\\\", + "\\toprule", + "Group1 & Group2 & Interaction & c1 &c2 \\\\", + "\\midrule", + "\\endfirsthead", + "", + "\\multicolumn{{5}}{{l}}{\\emph{{continued from the previous page}}}\\\\", + "\\toprule", + "Group1 & Group2 & Interaction & c1 &c2 \\\\", + "\\midrule", + "\\endhead", + "", + "\\midrule", + "\\multicolumn{{5}}{{r}}{\\emph{{continued on the next page}}}\\\\", + "\\endfoot", + "", + "\\bottomrule", + "\\endlastfoot", + "" + ] + str_ = "\n".join(lines) + for group1 in agroups: + for group2 in agroups: + fmt = '{g1:>3s} & {g2:>3s} & {mat:1s} & {val1:>4s} & {val2:>4s}\\\\ \n' + str_ += fmt.format( + group1, group2, self.interaction_matrix[group1][group2], + str(self.sidechain_cutoffs.get_value(group1, group2)[0]), + str(self.sidechain_cutoffs.get_value(group1, group2)[1])) + if group1 == group2: break - - s += ' \\end{longtable}\n' - info(s) - return + str_ += ' \\end{{longtable}}\n' + info(str_) +class InteractionMatrix: + """Interaction matrix class.""" - -class Interaction_matrix: def __init__(self, name): + """Initialize with name of matrix. + + Args: + name: name of interaction matrix + """ self.name = name + self.value = None self.ordered_keys = [] self.dictionary = {} - return - def add(self,words): + def add(self, words): + """Add values to matrix. + + Args: + words: values to add + """ new_group = words[0] self.ordered_keys.append(new_group) - if not new_group in self.dictionary.keys(): self.dictionary[new_group] = {} - - for i in range(len(self.ordered_keys)): - group = self.ordered_keys[i] - if len(words)>i+1: + for i, group in enumerate(self.ordered_keys): + if len(words) > i+1: try: - exec('self.value = %s'%words[i+1]) - except: + self.value = float(words[i+1]) + except ValueError: self.value = words[i+1] self.dictionary[group][new_group] = self.value self.dictionary[new_group][group] = self.value - - return - def get_value(self, item1, item2): + """Get specific matrix value. + + Args: + item1: matrix row index + item2: matrix column index + Returns: + matrix value or None + """ try: return self.dictionary[item1][item2] - except: + except KeyError: return None def __getitem__(self, group): + """Get specific group from matrix. + + Args: + group: group to get + """ if group not in self.dictionary.keys(): - raise Exception('%s not found in interaction matrix %s'%(group,self.name)) + str_ = '{0:s} not found in interaction matrix {1:s}'.format( + group, self.name) + raise KeyError(str_) return self.dictionary[group] - def keys(self): + """Get keys from matrix. + + Returns: + dictionary key list + """ return self.dictionary.keys() def __str__(self): - s = ' ' - for k1 in self.ordered_keys: - s+='%3s '%k1 - s+='\n' - for k1 in self.ordered_keys: - s+='%3s '%k1 - for k2 in self.ordered_keys: - s+='%3s '%self[k1][k2] - s+='\n' - - return s -# ks = ['COO', 'SER', 'ARG', 'LYS', 'HIS', 'AMD', 'CYS', 'TRP','ROH','TYR','N+','CG', 'C2N', 'N30', 'N31', 'N32', 'N33', 'NAR', 'OCO', 'NP1', 'OH', 'O3', 'CL', 'F', 'NAM', 'N1', 'O2', 'OP', 'SH'] - -# p = '' -# n=0 -# for i in range(len(ks)): -# for j in range(i,len(ks)): -# if not [0.0,0.0]==self[ks[i]][ks[j]]: -# if not [3.0,4.0]==self[ks[i]][ks[j]]: -# p+='sidechain_cutoff %3s %3s %s\n'%(ks[i],ks[j],self[ks[i]][ks[j]]) -# n+=1 - -# info('total',n,len(ks)) -# return p + str_ = ' ' + for key in self.ordered_keys: + str_ += '{0:>3s} '.format(key) + str_ += '\n' + for key1 in self.ordered_keys: + str_ += '{0:>3s} '.format(key1) + for key2 in self.ordered_keys: + str_ += '{0:>3s} '.format(self[key1][key2]) + str_ += '\n' + return str_ +class PairwiseMatrix: + """Pairwise interaction matrix class.""" -class Pair_wise_matrix: def __init__(self, name): + """Initialize pairwise matrix. + + Args: + name: name of pairwise interaction + """ self.name = name self.dictionary = {} self.default = [0.0, 0.0] - return - def add(self,words): + def add(self, words): + """Add information to the matrix. + + TODO - this function unnecessarily bundles arguments into a tuple + + Args: + words: tuple with assignment information and value + """ # assign the default value - if len(words)==3 and words[0]=='default': + if len(words) == 3 and words[0] == 'default': self.default = [float(words[1]), float(words[2])] return - # assign non-default values - g1 = words[0] - g2 = words[1] - v = [float(words[2]), float(words[3])] + group1 = words[0] + group2 = words[1] + value = [float(words[2]), float(words[3])] + self.insert(group1, group2, value) + self.insert(group2, group1, value) - self.insert(g1,g2,v) - self.insert(g2,g1,v) + def insert(self, key1, key2, value): + """Insert value into matrix. - return - - def insert(self, k1,k2,v): - - if k1 in self.dictionary.keys() and k2 in self.dictionary[k1].keys(): - if k1!=k2: - warning('Parameter value for %s, %s defined more than once' % (k1, k2)) - - if not k1 in self.dictionary: - self.dictionary[k1] = {} - - self.dictionary[k1][k2] =v - - return + Args: + key1: first matrix key (row) + key2: second matrix key (column) + value: value to insert + """ + if key1 in self.dictionary and key2 in self.dictionary[key1]: + if key1 != key2: + str_ = ( + 'Parameter value for {0:s}, {1:s} defined more ' + 'than once'.format(key1, key2)) + warning(str_) + if not key1 in self.dictionary: + self.dictionary[key1] = {} + self.dictionary[key1][key2] = value def get_value(self, item1, item2): + """Get specified value from matrix. + Args: + item1: row index + item2: column index + Returns: + matrix value (or default) + """ try: return self.dictionary[item1][item2] - except: + except KeyError: return self.default def __getitem__(self, group): + """Get item from matrix corresponding to specific group. + + Args: + group: group to retrieve + Returns: + matrix information + """ if group not in self.dictionary.keys(): - raise Exception('%s not found in interaction matrix %s'%(group,self.name)) + str_ = '{0:s} not found in interaction matrix {1:s}'.format( + group, self.name) + raise KeyError(str_) return self.dictionary[group] - def keys(self): + """Get keys from matrix. + + Returns: + dictionary key list + """ return self.dictionary.keys() def __str__(self): - s='' - for k1 in self.keys(): - for k2 in self[k1].keys(): - s += '%s %s %s\n'%(k1,k2,self[k1][k2]) - - return s - - - - - - - - - - - - - - - - - - - - - - - + str_ = '' + for key1 in self.keys(): + for key2 in self[key1].keys(): + str_ += '{0:s} {1:s} {2:s}\n'.format( + key1, key2, self[key1][key2]) + return str_ diff --git a/propka/pdb.py b/propka/pdb.py index 0b01588..0074938 100644 --- a/propka/pdb.py +++ b/propka/pdb.py @@ -1,239 +1,257 @@ - -from __future__ import division -from __future__ import print_function - -import string, sys, copy - +"""PDB parsing functionality.""" import propka.lib -from propka.lib import info, warning - +from propka.lib import warning from propka.atom import Atom -from propka.conformation_container import Conformation_container +from propka.conformation_container import ConformationContainer -expected_atom_numbers = {'ALA':5, - 'ARG':11, - 'ASN':8, - 'ASP':8, - 'CYS':6, - 'GLY':4, - 'GLN':9, - 'GLU':9, - 'HIS':10, - 'ILE':8, - 'LEU':8, - 'LYS':9, - 'MET':8, - 'PHE':11, - 'PRO':7, - 'SER':6, - 'THR':7, - 'TRP':14, - 'TYR':12, - 'VAL':7} + +EXPECTED_ATOM_NUMBERS = {'ALA': 5, 'ARG': 11, 'ASN': 8, 'ASP': 8, 'CYS': 6, + 'GLY': 4, 'GLN': 9, 'GLU': 9, 'HIS': 10, 'ILE': 8, + 'LEU': 8, 'LYS': 9, 'MET': 8, 'PHE': 11, 'PRO': 7, + 'SER': 6, 'THR': 7, 'TRP': 14, 'TYR': 12, 'VAL': 7} def read_pdb(pdb_file, parameters, molecule): - conformations = {} + """Parse a PDB file. + Args: + pdb_file: file to read + parameters: parameters to guide parsing + molecule: molecular container + Returns: + list with elements: + 1. list of conformations + 2. list of names + """ + conformations = {} # read in all atoms in the file - lines = get_atom_lines_from_pdb(pdb_file, ignore_residues = parameters.ignore_residues, keep_protons = molecule.options.keep_protons, chains=molecule.options.chains) + lines = get_atom_lines_from_pdb( + pdb_file, ignore_residues=parameters.ignore_residues, + keep_protons=molecule.options.keep_protons, + chains=molecule.options.chains) for (name, atom) in lines: if not name in conformations.keys(): - conformations[name] = Conformation_container(name=name, parameters=parameters, molecular_container=molecule) + conformations[name] = ConformationContainer( + name=name, parameters=parameters, molecular_container=molecule) conformations[name].add_atom(atom) - # make a sorted list of conformation names names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) - return [conformations, names] -def protein_precheck(conformations, names): +def protein_precheck(conformations, names): + """Check protein for correct number of atoms, etc. + + Args: + names: conformation names to check + """ for name in names: atoms = conformations[name].atoms - # Group the atoms by their residue: atoms_by_residue = {} - for a in atoms: - if a.element != 'H': - res_id = resid_from_atom(a) + for atom in atoms: + if atom.element != 'H': + res_id = resid_from_atom(atom) try: - atoms_by_residue[res_id].append(a) + atoms_by_residue[res_id].append(atom) except KeyError: - atoms_by_residue[res_id] = [a] - + atoms_by_residue[res_id] = [atom] for res_id, res_atoms in atoms_by_residue.items(): res_name = res_atoms[0].res_name - residue_label = '%3s%5s'%(res_name, res_id) - + residue_label = '{0:>3s}{1:>5s}'.format(res_name, res_id) # ignore ligand residues - if res_name not in expected_atom_numbers: + if res_name not in EXPECTED_ATOM_NUMBERS: continue - # check for c-terminal if 'C-' in [a.terminal for a in res_atoms]: - if len(res_atoms) != expected_atom_numbers[res_name]+1: - warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name)) + if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]+1: + str_ = ("Unexpected number ({num:d}) of atoms in residue " + "{res:s} in conformation {conf:s}".format( + num=len(res_atoms), res=residue_label, + conf=name)) + warning(str_) continue - # check number of atoms in residue - if len(res_atoms) != expected_atom_numbers[res_name]: - warning('Unexpected number (%d) of atoms in residue %s in conformation %s' % (len(res_atoms), residue_label, name)) - - return - -def resid_from_atom(a): - return '%4d %s %s'%(a.res_num,a.chain_id,a.icode) + if len(res_atoms) != EXPECTED_ATOM_NUMBERS[res_name]: + str_ = ("Unexpected number ({num:d}) of atoms in residue " + "{res:s} in conformation {conf:s}".format( + num=len(res_atoms), res=residue_label, + conf=name)) + warning(str_) -def get_atom_lines_from_pdb(pdb_file, ignore_residues = [], keep_protons=False, tags = ['ATOM ', 'HETATM'], chains=None): +def resid_from_atom(atom): + """Return string with atom residue information. + Args: + atom: atom to generate string for + Returns + string + """ + return '{0:>4d} {1:s} {2:s}'.format( + atom.res_num, atom.chain_id, atom.icode) + + +def get_atom_lines_from_pdb(pdb_file, ignore_residues=[], keep_protons=False, + tags=['ATOM ', 'HETATM'], chains=None): + """Get atom lines from PDB file. + + Args: + pdb_file: PDB file to parse + ignore_residues: list of residues to ignore + keep_protons: bool to keep/ignore protons + tags: tags of lines that include atoms + chains: list of chains + """ lines = propka.lib.open_file_for_reading(pdb_file).readlines() nterm_residue = 'next_residue' old_residue = None terminal = None model = 1 - - for line in lines: tag = line[0:6] - # set the model number if tag == 'MODEL ': model = int(line[6:]) nterm_residue = 'next_residue' - if tag == 'TER ': nterm_residue = 'next_residue' - if tag in tags: alt_conf_tag = line[16] - residue_name = line[12:16] - residue_number = line[22:26] - + residue_name = line[12: 16] + residue_number = line[22: 26] # check if we want this residue - if line[17:20] in ignore_residues: + if line[17: 20] in ignore_residues: continue if chains and line[21] not in chains: continue - # set the Nterm residue number - nessecary because we may need to # identify more than one N+ group for structures with alt_conf tags if nterm_residue == 'next_residue' and tag == 'ATOM ': - # make sure that we reached a new residue - nessecary if OXT is not the last atom in - # the previous residue + # make sure that we reached a new residue - nessecary if OXT is + # not the last atom inthe previous residue if old_residue != residue_number: nterm_residue = residue_number old_residue = None - - # Identify the configuration # convert digits to letters if alt_conf_tag in '123456789': alt_conf_tag = chr(ord(alt_conf_tag)+16) if alt_conf_tag == ' ': alt_conf_tag = 'A' - conformation = '%d%s'%(model, alt_conf_tag) - + conformation = '{0:d}{1:s}'.format(model, alt_conf_tag) # set the terminal if tag == 'ATOM ': - if residue_name.strip() == 'N' and nterm_residue == residue_number: + if (residue_name.strip() == 'N' + and nterm_residue == residue_number): terminal = 'N+' - if residue_name.strip() in ['OXT','O\'\'']: + if residue_name.strip() in ['OXT', 'O\'\'']: terminal = 'C-' nterm_residue = 'next_residue' old_residue = residue_number # and yield the atom atom = Atom(line=line) atom.terminal = terminal - #if keep_protons: - # atom.is_protonated = True - if not (atom.element == 'H' and not keep_protons): #ignore hydrogen + #ignore hydrogen + if not (atom.element == 'H' and not keep_protons): yield (conformation, atom) - terminal = None - return - def write_pdb(conformation, filename): + """Write PDB conformation to a file. + + Args: + conformation: conformation container + filename: filename for output + """ write_pdb_for_atoms(conformation.atoms, filename) - return + def write_pdb_for_atoms(atoms, filename, make_conect_section=False): - out = propka.lib.open_file_for_writing(filename) + """Write out PDB file for atoms. + Args: + atoms: list of atoms + filename: name of file + make_conect_section: generate a CONECT PDB section + """ + out = propka.lib.open_file_for_writing(filename) for atom in atoms: out.write(atom.make_pdb_line()) - if make_conect_section: for atom in atoms: out.write(atom.make_conect_line()) - - out.close() - return - - def write_mol2_for_atoms(atoms, filename): + """Write out MOL2 file for atoms. - header = '@MOLECULE\n\n%d %d\nSMALL\nUSER_CHARGES\n' - + Args: + atoms: list of atoms + filename: name of file + """ + # TODO - header needs to be converted to format string + header = '@MOLECULE\n\n{natom:d} {id:d}\nSMALL\nUSER_CHARGES\n' atoms_section = '@ATOM\n' - for i in range(len(atoms)): - atoms_section += atoms[i].make_mol2_line(i+1) - - + for i, atom in enumerate(atoms): + atoms_section += atom.make_mol2_line(i+1) bonds_section = '@BOND\n' - id = 1 - for i in range(len(atoms)): - for j in range(i+1,len(atoms)): - if atoms[i] in atoms[j].bonded_atoms: - type = get_bond_order(atoms[i],atoms[j]) - bonds_section += '%7d %7d %7d %7s\n'%(id, i+1, j+1, type) - id+=1 - + id_ = 1 + for i, atom1 in enumerate(atoms): + for j, atom2 in enumerate(atoms, i+1): + if atom1 in atom2.bonded_atoms: + type_ = get_bond_order(atom1, atom2) + bonds_section += '{0:>7d} {1:>7d} {2:>7d} {3:>7s}\n'.format( + id_, i+1, j+1, type_) + id_ += 1 substructure_section = '@SUBSTRUCTURE\n\n' - if len(atoms)>0: - substructure_section = '@SUBSTRUCTURE\n%-7d %10s %7d\n'%(atoms[0].res_num,atoms[0].res_name,atoms[0].numb) - + if len(atoms) > 0: + substructure_section = ( + '@SUBSTRUCTURE\n{0:<7d} {1:>10s} {2:>7d}\n'.format( + atoms[0].res_num, atoms[0].res_name, atoms[0].numb)) out = propka.lib.open_file_for_writing(filename) - out.write(header%(len(atoms),id-1)) + out.write(header.format(natom=len(atoms), id=id_-1)) out.write(atoms_section) out.write(bonds_section) out.write(substructure_section) out.close() - return def get_bond_order(atom1, atom2): - type = '1' + """Get the order of a bond between two atoms. + + Args: + atom1: first atom in bond + atom2: second atom in bond + Returns: + string with bond type + """ + type_ = '1' pi_electrons1 = atom1.num_pi_elec_2_3_bonds pi_electrons2 = atom2.num_pi_elec_2_3_bonds - if '.ar' in atom1.sybyl_type: - pi_electrons1 -=1 + pi_electrons1 -= 1 if '.ar' in atom2.sybyl_type: - pi_electrons2 -=1 - + pi_electrons2 -= 1 if pi_electrons1 > 0 and pi_electrons2 > 0: - type = '%d'%(min(pi_electrons1, pi_electrons2)+1) - + type_ = '{0:d}'.format(min(pi_electrons1, pi_electrons2)+1) if '.ar' in atom1.sybyl_type and '.ar' in atom2.sybyl_type: - type = 'ar' - - - return type - + type_ = 'ar' + return type_ def write_input(molecular_container, filename): - out = propka.lib.open_file_for_writing(filename) + """Write PROPKA input file for molecular container. + Args: + molecular_container: molecular container + filename: output file name + """ + out = propka.lib.open_file_for_writing(filename) for conformation_name in molecular_container.conformation_names: - out.write('MODEL %s\n'%conformation_name) + out.write('MODEL {0:s}\n'.format(conformation_name)) # write atoms for atom in molecular_container.conformations[conformation_name].atoms: out.write(atom.make_input_line()) @@ -241,49 +259,59 @@ def write_input(molecular_container, filename): for atom in molecular_container.conformations[conformation_name].atoms: out.write(atom.make_conect_line()) # write covalently coupled groups - for group in molecular_container.conformations[conformation_name].groups: + for group in ( + molecular_container.conformations[conformation_name].groups): out.write(group.make_covalently_coupled_line()) # write non-covalently coupled groups - for group in molecular_container.conformations[conformation_name].groups: + for group in ( + molecular_container.conformations[conformation_name].groups): out.write(group.make_non_covalently_coupled_line()) out.write('ENDMDL\n') - out.close() - return +def read_input(input_file, parameters, molecule): + """Read PROPKA input file for molecular container. -def read_input(input_file, parameters,molecule): + Args: + input_file: input file + parameters: parameters for parsing/setup + molecule: molecular container + Returns: + list with [conformations, names of conformations] + """ conformations = {} - # read in all atoms in the input file lines = get_atom_lines_from_input(input_file) for (name, atom) in lines: if not name in conformations.keys(): - conformations[name] = Conformation_container(name=name, parameters=parameters, molecular_container=molecule) + conformations[name] = ConformationContainer( + name=name, parameters=parameters, + molecular_container=molecule) conformations[name].add_atom(atom) - # make a sorted list of conformation names names = sorted(conformations.keys(), key=propka.lib.conformation_sorter) - return [conformations, names] +def get_atom_lines_from_input(input_file, tags=['ATOM ', 'HETATM']): + """Get atom lines from a PROPKA input file. -def get_atom_lines_from_input(input_file, tags = ['ATOM ','HETATM']): + Args: + input_file: input file + tags: tags defining atom lines + Yields: + conformation container, list of atoms + """ lines = propka.lib.open_file_for_reading(input_file).readlines() conformation = '' - atoms = {} numbers = [] - for line in lines: tag = line[0:6] - # set the conformation if tag == 'MODEL ': conformation = line[6:].strip() - # found an atom - save it if tag in tags: atom = Atom(line=line) @@ -292,46 +320,39 @@ def get_atom_lines_from_input(input_file, tags = ['ATOM ','HETATM']): atom.is_protonated = True atoms[atom.numb] = atom numbers.append(atom.numb) - # found bonding information - apply it - if tag == 'CONECT' and len(line)>14: + if tag == 'CONECT' and len(line) > 14: conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] center_atom = atoms[int(conect_numbers[0])] - for n in conect_numbers[1:]: - b = atoms[int(n)] + for num in conect_numbers[1:]: + bond_atom = atoms[int(num)] # remember to check for cysteine bridges - if center_atom.element == 'S' and b.element == 'S': - center_atom.cysteine_bridge = True - b.cysteine_bridge = True + if center_atom.element == 'S' and bond_atom.element == 'S': + center_atom.cysteine_bridge = True + bond_atom.cysteine_bridge = True # set up bonding - if not b in center_atom.bonded_atoms: - center_atom.bonded_atoms.append(b) - if not center_atom in b.bonded_atoms: - b.bonded_atoms.append(center_atom) - + if not bond_atom in center_atom.bonded_atoms: + center_atom.bonded_atoms.append(bond_atom) + if not center_atom in bond_atom.bonded_atoms: + bond_atom.bonded_atoms.append(center_atom) # found info on covalent coupling - if tag == 'CCOUPL' and len(line)>14: + if tag == 'CCOUPL' and len(line) > 14: conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] center_atom = atoms[int(conect_numbers[0])] - for n in conect_numbers[1:]: - cg = atoms[int(n)] - center_atom.group.couple_covalently(cg.group) - + for num in conect_numbers[1:]: + cov_atom = atoms[int(num)] + center_atom.group.couple_covalently(cov_atom.group) # found info on non-covalent coupling - if tag == 'NCOUPL' and len(line)>14: + if tag == 'NCOUPL' and len(line) > 14: conect_numbers = [line[i:i+5] for i in range(6, len(line)-1, 5)] center_atom = atoms[int(conect_numbers[0])] - for n in conect_numbers[1:]: - cg = atoms[int(n)] - center_atom.group.couple_non_covalently(cg.group) - + for num in conect_numbers[1:]: + cov_atom = atoms[int(num)] + center_atom.group.couple_non_covalently(cov_atom.group) # this conformation is done - yield the atoms if tag == 'ENDMDL': - for n in numbers: - yield (conformation, atoms[n]) + for num in numbers: + yield (conformation, atoms[num]) # prepare for next conformation atoms = {} numbers = [] - - - return diff --git a/propka/propka.cfg b/propka/propka.cfg index a12e38a..c78ea43 100644 --- a/propka/propka.cfg +++ b/propka/propka.cfg @@ -1,6 +1,6 @@ # PropKa configuration file -version version_A +version VersionA # Model pKa values model_pkas C- 3.20 @@ -342,7 +342,7 @@ common_charge_centre 0 remove_penalised_group 1 # non-covalent coupling -max_intrinsic_pKa_diff 2.0 +max_intrinsic_pka_diff 2.0 min_interaction_energy 0.5 max_free_energy_diff 1.0 min_swap_pka_shift 1.0 diff --git a/propka/protonate.py b/propka/protonate.py index cef3216..bd74894 100644 --- a/propka/protonate.py +++ b/propka/protonate.py @@ -1,441 +1,365 @@ -#!/usr/bin/python +"""Protonate a structure.""" +import math +import propka.bonds +import propka.atom +from propka.vector_algebra import rotate_vector_around_an_axis, Vector +from propka.lib import warning, debug -from __future__ import division -from __future__ import print_function - -from propka.vector_algebra import * -import propka.bonds, propka.pdb, propka.atom -from propka.lib import info, warning, debug class Protonate: """ Protonates atoms using VSEPR theory """ def __init__(self, verbose=False): - self.verbose=verbose - - self.valence_electrons = {'H': 1, - 'He':2, - 'Li':1, - 'Be':2, - 'B': 3, - 'C': 4, - 'N': 5, - 'O': 6, - 'F': 7, - 'Ne':8, - 'Na':1, - 'Mg':2, - 'Al':3, - 'Si':4, - 'P': 5, - 'S': 6, - 'Cl':7, - 'Ar':8, - 'K': 1, - 'Ca':2, - 'Sc':2, - 'Ti':2, - 'Va':2, - 'Cr':1, - 'Mn':2, - 'Fe':2, - 'Co':2, - 'Ni':2, - 'Cu':1, - 'Zn':2, - 'Ga':3, - 'Ge':4, - 'As':5, - 'Se':6, - 'Br':7, - 'Kr':8, - 'I':7, - } - - - - - - self.standard_charges= {'ARG-NH1':1.0, - 'ASP-OD2':-1.0, - 'GLU-OE2':-1.0, - 'HIS-ND1':1.0, - 'LYS-NZ':1.0, - 'N+':1.0, - 'C-':-1.0} - - - self.sybyl_charges = {'N.pl3':+1, - 'N.3':+1, - 'N.4':+1, - 'N.ar':+1, - 'O.co2-':-1} - - - self.bond_lengths = {'C':1.09, - 'N':1.01, - 'O':0.96, - 'F':0.92, - 'Cl':1.27, - 'Br':1.41, - 'I':1.61, - 'S':1.35} - - - # protonation_methods[steric_number] = method - self.protonation_methods = {4:self.tetrahedral, - 3:self.trigonal} - - - return - - + """Initialize with flag for verbosity + Args: + verbose: True for verbose output + """ + self.verbose = verbose + self.valence_electrons = { + 'H': 1, 'He': 2, 'Li': 1, 'Be': 2, 'B': 3, 'C': 4, 'N': 5, + 'O': 6, 'F': 7, 'Ne': 8, 'Na': 1, 'Mg': 2, 'Al': 3, 'Si': 4, + 'P': 5, 'S': 6, 'Cl': 7, 'Ar': 8, 'K': 1, 'Ca': 2, 'Sc': 2, + 'Ti': 2, 'Va': 2, 'Cr': 1, 'Mn': 2, 'Fe': 2, 'Co': 2, 'Ni': 2, + 'Cu': 1, 'Zn': 2, 'Ga': 3, 'Ge': 4, 'As': 5, 'Se': 6, 'Br': 7, + 'Kr': 8, 'I': 7} + # TODO - consider putting charges in a configuration file + self.standard_charges = { + 'ARG-NH1': 1.0, 'ASP-OD2': -1.0, 'GLU-OE2': -1.0, 'HIS-ND1': 1.0, + 'LYS-NZ': 1.0, 'N+': 1.0, 'C-': -1.0} + self.sybyl_charges = { + 'N.pl3': 1, 'N.3': 1, 'N.4': 1, 'N.ar': 1, 'O.co2-': 1} + # TODO - consider putting bond lengths in a configuration file + self.bond_lengths = { + 'C': 1.09, 'N': 1.01, 'O': 0.96, 'F': 0.92, 'Cl': 1.27, + 'Br': 1.41, 'I': 1.61, 'S': 1.35} + self.protonation_methods = {4: self.tetrahedral, 3: self.trigonal} def protonate(self, molecules): - """ Will protonate all atoms in the molecular container """ + """Protonate all atoms in the molecular container. + Args: + molecules: molecular containers + """ debug('----- Protonation started -----') # Remove all currently present hydrogen atoms self.remove_all_hydrogen_atoms(molecules) - # protonate all atoms for name in molecules.conformation_names: - non_H_atoms = molecules.conformations[name].get_non_hydrogen_atoms() - - for atom in non_H_atoms: + non_h_atoms = (molecules.conformations[name] + .get_non_hydrogen_atoms()) + for atom in non_h_atoms: self.protonate_atom(atom) - # fix hydrogen names - #self.set_proton_names(non_H_atoms) + @staticmethod + def remove_all_hydrogen_atoms(molecular_container): + """Remove all hydrogen atoms from molecule. - return - - - def remove_all_hydrogen_atoms(self, molecular_container): + Args: + molecular_container: molecule to remove hydrogens from + """ for name in molecular_container.conformation_names: - molecular_container.conformations[name].atoms = molecular_container.conformations[name].get_non_hydrogen_atoms() - return - + molecular_container.conformations[name].atoms = ( + molecular_container.conformations[name] + .get_non_hydrogen_atoms()) def set_charge(self, atom): + """Set charge for atom. + + Args: + atom: atom to be charged + """ # atom is a protein atom - if atom.type=='atom': - key = '%3s-%s'%(atom.res_name, atom.name) + if atom.type == 'atom': + key = '{0:3s}-{1:s}'.format(atom.res_name, atom.name) if atom.terminal: debug(atom.terminal) - key=atom.terminal - if key in list(self.standard_charges.keys()): + key = atom.terminal + if key in self.standard_charges: atom.charge = self.standard_charges[key] debug('Charge', atom, atom.charge) atom.charge_set = True # atom is a ligand atom - elif atom.type=='hetatm': - if atom.sybyl_type in list(self.sybyl_charges.keys()): + elif atom.type == 'hetatm': + if atom.sybyl_type in self.sybyl_charges: atom.charge = self.sybyl_charges[atom.sybyl_type] - atom.sybyl_type = atom.sybyl_type.replace('-','') + atom.sybyl_type = atom.sybyl_type.replace('-', '') atom.charge_set = True - return - def protonate_atom(self, atom): - if atom.is_protonated: return - if atom.element == 'H': return + """Protonate an atom. + Args: + atom: atom to be protonated + """ + if atom.is_protonated: + return + if atom.element == 'H': + return self.set_charge(atom) self.set_number_of_protons_to_add(atom) self.set_steric_number_and_lone_pairs(atom) self.add_protons(atom) atom.is_protonated = True - return - def set_proton_names(self, heavy_atoms): + @staticmethod + def set_proton_names(heavy_atoms): + """Set names for protons. + Args: + heavy_atoms: list of heavy atoms with protons to be named + """ for heavy_atom in heavy_atoms: i = 1 for bonded in heavy_atom.bonded_atoms: - if bonded.element == 'H': - bonded.name+='%d'%i - i+=1 - - - return - + bonded.name += str(i) + i += 1 def set_number_of_protons_to_add(self, atom): - debug('*'*10) - debug('Setting number of protons to add for',atom) - atom.number_of_protons_to_add = 8 - debug(' %4d'%8) - atom.number_of_protons_to_add -= self.valence_electrons[atom.element] - debug('Valence eletrons: %4d'%-self.valence_electrons[atom.element]) - atom.number_of_protons_to_add -= len(atom.bonded_atoms) - debug('Number of bonds: %4d'%- len(atom.bonded_atoms)) - atom.number_of_protons_to_add -= atom.num_pi_elec_2_3_bonds - debug('Pi electrons: %4d'%-atom.num_pi_elec_2_3_bonds) - atom.number_of_protons_to_add += int(atom.charge) - debug('Charge: %4.1f'%atom.charge) + """Set the number of protons to add to this atom. + Args: + atom: atom for calculation + """ + debug('*'*10) + debug('Setting number of protons to add for', atom) + atom.number_of_protons_to_add = 8 + debug(" 8") + atom.number_of_protons_to_add -= self.valence_electrons[atom.element] + debug('Valence electrons: {0:>4d}'.format( + -self.valence_electrons[atom.element])) + atom.number_of_protons_to_add -= len(atom.bonded_atoms) + debug('Number of bonds: {0:>4d}'.format(-len(atom.bonded_atoms))) + atom.number_of_protons_to_add -= atom.num_pi_elec_2_3_bonds + debug('Pi electrons: {0:>4d}'.format(-atom.num_pi_elec_2_3_bonds)) + atom.number_of_protons_to_add += int(atom.charge) + debug('Charge: {0:>4.1f}'.format(atom.charge)) debug('-'*10) debug(atom.number_of_protons_to_add) - return - def set_steric_number_and_lone_pairs(self, atom): + """Set steric number and lone pairs for atom. + Args: + atom: atom for calculation + """ # If we already did this, there is no reason to do it again if atom.steric_num_lone_pairs_set: return - debug('='*10) - debug('Setting steric number and lone pairs for',atom) - - # costumly set the N backbone atoms up for peptide bond trigonal planer shape - #if atom.name == 'N' and len(atom.bonded_atoms) == 2: - # atom.steric_number = 3 - # atom.number_of_lone_pairs = 0 - # self.display 'Peptide bond: steric number is %d and number of lone pairs is %s'%(atom.steric_number, - # atom.number_of_lone_pairs) - # return - - + debug('Setting steric number and lone pairs for', atom) atom.steric_number = 0 - - debug('%65s: %4d'%('Valence electrons',self.valence_electrons[atom.element])) + debug('{0:>65s}: {1:>4d}'.format( + 'Valence electrons', self.valence_electrons[atom.element])) atom.steric_number += self.valence_electrons[atom.element] - - debug('%65s: %4d'%('Number of bonds',len(atom.bonded_atoms))) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of bonds', len(atom.bonded_atoms))) atom.steric_number += len(atom.bonded_atoms) - - debug('%65s: %4d'%('Number of hydrogen atoms to add',atom.number_of_protons_to_add)) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of hydrogen atoms to add', atom.number_of_protons_to_add)) atom.steric_number += atom.number_of_protons_to_add - - debug('%65s: %4d'%('Number of pi-electrons in double and triple bonds(-)',atom.num_pi_elec_2_3_bonds)) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of pi-electrons in double and triple bonds(-)', + atom.num_pi_elec_2_3_bonds)) atom.steric_number -= atom.num_pi_elec_2_3_bonds - - debug('%65s: %4d'%('Number of pi-electrons in conjugated double and triple bonds(-)',atom.num_pi_elec_conj_2_3_bonds)) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of pi-electrons in conjugated double and triple bonds(-)', + atom.num_pi_elec_conj_2_3_bonds)) atom.steric_number -= atom.num_pi_elec_conj_2_3_bonds - - debug('%65s: %4d'%('Number of donated co-ordinated bonds',0)) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of donated co-ordinated bonds', 0)) atom.steric_number += 0 - - debug('%65s: %4.1f'%('Charge(-)',atom.charge)) + debug('{0:>65s}: {1:>4.1f}'.format( + 'Charge(-)', atom.charge)) atom.steric_number -= atom.charge - atom.steric_number = math.floor(atom.steric_number/2.0) - - atom.number_of_lone_pairs = atom.steric_number - len(atom.bonded_atoms) - atom.number_of_protons_to_add - + atom.number_of_lone_pairs = ( + atom.steric_number-len(atom.bonded_atoms)-atom.number_of_protons_to_add) debug('-'*70) - debug('%65s: %4d'%('Steric number',atom.steric_number)) - debug('%65s: %4d'%('Number of lone pairs',atom.number_of_lone_pairs)) - + debug('{0:>65s}: {1:>4d}'.format( + 'Steric number', atom.steric_number)) + debug('{0:>65s}: {1:>4d}'.format( + 'Number of lone pairs', atom.number_of_lone_pairs)) atom.steric_num_lone_pairs_set = True - return - - def add_protons(self, atom): + """Add protons to atom. + + Args: + atom: atom for calculation + """ # decide which method to use - debug('PROTONATING',atom) + debug('PROTONATING', atom) if atom.steric_number in list(self.protonation_methods.keys()): self.protonation_methods[atom.steric_number](atom) else: - warning('Do not have a method for protonating', atom, '(steric number: %d)' % atom.steric_number) - - return - + warning('Do not have a method for protonating', atom, + '(steric number: {0:d})'.format(atom.steric_number)) def trigonal(self, atom): - debug('TRIGONAL - %d bonded atoms'%(len(atom.bonded_atoms))) + """Add hydrogens in trigonal geometry. + + Args: + atom: atom to protonate + """ + debug('TRIGONAL - {0:d} bonded atoms'.format(len(atom.bonded_atoms))) rot_angle = math.radians(120.0) - - c = vector(atom1 = atom) - + cvec = Vector(atom1=atom) # 0 bonds if len(atom.bonded_atoms) == 0: pass - # 1 bond if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first one - a = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) + avec = Vector(atom1=atom, atom2=atom.bonded_atoms[0]) # use plane of bonded trigonal atom - e.g. arg - self.set_steric_number_and_lone_pairs(atom.bonded_atoms[0]) - if atom.bonded_atoms[0].steric_number == 3 and len(atom.bonded_atoms[0].bonded_atoms)>1: - # use other atoms bonded to the neighbour to establish the plane, if possible + if (atom.bonded_atoms[0].steric_number == 3 + and len(atom.bonded_atoms[0].bonded_atoms) > 1): + # use other atoms bonded to the neighbour to establish the + # plane, if possible other_atom_indices = [] - for i in range(len(atom.bonded_atoms[0].bonded_atoms)): - if atom.bonded_atoms[0].bonded_atoms[i] != atom: + for i, bonded_atom in enumerate( + atom.bonded_atoms[0].bonded_atoms): + if bonded_atom != atom: other_atom_indices.append(i) - - - v1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) - v2 = vector(atom1 = atom.bonded_atoms[0], - atom2 = atom.bonded_atoms[0].bonded_atoms[other_atom_indices[0]]) - - axis = v1**v2 - - # this is a trick to make sure that the order of atoms doesn't influence - # the final postions of added protons - if len(other_atom_indices)>1: - v3 = vector(atom1 = atom.bonded_atoms[0], - atom2 = atom.bonded_atoms[0].bonded_atoms[other_atom_indices[1]]) - - axis2 = v1**v3 - - if axis * axis2>0: + vec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]) + vec2 = Vector(atom1=atom.bonded_atoms[0], + atom2=atom.bonded_atoms[0] + .bonded_atoms[other_atom_indices[0]]) + axis = vec1**vec2 + # this is a trick to make sure that the order of atoms doesn't + # influence the final postions of added protons + if len(other_atom_indices) > 1: + vec3 = Vector(atom1=atom.bonded_atoms[0], + atom2=atom.bonded_atoms[0] + .bonded_atoms[other_atom_indices[1]]) + axis2 = vec1**vec3 + if axis*axis2 > 0: axis = axis+axis2 else: axis = axis-axis2 - else: - axis = a.orthogonal() - - a = rotate_vector_around_an_axis(rot_angle, axis, a) - a = self.set_bond_distance(a, atom.element) - self.add_proton(atom, c+a) - + axis = avec.orthogonal() + avec = rotate_vector_around_an_axis(rot_angle, axis, avec) + avec = self.set_bond_distance(avec, atom.element) + self.add_proton(atom, cvec+avec) # 2 bonds if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first two - a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) - a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) + avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) + avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) - new_a = -a1 - a2 + new_a = -avec1 - avec2 new_a = self.set_bond_distance(new_a, atom.element) - self.add_proton(atom, c+new_a) - - - return - + self.add_proton(atom, cvec+new_a) def tetrahedral(self, atom): - debug('TETRAHEDRAL - %d bonded atoms'%(len(atom.bonded_atoms))) + """Protonate atom in tetrahedral geometry. + + Args: + atom: atom to protonate. + """ + debug( + 'TETRAHEDRAL - {0:d} bonded atoms'.format(len(atom.bonded_atoms))) + # TODO - might be good to move tetrahedral angle to constant rot_angle = math.radians(109.5) - - # sanity check - # if atom.number_of_protons_to_add + len(atom.bonded_atoms) != 4: - # self.display 'Error: Attempting tetrahedral structure with %d bonds'%(atom.number_of_protons_to_add + - # len(atom.bonded_atoms)) - - c = vector(atom1 = atom) - + cvec = Vector(atom1=atom) # 0 bonds if len(atom.bonded_atoms) == 0: pass - # 1 bond if len(atom.bonded_atoms) == 1 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first one - a = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]) - axis = a.orthogonal() - a = rotate_vector_around_an_axis(rot_angle, axis, a) - a = self.set_bond_distance(a, atom.element) - self.add_proton(atom, c+a) - + avec = Vector(atom1=atom, atom2=atom.bonded_atoms[0]) + axis = avec.orthogonal() + avec = rotate_vector_around_an_axis(rot_angle, axis, avec) + avec = self.set_bond_distance(avec, atom.element) + self.add_proton(atom, cvec+avec) # 2 bonds if len(atom.bonded_atoms) == 2 and atom.number_of_protons_to_add > 0: # Add another atom with the right angle to the first two - a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) - a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) - - axis = a1 + a2 - - new_a = rotate_vector_around_an_axis(math.radians(90), axis, -a1) + avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) + avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) + axis = avec1 + avec2 + new_a = rotate_vector_around_an_axis(math.radians(90), axis, + -avec1) new_a = self.set_bond_distance(new_a, atom.element) - self.add_proton(atom, c+new_a) - + self.add_proton(atom, cvec+new_a) # 3 bonds if len(atom.bonded_atoms) == 3 and atom.number_of_protons_to_add > 0: - a1 = vector(atom1 = atom, atom2 = atom.bonded_atoms[0]).rescale(1.0) - a2 = vector(atom1 = atom, atom2 = atom.bonded_atoms[1]).rescale(1.0) - a3 = vector(atom1 = atom, atom2 = atom.bonded_atoms[2]).rescale(1.0) - - new_a = -a1-a2-a3 + avec1 = Vector(atom1=atom, atom2=atom.bonded_atoms[0]).rescale(1.0) + avec2 = Vector(atom1=atom, atom2=atom.bonded_atoms[1]).rescale(1.0) + avec3 = Vector(atom1=atom, atom2=atom.bonded_atoms[2]).rescale(1.0) + new_a = -avec1-avec2-avec3 new_a = self.set_bond_distance(new_a, atom.element) - self.add_proton(atom, c+new_a) + self.add_proton(atom, cvec+new_a) - return + @staticmethod + def add_proton(atom, position): + """Add a proton to an atom at a specific position. - - def add_proton(self, atom, position): + Args: + atom: atom to protonate + position: position for proton + """ # Create the new proton - new_H = propka.atom.Atom() - new_H.set_property(numb = None, - name = 'H%s'%atom.name[1:], - res_name = atom.res_name, - chain_id = atom.chain_id, - res_num = atom.res_num, - x = round(position.x,3), # round of to three digimal points - y = round(position.y,3), # to avoid round-off differences - z = round(position.z,3), # when input file - occ = None, - beta = None) - new_H.element = 'H' - new_H.type = atom.type - - new_H.bonded_atoms = [atom] - new_H.charge = 0 - new_H.steric_number = 0 - new_H.number_of_lone_pairs = 0 - new_H.number_of_protons_to_add = 0 - new_H.num_pi_elec_2_3_bonds = 0 - new_H.is_protonates = True - - atom.bonded_atoms.append(new_H) - atom.number_of_protons_to_add -=1 - atom.conformation_container.add_atom(new_H) - + new_h = propka.atom.Atom() + new_h.set_property( + numb=None, + name='H{0:s}'.format(atom.name[1:]), + res_name=atom.res_name, + chain_id=atom.chain_id, + res_num=atom.res_num, + x=round(position.x, 3), # round of to three decimal points to + # avoid round-off differences in input file + y=round(position.y, 3), + z=round(position.z, 3), + occ=None, + beta=None) + new_h.element = 'H' + new_h.type = atom.type + new_h.bonded_atoms = [atom] + new_h.charge = 0 + new_h.steric_number = 0 + new_h.number_of_lone_pairs = 0 + new_h.number_of_protons_to_add = 0 + new_h.num_pi_elec_2_3_bonds = 0 + new_h.is_protonates = True + atom.bonded_atoms.append(new_h) + atom.number_of_protons_to_add -= 1 + atom.conformation_container.add_atom(new_h) # update names of all protons on this atom - new_H.residue_label = "%-3s%4d%2s" % (new_H.name,new_H.res_num, new_H.chain_id) + new_h.residue_label = "{0:<3s}{1:>4d}{2:>2s}".format( + new_h.name, new_h.res_num, new_h.chain_id) no_protons = atom.count_bonded_elements('H') if no_protons > 1: i = 1 for proton in atom.get_bonded_elements('H'): - proton.name = 'H%s%d'%(atom.name[1:],i) - proton.residue_label = "%-3s%4d%2s" % (proton.name,proton.res_num, proton.chain_id) - i+=1 + proton.name = 'H{0:s}{1:d}'.format( + atom.name[1:], i) + proton.residue_label = "{0:<3s}{1:>4d}{2:>2s}".format( + proton.name, proton.res_num, proton.chain_id) + i += 1 + debug('added', new_h, 'to', atom) + def set_bond_distance(self, bvec, element): + """Set bond distance between atom and element. - debug('added',new_H, 'to',atom) - return - - def set_bond_distance(self, a, element): - d = 1.0 + Args: + bvec: bond vector + element: bonded element + Returns: + scaled bond vector + """ + dist = 1.0 if element in list(self.bond_lengths.keys()): - d = self.bond_lengths[element] + dist = self.bond_lengths[element] else: - warning('Bond length for %s not found, using the standard value of %f' % (element, d)) - - a = a.rescale(d) - - return a - - -if __name__ == '__main__': - import protein, pdb, sys,os - arguments = sys.argv - if len(arguments) != 2: - info('Usage: protonate.py ') - sys.exit(0) - - filename = arguments[1] - if not os.path.isfile(filename): - info('Error: Could not find \"%s\"' % filename) - sys.exit(1) - - - p = Protonate() - pdblist = pdb.readPDB(filename) - my_protein = protein.Protein(pdblist,'test.pdb') - - p.remove_all_hydrogen_atoms_from_protein(my_protein) - my_protein.writePDB('before_protonation.pdb') - - p.protonate_protein(my_protein) - - ## write out protonated file - my_protein.writePDB('protonated.pdb') + str_ = ( + 'Bond length for {0:s} not found, using the standard value ' + 'of {1:f}'.format(element, dist)) + warning(str_) + bvec = bvec.rescale(dist) + return bvec diff --git a/propka/run.py b/propka/run.py index d5b3cc2..e851309 100644 --- a/propka/run.py +++ b/propka/run.py @@ -1,24 +1,24 @@ -# entry point for propka script +"""Entry point for PROPKA script.""" import logging -import propka.lib, propka.molecular_container +from propka.lib import loadOptions +from propka.molecular_container import Molecular_container _LOGGER = logging.getLogger("PROPKA") -def main(): - """ - Reads in structure files, calculates pKa values, and prints pKa files - """ - # loading options, flaggs and arguments - options = propka.lib.loadOptions() +def main(optargs=None): + """Read in structure files, calculate pKa values, and print pKa files.""" + # loading options, flags and arguments + optargs = optargs if optargs is not None else [] + options = loadOptions(*optargs) pdbfiles = options.filenames - for pdbfile in pdbfiles: - my_molecule = propka.molecular_container.Molecular_container(pdbfile, options) + my_molecule = Molecular_container(pdbfile, options) my_molecule.calculate_pka() my_molecule.write_pka() + def single(pdbfile, optargs=None): """Run a single PROPKA calculation using *pdbfile* as input. @@ -27,15 +27,15 @@ def single(pdbfile, optargs=None): .. rubric:: Example :: - single("protein.pdb", optargs=["--mutation=N25R/N181D", "-v", "--pH=7.2"]) + single("protein.pdb", optargs=["--mutation=N25R/N181D", "-v", + "--pH=7.2"]) """ optargs = optargs if optargs is not None else [] - options = propka.lib.loadOptions(*optargs) + options = loadOptions(*optargs) pdbfile = options.filenames.pop(0) if len(options.filenames) > 0: - _LOGGER.warning("Ignoring filenames: %s", options.filenames) - - my_molecule = propka.molecular_container.Molecular_container(pdbfile, options) + _LOGGER.warning("Ignoring filenames: {0:s}".format(options.filenames)) + my_molecule = Molecular_container(pdbfile, options) my_molecule.calculate_pka() my_molecule.write_pka() return my_molecule diff --git a/propka/vector_algebra.py b/propka/vector_algebra.py index 202d9c5..1303de5 100644 --- a/propka/vector_algebra.py +++ b/propka/vector_algebra.py @@ -1,11 +1,21 @@ -from __future__ import division -from __future__ import print_function +"""Vector algebra for PROPKA.""" import math -from propka.lib import info +from propka.lib import info, get_sorted_configurations -class vector: - """ Vector """ - def __init__(self, xi=0.0, yi=0.0, zi=0.0, atom1 = 0, atom2 = 0): + +class Vector: + """Vector""" + + def __init__(self, xi=0.0, yi=0.0, zi=0.0, atom1=None, atom2=None): + """Initialize vector. + + Args: + xi: default x-coordinate + yi: default y-coordinate + zi: default z-coordinate + atom1: atom center used to define default coordinate + atom2: two atom centers used to define vector + """ self.x = xi self.y = yi self.z = zi @@ -22,224 +32,280 @@ class vector: self.y = atom2.y - self.y self.z = atom2.z - self.z - return - def __add__(self, other): - return vector(self.x + other.x, + return Vector(self.x + other.x, self.y + other.y, - self.z + other.z) + self.z + other.z) def __sub__(self, other): - return vector(self.x - other.x, + return Vector(self.x - other.x, self.y - other.y, self.z - other.z) - - def __mul__(self, other): - """ Dot product, scalar and matrix multiplication """ - if isinstance(other,vector): + def __mul__(self, other): + """Dot product, scalar and matrix multiplication.""" + if isinstance(other, Vector): return self.x * other.x + self.y * other.y + self.z * other.z - elif isinstance(other, matrix4x4): - return vector( - xi = other.a11*self.x + other.a12*self.y + other.a13*self.z + other.a14*1.0, - yi = other.a21*self.x + other.a22*self.y + other.a23*self.z + other.a24*1.0, - zi = other.a31*self.x + other.a32*self.y + other.a33*self.z + other.a34*1.0 + elif isinstance(other, Matrix4x4): + return Vector( + xi=other.a11*self.x + other.a12*self.y + other.a13*self.z + + other.a14*1.0, + yi=other.a21*self.x + other.a22*self.y + other.a23*self.z + + other.a24*1.0, + zi=other.a31*self.x + other.a32*self.y + other.a33*self.z + + other.a34*1.0 ) elif type(other) in [int, float]: - return vector(self.x * other, self.y * other, self.z * other) + return Vector(self.x * other, self.y * other, self.z * other) else: - info('%s not supported' % type(other)) + info('{0:s} not supported'.format(type(other))) raise TypeError - def __rmul__(self,other): - return self.__mul__(other) - + def __rmul__(self, other): + return self.__mul__(other) def __pow__(self, other): - """ Cross product """ - return vector(self.y * other.z - self.z * other.y, + """Cross product.""" + return Vector(self.y * other.z - self.z * other.y, self.z * other.x - self.x * other.z, self.x * other.y - self.y * other.x) - def __neg__(self): - res = vector(xi = -self.x, - yi = -self.y, - zi = -self.z) + res = Vector(xi=-self.x, + yi=-self.y, + zi=-self.z) return res def sq_length(self): + """Return vector squared-length""" return self.x * self.x + self.y * self.y + self.z * self.z def length(self): + """Return vector length.""" return math.sqrt(self.sq_length()) def __str__(self): - return '%10.4f %10.4f %10.4f'%(self.x, self.y, self.z) + return '{0:>10.4f} {1:>10.4f} {2:>10.4f}'.format( + self.x, self.y, self.z) def __repr__(self): return '' def orthogonal(self): """ Returns a vector orthogonal to self """ - res = vector(self.y, -self.x, 0) - + res = Vector(self.y, -self.x, 0) if abs(self.y) < abs(self.z): - res = vector(self.z, 0, -self.x) - + res = Vector(self.z, 0, -self.x) return res def rescale(self, new_length): """ Rescale vector to new length while preserving direction """ frac = new_length/(self.length()) - res = vector(xi = self.x*frac, - yi = self.y*frac, - zi = self.z*frac) + res = Vector(xi=self.x*frac, yi=self.y*frac, zi=self.z*frac) return res -class matrix4x4: +class Matrix4x4: + """A 4-by-4 matrix class.""" + def __init__(self, a11i=0.0, a12i=0.0, a13i=0.0, a14i=0.0, a21i=0.0, a22i=0.0, a23i=0.0, a24i=0.0, a31i=0.0, a32i=0.0, a33i=0.0, a34i=0.0, a41i=0.0, a42i=0.0, a43i=0.0, a44i=0.0): - + """Initialize with matrix elements.""" + # Row 1 self.a11 = a11i self.a12 = a12i self.a13 = a13i self.a14 = a14i - + # Row 2 self.a21 = a21i self.a22 = a22i self.a23 = a23i self.a24 = a24i - + # Row 3 self.a31 = a31i self.a32 = a32i self.a33 = a33i self.a34 = a34i - + # Row 4 self.a41 = a41i self.a42 = a42i self.a43 = a43i self.a44 = a44i - return + +def angle(avec, bvec): + """Get the angle between two vectors. + + Args: + avec: vector 1 + bvec: vector 2 + Returns: + angle in radians + """ + dot = avec * bvec + return math.acos(dot / (avec.length() * bvec.length())) +def angle_degrees(avec, bvec): + """Get the angle between two vectors in degrees. + + Args: + avec: vector 1 + bvec: vector 2 + Returns: + angle in degrees + """ + return math.degrees(angle(avec, bvec)) +def signed_angle_around_axis(avec, bvec, axis): + """Get signed angle of two vectors around axis in radians. -# methods working on vectors + Args: + avec: vector 1 + bvec: vector 2 + axis: axis + Returns: + angle in radians + """ + norma = avec**axis + normb = bvec**axis + ang = angle(norma, normb) + dot_ = bvec*(avec**axis) + if dot_ < 0: + ang = -ang + return ang -def angle(a, b): - dot = a * b - return math.acos(dot / (a.length() * b.length())) +def rotate_vector_around_an_axis(theta, axis, vec): + """Rotate vector around an axis. - -def angle_degrees(a,b): - return math.degrees(angle(a, b)) - - -def signed_angle_around_axis(a,b, axis): - na = a**axis - nb = b**axis - - v = angle(na,nb) - - d = b*(a**axis) - - if d < 0: - v =-v - - return v - -def signed_angle_degrees(a,b): - return 180/math.pi * signed_angle(a, b) - - -def rotate_vector_around_an_axis(theta, axis, v): - #print "# 1. rotate space about the z-axis so that the rotation axis lies in the xz-plane" - gamma = 0.0 - if axis.y != 0: - if axis.x != 0: - gamma = -axis.x/abs(axis.x)*math.asin(axis.y/(math.sqrt(axis.x*axis.x + axis.y*axis.y))) - else: - gamma = math.pi/2.0 - - Rz = rotate_atoms_around_z_axis(gamma) - v = Rz * v - axis = Rz * axis - - #print "# 2. rotate space about the y-axis so that the rotation axis lies along the z-axis" - beta = 0.0 + Args: + theta: rotation angle (in radians) + axis: axis for rotation + vec: vector to rotate + Returns: + rotated vector + """ + gamma = 0.0 + if axis.y != 0: if axis.x != 0: - beta = -axis.x/abs(axis.x)*math.acos(axis.z/math.sqrt(axis.x*axis.x + axis.z*axis.z)) - Ry = rotate_atoms_around_y_axis(beta) - v = Ry * v - axis = Ry *axis + gamma = -axis.x/abs(axis.x)*math.asin( + axis.y/(math.sqrt(axis.x*axis.x + axis.y*axis.y))) + else: + gamma = math.pi/2.0 + rot_z = rotate_atoms_around_z_axis(gamma) + vec = rot_z * vec + axis = rot_z * axis + beta = 0.0 + if axis.x != 0: + beta = -axis.x/abs(axis.x)*math.acos( + axis.z/math.sqrt(axis.x*axis.x + axis.z*axis.z)) + rot_y = rotate_atoms_around_y_axis(beta) + vec = rot_y * vec + axis = rot_y * axis + rot_z = rotate_atoms_around_z_axis(theta) + vec = rot_z * vec + rot_y = rotate_atoms_around_y_axis(-beta) + vec = rot_y * vec + rot_z = rotate_atoms_around_z_axis(-gamma) + vec = rot_z * vec + return vec - #print "# 3. perform the desired rotation by theta about the z-axis" - Rz = rotate_atoms_around_z_axis(theta) - v = Rz * v - #print "# 4. apply the inverse of step 2." - Ry = rotate_atoms_around_y_axis(-beta) - v = Ry * v - - #print "# 5. apply the inverse of step 1." - Rz = rotate_atoms_around_z_axis(-gamma) - v = Rz * v - - return v +def rotate_atoms_around_z_axis(theta): + """Get rotation matrix for z-axis. -def rotate_atoms_around_z_axis(angle): - Rz = matrix4x4( - a11i = math.cos(angle), a12i = -math.sin(angle), a13i = 0.0, a14i = 0.0, - a21i = math.sin(angle), a22i = math.cos(angle), a23i = 0.0, a24i = 0.0, - a31i = 0.0 , a32i = 0.0 , a33i = 1.0, a34i = 0.0, - a41i = 0.0 , a42i = 0.0 , a43i = 0.0, a44i = 1.0 + Args: + theta: angle of rotation (radians) + Returns: + rotation matrix + """ + return Matrix4x4( + a11i=math.cos(theta), + a12i=-math.sin(theta), + a13i=0.0, + a14i=0.0, + a21i=math.sin(theta), + a22i=math.cos(theta), + a23i=0.0, + a24i=0.0, + a31i=0.0, + a32i=0.0, + a33i=1.0, + a34i=0.0, + a41i=0.0, + a42i=0.0, + a43i=0.0, + a44i=1.0 ) - - return Rz -def rotate_atoms_around_y_axis(angle): - Ry = matrix4x4( - a11i = math.cos(angle), a12i = 0.0, a13i = math.sin(angle), a14i = 0.0, - a21i = 0.0 , a22i = 1.0, a23i = 0.0 , a24i = 0.0, - a31i = -math.sin(angle), a32i = 0.0, a33i = math.cos(angle), a34i = 0.0, - a41i = 0.0 , a42i = 0.0, a43i = 0.0 , a44i = 1.0 +def rotate_atoms_around_y_axis(theta): + """Get rotation matrix for y-axis. + + Args: + theta: angle of rotation (radians) + Returns: + rotation matrix + """ + return Matrix4x4( + a11i=math.cos(theta), + a12i=0.0, + a13i=math.sin(theta), + a14i=0.0, + a21i=0.0, + a22i=1.0, + a23i=0.0, + a24i=0.0, + a31i=-math.sin(theta), + a32i=0.0, + a33i=math.cos(theta), + a34i=0.0, + a41i=0.0, + a42i=0.0, + a43i=0.0, + a44i=1.0 ) - - return Ry +class MultiVector: + """Collection of vectors for multiple configurations of atoms. -class multi_vector: - def __init__(self, atom1=0, atom2=0): + TODO - this class does not appear to be used or covered by tests + """ + + def __init__(self, atom1=None, atom2=None): + """Initialize with atom configurations. + + Args: + atom1: first atom to define vector + atom2: second atom to define vector + """ self.vectors = [] self.keys = [] - + self.result = None # store vectors for all configurations of atoms - if atom1!=0: - self.keys = lib.get_sorted_configurations(atom1.configurations.keys()) - if atom2!=0: - keys2 = lib.get_sorted_configurations(atom2.configurations.keys()) + if atom1 is not None: + self.keys = get_sorted_configurations(atom1.configurations.keys()) + if atom2 is not None: + keys2 = get_sorted_configurations(atom2.configurations.keys()) if self.keys != keys2: - raise 'Cannot make multi vector: Atomic configurations mismatch for\n %s\n %s\n'%(atom1,atom2) + str_ = ('Cannot make multi vector: Atomic configurations ' + 'mismatch for\n {0:s}\n {1:s}\n'.format( + atom1, atom2)) + raise KeyError(str_) for key in self.keys: atom1.setConfiguration(key) - if atom2!=0: + if atom2 != 0: atom2.setConfiguration(key) - v = vector(atom1=atom1, atom2=atom2) - self.vectors.append(v) - #info(key,v) - return - - def __getattribute__(self,name): + vec = Vector(atom1=atom1, atom2=atom2) + self.vectors.append(vec) + + def __getattribute__(self, name): try: return object.__getattribute__(self, name) except AttributeError: @@ -247,72 +313,103 @@ class multi_vector: def __str__(self): res = '' - for i in range(len(self.keys)): - res += '%s %s\n'%(self.keys[i], self.vectors[i]) + for i, key in enumerate(self.keys): + res += '{0:s} {1:s}\n'.format(key, self.vectors[i]) return res - def do_job(self, job): - #info(job) - self.res = multi_vector() - for i in range(len(self.vectors)): - self.res.vectors.append(eval('self.vectors[%d].%s()'%(i,job))) - self.res.keys.append(self.keys[i]) + """Append vectors to configuration. + + Args: + job: name of function to apply to vectors + Returns: + TODO - figure out what this is + """ + self.result = MultiVector() + for i, vector in enumerate(self.vectors): + func = getattr(vector, job) + self.result.vectors.append(func()) + self.result.keys.append(self.keys[i]) return self.get_result + @property def get_result(self): - return self.res - - def generic_operation(self, operation, other): - if self.keys != other.keys: - raise 'Incompatable keys' + """Return the latest result.""" + return self.result - self.res = multi_vector() + def generic_operation(self, operation, other): + """Perform a generic operation between two MultiVector objects. + + Args: + operation: operation to perform (string) + other: other MultiVector object + """ + if self.keys != other.keys: + raise 'Incompatible keys' + self.result = MultiVector() for i in range(len(self.vectors)): - self.res.vectors.append(eval('self.vectors[%d] %s other.vectors[%d]'%(i,operation,i))) - self.res.keys.append(self.keys[i]) - return + self.result.vectors.append( + # TODO - eliminate eval() or entire class + eval( + 'self.vectors[{0:d}] {1:s} other.vectors[{2:d}]'.format( + i, operation, i))) + self.result.keys.append(self.keys[i]) def __add__(self, other): - self.generic_operation('+',other) - return self.res + self.generic_operation('+', other) + return self.result def __sub__(self, other): - self.generic_operation('-',other) - return self.res + self.generic_operation('-', other) + return self.result def __mul__(self, other): - self.generic_operation('*',other) - return self.res + self.generic_operation('*', other) + return self.result def __pow__(self, other): - self.generic_operation('**',other) - return self.res + self.generic_operation('**', other) + return self.result - def generic_self_operation(self, operation): + @staticmethod + def generic_self_operation(_): + """TODO - delete this.""" return def __neg__(self): - self.generic_operation('*',-1.0) - return self.res + self.generic_operation('*', -1.0) + return self.result def rescale(self, new_length): - self.res = multi_vector() - for i in range(len(self.vectors)): - self.res.vectors.append(self.vectors[i].rescale(new_length)) - self.res.keys.append(self.keys[i]) + """Rescale multi-vector to new length. + + Args: + new_length: new length for multi-vector + Result: + MultiVector object + """ + self.result = MultiVector() + for i, vector in enumerate(self.vectors): + self.result.vectors.append(vector.rescale(new_length)) + self.result.keys.append(self.keys[i]) return self.res -def rotate_multi_vector_around_an_axis(theta, axis, v): - """ both axis ans v must be multi_vectors """ - - if axis.keys != v.keys: - raise 'Incompatible keys in rotate multi_vector' - - res = multi_vector() - for i in range(len(v.keys)): - res.vectors.append(rotate_vector_around_an_axis(theta, axis.vectors[i], v.vectors[i])) - res.keys.append(v.keys[i]) - +def rotate_multi_vector_around_an_axis(theta, axis, vec): + """Rotate a multi-vector around an axis. + + NOTE - both axis ans v must be MultiVectors. + + Args: + theta: angle (in radians) + axis: multi-vector axis + vec: multi-vector vector + """ + if axis.keys != vec.keys: + raise 'Incompatible keys in rotate MultiVector' + res = MultiVector() + for i, key in enumerate(vec.keys): + res.vectors.append(rotate_vector_around_an_axis( + theta, axis.vectors[i], vec.vectors[i])) + res.keys.append(key) return res diff --git a/propka/version.py b/propka/version.py index e09416c..9ff5d87 100644 --- a/propka/version.py +++ b/propka/version.py @@ -1,216 +1,290 @@ -from __future__ import division -from __future__ import print_function -import math -import sys, os +"""Contains version-specific methods and parameters. -import propka.lib as lib -from propka.lib import info, warning -import propka.calculations as calculations -import propka.parameters +TODO - this module unnecessarily confuses the code. Can we eliminate it? +""" +from propka.lib import info +import propka.calculations as calcs -class version: - def __init__(self,parameters): +class Version: + """Store version-specific methods and parameters.""" + def __init__(self, parameters): self.parameters = parameters - return + self.desolvation_model = self.empty_function + self.weight_pair_method = self.empty_function + self.hydrogen_bond_interaction_model = self.empty_function + self.sidechain_interaction_model = self.empty_function + self.electrostatic_interaction_model = self.empty_function + self.coulomb_interaction_model = self.empty_function + self.check_coulomb_pair_method = self.empty_function + self.backbone_reorganisation_method = self.empty_function + self.exception_check_method = self.empty_function + self.molecular_preparation_method = self.empty_function + self.prepare_bonds = self.empty_function + + @staticmethod + def empty_function(*args): + """Placeholder function so we don't use uninitialized variables. + + Args: + args: whatever arguments would have been passed to the function + Raises: + NotImplementedError + """ + err = "Called an empty Version function with args {0:s}".format(args) + raise NotImplementedError(err) - # desolvation def calculate_desolvation(self, group): + """Calculate desolvation energy using assigned model.""" return self.desolvation_model(self.parameters, group) - def calculatePairWeight(self, Nmass1, Nmass2): - return self.weight_pair_method(self.parameters, Nmass1, Nmass2) + def calculate_pair_weight(self, num_volume1, num_volume2): + """Calculate pair weight using assigned model.""" + return self.weight_pair_method( + self.parameters, num_volume1, num_volume2) - # side chains def hydrogen_bond_interaction(self, group1, group2): + """Calculate H-bond energy using assigned model.""" return self.hydrogen_bond_interaction_model(group1, group2, self) - def calculateSideChainEnergy(self, distance, dpka_max, cutoff, weight, f_angle): - return self.sidechain_interaction_model(distance, dpka_max, cutoff, f_angle) # weight is ignored in 3.0 Sep07 + def calculate_side_chain_energy(self, distance, dpka_max, cutoff, _, + f_angle): + """Calculate sidechain energy using assigned model.""" + return self.sidechain_interaction_model( + distance, dpka_max, cutoff, f_angle) - # coulomb def electrostatic_interaction(self, group1, group2, distance): - return self.electrostatic_interaction_model(group1, group2, distance, self) + """Calculate electrostatic energy using assigned model.""" + return self.electrostatic_interaction_model( + group1, group2, distance, self) - def calculateCoulombEnergy(self, distance, weight): - return self.coulomb_interaction_model(distance, weight, self.parameters) + def calculate_coulomb_energy(self, distance, weight): + """Calculate Coulomb energy using assigned model.""" + return self.coulomb_interaction_model( + distance, weight, self.parameters) - def checkCoulombPair(self, group1, group2, distance): - return self.check_coulomb_pair_method(self.parameters, group1, group2, distance) + def check_coulomb_pair(self, group1, group2, distance): + """Check Coulomb pair using assigned model.""" + return self.check_coulomb_pair_method( + self.parameters, group1, group2, distance) - # backbone re-organisation - def calculateBackBoneReorganization(self, conformation): - return self.backbone_reorganisation_method(self.parameters, conformation) + def calculate_backbone_reorganization(self, conformation): + """Calculate backbone reorganization using assigned model.""" + return self.backbone_reorganisation_method( + self.parameters, conformation) - # exceptions - def checkExceptions(self, group1, group2): + def check_exceptions(self, group1, group2): + """Calculate exceptions using assigned model.""" return self.exception_check_method(self, group1, group2) def setup_bonding_and_protonation(self, molecular_container): - return self.molecular_preparation_method(self.parameters, molecular_container) + """Setup bonding and protonation using assigned model.""" + return self.molecular_preparation_method( + self.parameters, molecular_container) def setup_bonding(self, molecular_container): + """Setup bonding using assigned model.""" return self.prepare_bonds(self.parameters, molecular_container) +class VersionA(Version): + """TODO - figure out what this is.""" -class version_A(version): def __init__(self, parameters): + """Initialize object with parameters.""" # set the calculation rutines used in this version - version.__init__(self, parameters) - - # atom naming, bonding, and protonation - self.molecular_preparation_method = propka.calculations.setup_bonding_and_protonation - self.prepare_bonds = propka.calculations.setup_bonding - - - # desolvation related methods - self.desolvation_model = calculations.radial_volume_desolvation - self.weight_pair_method = calculations.calculatePairWeight - - # side chain methods - self.sidechain_interaction_model = propka.calculations.HydrogenBondEnergy - self.hydrogen_bond_interaction_model = propka.calculations.hydrogen_bond_interaction - - # colomb methods - self.electrostatic_interaction_model = propka.calculations.electrostatic_interaction - self.check_coulomb_pair_method = propka.calculations.checkCoulombPair - self.coulomb_interaction_model = propka.calculations.CoulombEnergy - - #backbone - self.backbone_interaction_model = propka.calculations.HydrogenBondEnergy - self.backbone_reorganisation_method = propka.calculations.BackBoneReorganization - - # exception methods - self.exception_check_method = propka.calculations.checkExceptions - return + super().__init__(parameters) + self.molecular_preparation_method = calcs.setup_bonding_and_protonation + self.prepare_bonds = calcs.setup_bonding + self.desolvation_model = calcs.radial_volume_desolvation + self.weight_pair_method = calcs.calculate_pair_weight + self.sidechain_interaction_model = calcs.hydrogen_bond_energy + self.hydrogen_bond_interaction_model = calcs.hydrogen_bond_interaction + self.electrostatic_interaction_model = calcs.electrostatic_interaction + self.check_coulomb_pair_method = calcs.check_coulomb_pair + self.coulomb_interaction_model = calcs.coulomb_energy + self.backbone_interaction_model = calcs.hydrogen_bond_energy + self.backbone_reorganisation_method = calcs.backbone_reorganization + self.exception_check_method = calcs.check_exceptions def get_hydrogen_bond_parameters(self, atom1, atom2): + """Get hydrogen bond parameters for two atoms. + + Args: + atom1: first atom + atom2: second atom + Returns: + [dpka_max, cutoff] + """ dpka_max = self.parameters.sidechain_interaction - cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type) + cutoff = self.parameters.sidechain_cutoffs.get_value( + atom1.group_type, atom2.group_type) return [dpka_max, cutoff] def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): + """Get hydrogen bond parameters between backbone atom and other atom. + + Args: + backbone_atom: backbone atom + atom: other atom + Returns + [v, [c1, c3]] TODO - figure out what this is + """ if backbone_atom.group_type == 'BBC': if atom.group_type in self.parameters.backbone_CO_hydrogen_bond.keys(): - [v,c1,c2] = self.parameters.backbone_CO_hydrogen_bond[atom.group_type] - return [v,[c1,c2]] - + [v, c1, c2] = self.parameters.backbone_CO_hydrogen_bond[ + atom.group_type] + return [v, [c1, c2]] if backbone_atom.group_type == 'BBN': if atom.group_type in self.parameters.backbone_NH_hydrogen_bond.keys(): - [v,c1,c2] = self.parameters.backbone_NH_hydrogen_bond[atom.group_type] - return [v,[c1,c2]] - + [v, c1, c2] = self.parameters.backbone_NH_hydrogen_bond[ + atom.group_type] + return [v, [c1, c2]] return None +class SimpleHB(VersionA): + """A simple hydrogen bond version.""" - -class simple_hb(version_A): def __init__(self, parameters): + """Initialize object with parameters.""" # set the calculation rutines used in this version - version_A.__init__(self, parameters) + super().__init__(parameters) info('Using simple hb model') - return def get_hydrogen_bond_parameters(self, atom1, atom2): - return self.parameters.hydrogen_bonds.get_value(atom1.element, atom2.element) + """Get hydrogen bond parameters for two atoms. + Args: + atom1: first atom + atom2: second atom + Returns: + [dpka_max, cutoff] + """ + return self.parameters.hydrogen_bonds.get_value( + atom1.element, atom2.element) def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): - return self.parameters.hydrogen_bonds.get_value(backbone_atom.element, atom.element) + """Get hydrogen bond parameters between backbone atom and other atom. + + Args: + backbone_atom: backbone atom + atom: other atom + Returns + [v, [c1, c3]] TODO - figure out what this is + """ + return self.parameters.hydrogen_bonds.get_value( + backbone_atom.element, atom.element) +class ElementBasedLigandInteractions(VersionA): + """TODO - figure out what this is.""" - -class element_based_ligand_interactions(version_A): def __init__(self, parameters): + """Initialize object with parameters.""" # set the calculation rutines used in this version - version_A.__init__(self, parameters) + super().__init__(parameters) info('Using detailed SC model!') return def get_hydrogen_bond_parameters(self, atom1, atom2): - if not 'hetatm' in [atom1.type, atom2.type]: - # this is a protein-protein interaction - dpka_max = self.parameters.sidechain_interaction.get_value(atom1.group_type, atom2.group_type) - cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type) - return [dpka_max, cutoff] + """Get hydrogen bond parameters for two atoms. + Args: + atom1: first atom + atom2: second atom + Returns: + [dpka_max, cutoff] + """ + if 'hetatm' not in [atom1.type, atom2.type]: + # this is a protein-protein interaction + dpka_max = self.parameters.sidechain_interaction.get_value( + atom1.group_type, atom2.group_type) + cutoff = self.parameters.sidechain_cutoffs.get_value( + atom1.group_type, atom2.group_type) + return [dpka_max, cutoff] # at least one ligand atom is involved in this interaction # make sure that we are using the heavy atoms for finding paramters elements = [] - for a in [atom1, atom2]: - if a.element == 'H': elements.append(a.bonded_atoms[0].element) - else: elements.append(a.element) - - return self.parameters.hydrogen_bonds.get_value(elements[0], elements[1]) - + for atom in [atom1, atom2]: + if atom.element == 'H': + elements.append(atom.bonded_atoms[0].element) + else: + elements.append(atom.element) + return self.parameters.hydrogen_bonds.get_value( + elements[0], elements[1]) def get_backbone_hydrogen_bond_parameters(self, backbone_atom, atom): + """Get hydrogen bond parameters between backbone atom and other atom. + + Args: + backbone_atom: backbone atom + atom: other atom + Returns + [v, [c1, c3]] TODO - figure out what this is + """ if atom.type == 'atom': # this is a backbone-protein interaction - if backbone_atom.group_type == 'BBC' and\ - atom.group_type in self.parameters.backbone_CO_hydrogen_bond.keys(): - [v,c1,c2] = self.parameters.backbone_CO_hydrogen_bond[atom.group_type] - return [v,[c1,c2]] + if (backbone_atom.group_type == 'BBC' + and atom.group_type + in self.parameters.backbone_CO_hydrogen_bond.keys()): + [v, c1, c2] = self.parameters.backbone_CO_hydrogen_bond[ + atom.group_type] + return [v, [c1, c2]] - if backbone_atom.group_type == 'BBN' and\ - atom.group_type in self.parameters.backbone_NH_hydrogen_bond.keys(): - [v,c1,c2] = self.parameters.backbone_NH_hydrogen_bond[atom.group_type] - return [v,[c1,c2]] + if (backbone_atom.group_type == 'BBN' + and atom.group_type + in self.parameters.backbone_NH_hydrogen_bond.keys()): + [v, c1, c2] = self.parameters.backbone_NH_hydrogen_bond[ + atom.group_type] + return [v, [c1, c2]] else: # this is a backbone-ligand interaction # make sure that we are using the heavy atoms for finding paramters elements = [] - for a in [backbone_atom, atom]: - if a.element == 'H': elements.append(a.bonded_atoms[0].element) - else: elements.append(a.element) - - res = self.parameters.hydrogen_bonds.get_value(elements[0], elements[1]) + for atom2 in [backbone_atom, atom]: + if atom2.element == 'H': + elements.append(atom2.bonded_atoms[0].element) + else: + elements.append(atom2.element) + res = self.parameters.hydrogen_bonds.get_value( + elements[0], elements[1]) if not res: - info('Could not determine backbone interaction parameters for:', - backbone_atom, atom) - - return - + info( + 'Could not determine backbone interaction parameters for:', + backbone_atom, atom) + return None return None +class Propka30(Version): + """Version class for PROPKA 3.0.""" -class propka30(version): def __init__(self, parameters): - # set the calculation rutines used in this version - version.__init__(self, parameters) - - # atom naming, bonding, and protonation - self.molecular_preparation_method = propka.calculations.setup_bonding_and_protonation_30_style - - # desolvation related methods - self.desolvation_model = calculations.radial_volume_desolvation - self.weight_pair_method = calculations.calculatePairWeight - - # side chain methods - self.sidechain_interaction_model = propka.calculations.HydrogenBondEnergy - - # colomb methods - self.check_coulomb_pair_method = propka.calculations.checkCoulombPair - self.coulomb_interaction_model = propka.calculations.CoulombEnergy - - #backbone - self.backbone_reorganisation_method = propka.calculations.BackBoneReorganization - - # exception methods - self.exception_check_method = propka.calculations.checkExceptions - - - return + """Initialize object with parameters.""" + # set the calculation routines used in this version + super().__init__(parameters) + self.molecular_preparation_method = ( + calcs.setup_bonding_and_protonation_30_style) + self.desolvation_model = calcs.radial_volume_desolvation + self.weight_pair_method = calcs.calculate_pair_weight + self.sidechain_interaction_model = calcs.hydrogen_bond_energy + self.check_coulomb_pair_method = calcs.check_coulomb_pair + self.coulomb_interaction_model = calcs.coulomb_energy + self.backbone_reorganisation_method = calcs.backbone_reorganization + self.exception_check_method = calcs.check_exceptions def get_hydrogen_bond_parameters(self, atom1, atom2): - dpka_max = self.parameters.sidechain_interaction.get_value(atom1.group_type, atom2.group_type) - cutoff = self.parameters.sidechain_cutoffs.get_value(atom1.group_type, atom2.group_type) + """Get hydrogen bond parameters for two atoms. + + Args: + atom1: first atom + atom2: second atom + Returns: + [dpka_max, cutoff] + """ + dpka_max = self.parameters.sidechain_interaction.get_value( + atom1.group_type, atom2.group_type) + cutoff = self.parameters.sidechain_cutoffs.get_value( + atom1.group_type, atom2.group_type) return [dpka_max, cutoff] - - - - diff --git a/scripts/propka31.py b/scripts/propka31.py index 3b8c81e..d3459c8 100755 --- a/scripts/propka31.py +++ b/scripts/propka31.py @@ -1,32 +1,30 @@ #!/usr/bin/env python +"""PROPKA script. -# This is the original propka script. However, this distribute-based -# installation moved the main() function into propka.run.main and just -# generates a script called propka31 from the setup.py installation -# script. You should not need to use this script. -# -# (Also note that there can be import problems because the script name -# is the same as the module name; that's why the new script is called -# propka31.) +This is the original propka script. However, this distribute-based +installation moved the main() function into propka.run.main and just +generates a script called propka31 from the setup.py installation +script. You should not need to use this script. + +(Also note that there can be import problems because the script name +is the same as the module name; that's why the new script is called +propka31.) +""" +from propka.lib import loadOptions +from propka.molecular_container import Molecular_container -import propka.lib, propka.molecular_container def main(): - """ - Reads in structure files, calculates pKa values, and prints pKa files - """ + """Read in structure files, calculates pKa values, and prints pKa files.""" # loading options, flaggs and arguments - options = propka.lib.loadOptions() + options = loadOptions([]) pdbfiles = options.filenames for pdbfile in pdbfiles: - my_molecule = propka.molecular_container.Molecular_container(pdbfile, options) + my_molecule = Molecular_container(pdbfile, options) my_molecule.calculate_pka() my_molecule.write_pka() if __name__ == '__main__': - #import cProfile - #cProfile.run('main()',sort=1) main() - diff --git a/tests/test_basic_regression.py b/tests/test_basic_regression.py index 7e2a687..cd8036e 100644 --- a/tests/test_basic_regression.py +++ b/tests/test_basic_regression.py @@ -22,7 +22,8 @@ MAX_ERR_DECIMALS = 2 TEST_DIR = Path("tests") # Location for test PDBs PDB_DIR = Path("pdb") -# Location for results for comparing output (allow running from tests/ and ../tests/) +# Location for results for comparing output (allow running from tests/ and +# ../tests/) RESULTS_DIR = Path("tests/results") if not RESULTS_DIR.is_dir(): _LOGGER.warning("Switching to sub-directory") @@ -47,7 +48,9 @@ def get_test_dirs(): if test_path.is_dir(): path_dict[key] = test_path else: - errstr = "Can't find %s test files in %s" % (key, [TEST_DIR / path, path]) + errstr = ( + "Can't find {0:s} test files in {1:s}".format( + key, [TEST_DIR / path, path])) raise FileNotFoundError(errstr) return path_dict @@ -63,11 +66,13 @@ def run_propka(options, pdb_path, tmp_path): options += [str(pdb_path)] args = propka.lib.loadOptions(options) try: - _LOGGER.warning("Working in tmpdir %s because of PROPKA file output; need to fix this.", - tmp_path) + _LOGGER.warning( + "Working in tmpdir {0:s} because of PROPKA file output; " + "need to fix this.".format(str(tmp_path))) cwd = Path.cwd() os.chdir(tmp_path) - molecule = propka.molecular_container.Molecular_container(str(pdb_path), args) + molecule = propka.molecular_container.Molecular_container( + str(pdb_path), args) molecule.calculate_pka() molecule.write_pka() finally: @@ -90,7 +95,7 @@ def compare_output(pdb, tmp_path, ref_path): ref_data.append(float(line)) test_data = [] - pka_path = Path(tmp_path) / ("%s.pka" % pdb) + pka_path = Path(tmp_path) / ("{0:s}.pka".format(pdb)) with open(pka_path, "rt") as pka_file: at_pka = False for line in pka_file: @@ -100,12 +105,15 @@ def compare_output(pdb, tmp_path, ref_path): elif line.startswith("---"): at_pka = False else: - m = re.search(r'([0-9]+\.[0-9]+)', line) - value = float(m.group(0)) + match = re.search(r'([0-9]+\.[0-9]+)', line) + value = float(match.group(0)) test_data.append(value) - errstr = "Error exceeds maximum allowed value (%d decimal places)" % MAX_ERR_DECIMALS - assert_almost_equal(test_data, ref_data, decimal=MAX_ERR_DECIMALS, - err_msg=errstr, verbose=True) + errstr = ( + "Error exceeds maximum allowed value ({0:d} decimal places)".format( + MAX_ERR_DECIMALS)) + assert_almost_equal( + test_data, ref_data, decimal=MAX_ERR_DECIMALS, err_msg=errstr, + verbose=True) @pytest.mark.parametrize("pdb, options", [ @@ -113,24 +121,26 @@ def compare_output(pdb, tmp_path, ref_path): pytest.param('1HPX', [], id="1HPX: no options"), pytest.param('4DFR', [], id="4DFR: no options"), pytest.param('3SGB', [], id="3SGB: no options"), - pytest.param('3SGB-subset', ["--titrate_only", - "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"], - id="3SGB: --titrate_only"), + pytest.param('3SGB-subset', [ + "--titrate_only", + "E:17,E:18,E:19,E:29,E:44,E:45,E:46,E:118,E:119,E:120,E:139"], + id="3SGB: --titrate_only"), pytest.param('1HPX-warn', ['--quiet'], id="1HPX-warn: --quiet")]) def test_regression(pdb, options, tmp_path): """Basic regression test of PROPKA functionality.""" path_dict = get_test_dirs() - ref_path = path_dict["results"] / ("%s.dat" % pdb) + ref_path = path_dict["results"] / ("{0:s}.dat".format(pdb)) if ref_path.is_file(): ref_path = ref_path.resolve() else: - _LOGGER.warning("Missing results file for comparison: %s", ref_path) + _LOGGER.warning("Missing results file for comparison: {0:s}".format( + str(ref_path))) ref_path = None - pdb_path = path_dict["pdbs"] / ("%s.pdb" % pdb) + pdb_path = path_dict["pdbs"] / ("{0:s}.pdb".format(pdb)) if pdb_path.is_file(): pdb_path = pdb_path.resolve() else: - errstr = "Missing PDB file: %s" % pdb_path + errstr = "Missing PDB file: {0:s}".format(pdb_path) raise FileNotFoundError(errstr) tmp_path = Path(tmp_path).resolve() diff --git a/tests/test_hybrid36.py b/tests/test_hybrid36.py new file mode 100644 index 0000000..261db2a --- /dev/null +++ b/tests/test_hybrid36.py @@ -0,0 +1,60 @@ +"""Test the hybrid36 module.""" +import unittest +import propka.hybrid36 as hybrid36 + + +class Hybrid36Test(unittest.TestCase): + """Test class for hybrid36.""" + + def test_decode(self): + """Test decoding functions.""" + test_values = { + "99999": 99999, + "A0000": 100000, + "0": 0, + "9": 9, + "A": 10, + " ZZZZY": 43770014, + "ZZZZZ": 43770015, # ZZZZZ - A0000 + 100000 + "a0000": 43770016, + "zzzzz": 87440031, + "zzzzy": 87440030, + "99": 99, + "A0": 100, + "ZZ": 1035, + "zz": 1971, + "-99999": -99999, + "-A0000": -100000, + "-0": 0, + "-9": -9, + "-A": -10, + "-ZZZZY": -43770014, + "-ZZZZZ": -43770015, # ZZZZZ - A0000 + 100000 + "-a0000": -43770016, + "-zzzzz": -87440031, + "-zzzzy": -87440030, + "-99": -99, + "-A0": -100, + "-ZZ": -1035, + "-zz": -1971, + "PROPKA": 954495146, + "A001Z": 100071, + "B0000": 1779616, + } + for key, value in test_values.items(): + self.assertEqual(hybrid36.decode(key), value) + + def test_errors(self): + """Test values that should raise errors.""" + test_values = [ + "99X99", + "X9-99", + "XYZa", + "", + "-", + "!NotOk", + ] + for value in test_values: + with self.assertRaises(ValueError) as err: + hybrid36.decode(value) + self.assertTrue(value in str(err.exception))